# pruebas / app.py
# alexis07's picture
# Update app.py
# d8e9a40 verified
from sentence_transformers import SentenceTransformer
import chromadb
import pandas as pd
import gradio as gr
from datetime import datetime
import tempfile
import os
# ===== Configuration =====
# Path handed to chromadb.PersistentClient as the on-disk database location.
CHROMA_DIR = "chroma_db"
# Model identifier handed to SentenceTransformer; used to embed every query.
MODEL_NAME = "mrm8488/multilingual-e5-large-ft-sts-spanish-matryoshka-768-16-5e"
# Names of the ChromaDB collections that semantic_search() iterates over.
COLLECTION_NAMES = ["spc"]
# ===== 1. Connect to the database and load the collections =====
print("Conectando a ChromaDB...")
client = chromadb.PersistentClient(path=CHROMA_DIR)
# One collection handle per configured name, resolved once at import time.
# NOTE(review): presumably this fails at startup if a collection is missing
# from CHROMA_DIR -- confirm against the chromadb version in use.
collections = [client.get_collection(name) for name in COLLECTION_NAMES]
# ===== 2. Load the model =====
print("Cargando modelo...")
# NOTE(review): trust_remote_code=True presumably lets the model repository
# execute its own code while loading -- confirm the model source is trusted.
model = SentenceTransformer(MODEL_NAME, trust_remote_code=True)
print("✓ Sistema listo")
# ===== 3. Search function =====
def semantic_search(query: str, n_results: int = 20):
    """Run a semantic search over every loaded collection.

    The query is embedded with the module-level ``model`` and each entry of
    the module-level ``collections`` is queried with that embedding. Hits
    from all collections are merged, sorted by similarity (highest first),
    truncated to ``n_results`` rows and reduced to the columns shown in the
    results table.

    Args:
        query: Free-text query. ``None`` or a blank string yields an empty
            result instead of an error.
        n_results: Maximum number of hits requested from each collection and
            maximum number of rows returned.

    Returns:
        A ``(dataframe, "")`` tuple. The dataframe holds the display columns
        (missing metadata columns are filled with NaN); it is a completely
        empty ``DataFrame`` when the query is blank or nothing was found.
        The empty string is the second output of the UI event, which is
        bound to the query textbox.
    """
    print(f"Buscando: {query}")
    # Guard against None as well as blank input so an empty submit never
    # reaches the encoder.
    if not query or not query.strip():
        return pd.DataFrame(), ""

    display_columns = [
        'Número de Resolución',
        'Fecha de Resolución',
        'Texto',
        'Enlace',
        'Relevante',
    ]
    link_keys = {'url', 'link', 'enlace'}

    query_embedding = model.encode(query).tolist()
    rows = []
    for collection in collections:
        results = collection.query(
            query_embeddings=[query_embedding],
            n_results=n_results,
            include=["documents", "metadatas", "distances"],
        )
        # A single query embedding was sent, so index 0 holds its hits.
        hits = zip(
            results['ids'][0],
            results['distances'][0],
            results['documents'][0],
            results['metadatas'][0],
        )
        for doc_id, distance, document, metadata in hits:
            row = {
                'Relevante': False,  # selection checkbox, unchecked by default
                'ID': doc_id,
                # NOTE(review): 1 - distance is only a cosine similarity if
                # the collection uses the cosine distance space -- confirm.
                'Similitud': round(1 - distance, 4),
                'Texto': document,
                'Colección': collection.name,
            }
            for key, value in (metadata or {}).items():
                if key.lower() in link_keys and value:
                    # Render link-like metadata as a clickable anchor.
                    # NOTE(review): the value is interpolated unescaped;
                    # export_to_excel strips exactly this markup, so keep
                    # both in sync if escaping is ever added here.
                    row[key] = f'<a href="{value}" target="_blank">🔗 Abrir</a>'
                else:
                    row[key] = value
            rows.append(row)

    # Without hits there is no 'Similitud' column to sort on.
    if not rows:
        print("Resultados: 0")
        return pd.DataFrame(), ""

    df = pd.DataFrame(rows).sort_values('Similitud', ascending=False).head(n_results)
    # reindex (instead of df[[...]]) tolerates hits whose metadata lacks one
    # of the display columns; such columns come back filled with NaN.
    df = df.reindex(columns=display_columns)
    print(f"Resultados: {len(df)}")
    return df, ""
# ===== 4. Export the selected rows to Excel =====
def _strip_link_html(value):
    """Return the bare URL for a link cell built by the search step; pass other values through."""
    if isinstance(value, str) and '<a href=' in value:
        return value.replace('<a href="', '').replace('" target="_blank">🔗 Abrir</a>', '')
    return value


def export_to_excel(df_with_selection):
    """Write the rows marked as relevant to an .xlsx file and return its path.

    Args:
        df_with_selection: DataFrame coming from the results table,
            including the boolean 'Relevante' selection column.

    Returns:
        The path of the generated workbook, or ``None`` when there is
        nothing to export (no data, no selection column, or no row
        selected). In those cases a ``gr.Warning`` is issued.
    """
    if (
        df_with_selection is None
        or len(df_with_selection) == 0
        or 'Relevante' not in df_with_selection.columns
    ):
        gr.Warning("No hay datos para exportar")
        return None

    # Keep only the rows whose 'Relevante' checkbox is ticked.
    selected_mask = df_with_selection['Relevante'].eq(True)
    df_selected = df_with_selection[selected_mask]
    if len(df_selected) == 0:
        gr.Warning("No has seleccionado ninguna decisión")
        return None

    # The checkbox column is UI state, not data; drop() returns a new frame.
    df_export = df_selected.drop(columns=['Relevante'])

    # Replace link markup with the plain URL. Both object and dedicated
    # string dtypes can hold text, so check for either.
    for col in df_export.columns:
        column = df_export[col]
        if pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column):
            df_export[col] = column.apply(_strip_link_html)

    # Each export gets its own directory under the system temp dir, so two
    # exports started within the same second cannot overwrite each other.
    export_dir = tempfile.mkdtemp(prefix="jurisprudencia_dc_")
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = os.path.join(export_dir, f"jurisprudencia_dc_{timestamp}.xlsx")
    df_export.to_excel(filename, index=False, engine='openpyxl')

    print(f"✓ Exportado: {filename} ({len(df_export)} registros)")
    gr.Info(f"✓ {len(df_export)} decisiones exportadas. Descargando...")
    return filename
# ===== 5. Gradio interface: theme =====
# Soft theme with a blue primary hue. Every *_dark variable below is set to
# the same value as its light counterpart, so the overridden backgrounds are
# identical in both colour schemes.
tema = gr.themes.Soft(
    primary_hue="blue",
).set(
    body_background_fill="*neutral_50",
    body_background_fill_dark="*neutral_50",
    block_background_fill="*neutral_50",
    block_background_fill_dark="*neutral_50",
    input_background_fill="white",
    input_background_fill_dark="white",
)
# ===== 5. Gradio interface: layout and events =====
with gr.Blocks(title="Buscador Jurisprudencia") as demo:
    gr.Markdown("# 🔍 Experto en Protección al Consumidor")
    gr.Markdown("Realiza tu consulta preguntando por temas o introduciendo los hechos del caso")
    # Query box: submitting it (Enter) triggers the search.
    query_box = gr.Textbox(
        show_label=False,
        placeholder="Escribe tu consulta y presiona Enter...",
        lines=1
    )
    # Results table; interactive so the user can tick the checkbox column.
    # datatype and column_widths each list five entries, one per column
    # returned by semantic_search.
    results_table = gr.Dataframe(
        label="Resultados",
        wrap=True,
        interactive=True,
        datatype=["number", "number", "str", "markdown", 'bool'],
        max_height=600,
        column_widths=["7%", "7%", "55%", "7%", "7%"]
    )
    # Export button, in its own row (the row is right-aligned by the CSS
    # rule for #export-row passed to launch()).
    with gr.Row(elem_id="export-row"):
        export_button = gr.Button("📥 Exportar a Excel", variant="primary", size="sm", elem_id="export-btn")
    # Hidden file component that receives the generated workbook path.
    file_output = gr.File(label="", visible=False)
    # Events
    # Search: the first output fills the table, the second ("" from
    # semantic_search) is written back into the query box.
    query_box.submit(
        semantic_search,
        [query_box],
        [results_table, query_box]
    )
    # Export: sends the current table contents to export_to_excel.
    export_button.click(
        export_to_excel,
        [results_table],
        [file_output]
    )
# Start the app with the custom theme and the CSS overrides for the table,
# the checkboxes and the export button.
# NOTE(review): selectors such as .svelte-u825rv / .svelte-8fgf56 look like
# build-generated class names -- presumably they change between Gradio
# releases; verify the styling after any upgrade.
demo.launch(
    theme=tema,
    css="""
/* Bordes gruesos */
.svelte-u825rv td,
.svelte-u825rv th {
border: 3px solid #888888 !important;
padding: 12px !important;
}
.svelte-u825rv table {
border-collapse: collapse !important;
}
/* Headers - targetear el span interno */
.svelte-u825rv th .svelte-8fgf56.multiline.text {
white-space: normal !important;
word-wrap: break-word !important;
text-align: center !important;
display: block !important;
line-height: 1.4 !important;
}
/* Centrar el header button también */
.svelte-u825rv th .header-button {
text-align: center !important;
justify-content: center !important;
}
/* Checkbox con más sombra */
input[type="checkbox"] {
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3) !important;
transform: scale(1.2);
}
/* Botón */
#export-row {
justify-content: flex-end;
margin-top: 10px;
}
#export-btn {
max-width: 200px;
min-width: 150px;
}
"""
)