alexis07 committed on
Commit
bc44e70
·
verified ·
1 Parent(s): 5816272

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +136 -0
app.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ import chromadb
3
+ import pandas as pd
4
+ import gradio as gr
5
+ from datetime import datetime
6
+
7
# ===== Configuration =====
# Directory of the persistent ChromaDB store, the embedding model identifier,
# and the names of the collections that searches run against.
CHROMA_DIR = "chroma_db"
MODEL_NAME = "mrm8488/multilingual-e5-large-ft-sts-spanish-matryoshka-768-16-5e"
COLLECTION_NAMES = ["consumidor_beta"]

# ===== 1. Connect to the database and load the collections =====
print("Conectando a ChromaDB...")
client = chromadb.PersistentClient(path=CHROMA_DIR)
# One collection handle per configured name, in the configured order.
collections = list(map(client.get_collection, COLLECTION_NAMES))

# ===== 2. Load the embedding model =====
print("Cargando modelo...")
model = SentenceTransformer(MODEL_NAME, trust_remote_code=True)
print("✓ Sistema listo")
21
+
22
+ # ===== 3. Función de búsqueda =====
23
def semantic_search(query: str, n_results: int = 5):
    """Search every loaded collection for entries semantically close to *query*.

    The query is embedded with the module-level ``model``; each collection in
    the module-level ``collections`` is asked for its ``n_results`` nearest
    entries. Hits from all collections are merged, sorted by similarity
    (highest first) and truncated to ``n_results`` rows.

    Args:
        query: Free-text query typed by the user.
        n_results: Maximum number of rows requested per collection and
            returned overall.

    Returns:
        A ``(dataframe, text)`` tuple. The dataframe holds the columns
        ``✓``, ``ID``, ``Similitud``, ``Texto`` and ``Colección`` followed by
        one column per metadata key; it is an empty ``DataFrame`` when the
        query is blank or nothing matched. ``text`` is always ``""``.
    """
    print(f"Buscando: {query}")

    # Blank or missing input: nothing to embed, return an empty table.
    if not query or not query.strip():
        return pd.DataFrame(), ""

    query_embedding = model.encode(query).tolist()

    # Metadata keys whose value is rendered as a clickable link.
    link_keys = {'url', 'link', 'enlace'}

    all_results = []
    for collection in collections:
        results = collection.query(
            query_embeddings=[query_embedding],
            n_results=n_results,
            include=["documents", "metadatas", "distances"],
        )
        # A single query embedding was sent, so only index 0 of each field
        # is read; the four lists are walked in lockstep.
        hits = zip(
            results['ids'][0],
            results['distances'][0],
            results['documents'][0],
            results['metadatas'][0],
        )
        for doc_id, distance, document, metadata in hits:
            row = {
                '✓': False,  # selection checkbox, first column
                'ID': doc_id,
                # NOTE(review): 1 - distance is a cosine similarity only if
                # the collection is configured for cosine distance - confirm.
                'Similitud': round(1 - distance, 4),
                'Texto': document,
                'Colección': collection.name,
            }
            for key, value in (metadata or {}).items():
                if key.lower() in link_keys and value:
                    row[key] = f'<a href="{value}" target="_blank">🔗 Abrir</a>'
                else:
                    row[key] = value
            all_results.append(row)

    # With no hits the frame has no 'Similitud' column and sort_values would
    # raise KeyError, so return the same empty result as for a blank query.
    if not all_results:
        print("Resultados: 0")
        return pd.DataFrame(), ""

    df = (
        pd.DataFrame(all_results)
        .sort_values('Similitud', ascending=False)
        .head(n_results)
    )
    print(f"Resultados: {len(df)}")

    return df, ""
64
+
65
+ # ===== 4. Función para exportar seleccionados a Excel =====
66
def export_to_excel(df_with_selection):
    """Write the rows ticked in the ``✓`` column to a timestamped Excel file.

    Args:
        df_with_selection: Results table as a ``DataFrame``, expected to
            carry the boolean ``✓`` selection column. May be ``None``, empty,
            or lack that column.

    Returns:
        The name of the ``.xlsx`` file that was written (a relative path,
        ``jurisprudencia_seleccionada_<timestamp>.xlsx``), or ``None`` when
        there was nothing to export; in that case a Gradio warning is raised
        for the user instead.
    """
    # Also bail out when the selection column is absent (for instance a
    # table that no search has populated yet) rather than fail with KeyError.
    if (
        df_with_selection is None
        or df_with_selection.empty
        or '✓' not in df_with_selection.columns
    ):
        gr.Warning("No hay datos para exportar")
        return None

    # Keep only the rows whose checkbox is ticked.
    df_selected = df_with_selection[df_with_selection['✓'].eq(True)].copy()

    if df_selected.empty:
        gr.Warning("No has seleccionado ninguna decisión")
        return None

    # The checkbox column is UI state, not data: leave it out of the export.
    df_export = df_selected.drop(columns=['✓'])

    link_prefix = '<a href="'
    link_suffix = '" target="_blank">🔗 Abrir</a>'

    def strip_link(cell):
        # Turn the anchor markup produced for link columns back into the URL.
        if isinstance(cell, str) and '<a href=' in cell:
            return cell.replace(link_prefix, '').replace(link_suffix, '')
        return cell

    # Clean every non-numeric column, so text stored in a dedicated string
    # dtype is handled as well as plain object columns.
    for col in df_export.columns:
        if pd.api.types.is_numeric_dtype(df_export[col]):
            continue
        df_export[col] = df_export[col].apply(strip_link)

    # Save to Excel under a timestamped name.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"jurisprudencia_seleccionada_{timestamp}.xlsx"
    df_export.to_excel(filename, index=False, engine='openpyxl')

    print(f"✓ Exportado: {filename} ({len(df_export)} registros)")
    gr.Info(f"✓ Exportadas {len(df_export)} decisiones")
    return filename
97
+
98
# ===== 5. Gradio interface =====
with gr.Blocks(title="Buscador Jurisprudencia") as demo:
    gr.Markdown("# 🔍 Buscador de Jurisprudencia")
    gr.Markdown("Busca, marca las decisiones útiles en la columna ✓ y exporta a Excel")

    # Single-line query input; submitting it runs the search.
    query_box = gr.Textbox(
        show_label=False,
        placeholder="Escribe tu consulta legal y presiona Enter...",
        lines=1,
    )

    # Results table, editable so the checkbox column can be ticked.
    # The datatype list covers the five leading columns that
    # semantic_search emits: ✓, ID, Similitud, Texto, Colección.
    results_table = gr.Dataframe(
        label="Resultados",
        wrap=True,
        interactive=True,
        datatype=["bool", "str", "number", "markdown", "str"],
    )

    # Export button next to the download slot for the generated file.
    with gr.Row():
        export_button = gr.Button(
            "📥 Exportar seleccionadas a Excel",
            variant="primary",
            scale=2,
        )
        file_output = gr.File(label="Descargar", scale=1)

    # Events: a search fills the table and writes the returned "" back into
    # the query box; the button exports the ticked rows.
    query_box.submit(
        fn=semantic_search,
        inputs=[query_box],
        outputs=[results_table, query_box],
    )

    export_button.click(
        fn=export_to_excel,
        inputs=[results_table],
        outputs=[file_output],
    )

demo.launch()