File size: 6,592 Bytes
bc44e70
 
 
 
 
3330bf8
 
bc44e70
 
 
 
d8e9a40
bc44e70
 
 
 
 
 
 
 
 
 
 
 
3330bf8
bc44e70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3330bf8
bc44e70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3330bf8
bc44e70
 
 
 
 
 
 
 
 
 
3330bf8
 
bc44e70
 
 
 
 
 
3330bf8
bc44e70
 
 
 
 
 
 
 
 
3330bf8
 
bc44e70
3330bf8
bc44e70
 
 
3330bf8
bc44e70
 
3330bf8
 
 
 
 
 
 
 
 
 
 
 
 
bc44e70
 
3330bf8
 
bc44e70
 
 
 
3330bf8
bc44e70
 
 
 
 
 
 
3330bf8
 
 
 
bc44e70
 
3330bf8
 
 
 
 
 
bc44e70
 
 
 
 
 
 
 
 
 
 
 
 
 
3330bf8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
from sentence_transformers import SentenceTransformer
import chromadb
import pandas as pd
import gradio as gr
from datetime import datetime
import tempfile
import os 

# ===== Configuration =====
# Path handed to the persistent Chroma client below.
CHROMA_DIR = "chroma_db"
# Identifier of the sentence-embedding model loaded at start-up.
MODEL_NAME = "mrm8488/multilingual-e5-large-ft-sts-spanish-matryoshka-768-16-5e"
# Names of the collections that every search is run against.
COLLECTION_NAMES = ["spc"]

# ===== 1. Connect to the database and load the collections =====
print("Conectando a ChromaDB...")
client = chromadb.PersistentClient(path=CHROMA_DIR)
# One collection handle per configured name, in the configured order.
collections = list(map(client.get_collection, COLLECTION_NAMES))

# ===== 2. Load the model =====
print("Cargando modelo...")
# NOTE(review): trust_remote_code=True is passed through to the model loader;
# confirm the model repository is trusted before deploying.
model = SentenceTransformer(MODEL_NAME, trust_remote_code=True)
print("✓ Sistema listo")

# ===== 3. Search function =====
def _rows_from_results(results, collection_name):
    """Flatten one collection's query response into a list of row dicts.

    Only the first entry of each response field is read, because a single
    query embedding is sent per request.
    """
    rows = []
    hits = zip(
        results['ids'][0],
        results['documents'][0],
        results['metadatas'][0],
        results['distances'][0],
    )
    for doc_id, text, metadata, distance in hits:
        row = {
            'Relevante': False,  # selection checkbox, unticked by default
            'ID': doc_id,
            # NOTE(review): 1 - distance is a cosine similarity only if the
            # collection was created with cosine distance; confirm.
            'Similitud': round(1 - distance, 4),
            'Texto': text,
            'Colección': collection_name,
        }
        # Metadata keys become columns of their own; a hit may carry none.
        for key, value in (metadata or {}).items():
            if key.lower() in ('url', 'link', 'enlace') and value:
                # Non-empty URL-like fields are wrapped in an anchor.
                # NOTE(review): the value is interpolated unescaped; the Excel
                # export strips this exact markup, so keep both in sync.
                row[key] = f'<a href="{value}" target="_blank">🔗 Abrir</a>'
            else:
                row[key] = value
        rows.append(row)
    return rows


def semantic_search(query: str, n_results: int = 20):
    """Embed *query*, search every loaded collection and tabulate the hits.

    Args:
        query: Free-text query. ``None``, empty or whitespace-only input
            short-circuits without touching the model or the database.
        n_results: Number of hits requested from each collection, and also
            the cap applied to the merged, similarity-sorted table.

    Returns:
        A ``(DataFrame, str)`` pair. The frame holds the display columns in a
        fixed order; the string is always empty.
    """
    print(f"Buscando: {query}")

    # Guard against None as well as blank text so a cleared input cannot crash.
    if not query or not query.strip():
        return pd.DataFrame(), ""

    query_embedding = model.encode(query).tolist()

    all_results = []
    for collection in collections:
        results = collection.query(
            query_embeddings=[query_embedding],
            n_results=n_results,
            include=["documents", "metadatas", "distances"]
        )
        all_results.extend(_rows_from_results(results, collection.name))

    display_columns = [
        'Número de Resolución',
        'Fecha de Resolución',
        'Texto',
        'Enlace',
        'Relevante',
    ]

    # With no hits there is no 'Similitud' column to sort on, so return an
    # empty table that still has the expected headers.
    if not all_results:
        print("Resultados: 0")
        return pd.DataFrame(columns=display_columns), ""

    df = (
        pd.DataFrame(all_results)
        .sort_values('Similitud', ascending=False)
        .head(n_results)
        # reindex (rather than plain selection) tolerates hits whose metadata
        # lacks one of the display fields: the gap is filled with NaN.
        .reindex(columns=display_columns)
    )
    print(f"Resultados: {len(df)}")

    return df, ""

# ===== 4. Export the selected rows to Excel =====
def _strip_link_markup(value):
    """Remove the anchor wrapper from a cell, leaving the bare URL.

    Values that are not strings, or that contain no anchor, pass through
    unchanged.
    """
    if isinstance(value, str) and '<a href=' in value:
        return value.replace('<a href="', '').replace('" target="_blank">🔗 Abrir</a>', '')
    return value


def export_to_excel(df_with_selection):
    """Write the rows ticked in the 'Relevante' column to an .xlsx file.

    Args:
        df_with_selection: Table contents, including the 'Relevante'
            selection column.

    Returns:
        Path of the workbook that was written, or ``None`` (after raising a
        UI warning) when there is nothing to export.
    """
    if df_with_selection is None or len(df_with_selection) == 0:
        gr.Warning("No hay datos para exportar")
        return None

    # Without the selection column nothing can have been ticked.
    if 'Relevante' not in df_with_selection.columns:
        gr.Warning("No has seleccionado ninguna decisión")
        return None

    # Keep only the rows marked in 'Relevante'.
    # NOTE(review): the text form "true" is accepted in addition to real
    # booleans in case the table hands checkbox values back as strings;
    # confirm against the Gradio version in use.
    flags = df_with_selection['Relevante']
    is_selected = (flags == True) | (  # noqa: E712 - element-wise comparison
        flags.astype(str).str.strip().str.lower() == "true"
    )
    df_selected = df_with_selection[is_selected].copy()

    if len(df_selected) == 0:
        gr.Warning("No has seleccionado ninguna decisión")
        return None

    # The checkbox column is not part of the export.
    df_export = df_selected.drop(columns=['Relevante'])

    # Strip the HTML anchor from link cells so Excel receives plain URLs.
    # Text columns are detected by kind, not by the literal 'object' label,
    # so columns stored under a dedicated string dtype are cleaned too.
    for col in df_export.columns:
        column = df_export[col]
        if pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column):
            df_export[col] = column.apply(_strip_link_markup)

    # Each export gets its own fresh directory under the system temp dir, so
    # two exports within the same second cannot overwrite one another.
    export_dir = tempfile.mkdtemp(prefix="jurisprudencia_")
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = os.path.join(export_dir, f"jurisprudencia_dc_{timestamp}.xlsx")
    df_export.to_excel(filename, index=False, engine='openpyxl')

    print(f"✓ Exportado: {filename} ({len(df_export)} registros)")
    gr.Info(f"✓ {len(df_export)} decisiones exportadas. Descargando...")
    return filename

# ===== 5. Gradio interface: theme =====
# Start from the Soft theme with a blue primary hue.
tema = gr.themes.Soft(primary_hue="blue")
# Every *_dark override is given the same value as its light counterpart.
tema = tema.set(
    body_background_fill="*neutral_50",
    body_background_fill_dark="*neutral_50",
    block_background_fill="*neutral_50",
    block_background_fill_dark="*neutral_50",
    input_background_fill="white",
    input_background_fill_dark="white",
)
               
# ===== 5. Gradio interface: layout and events =====
with gr.Blocks(title="Buscador Jurisprudencia") as demo:
    gr.Markdown("# 🔍 Experto en Protección al Consumidor")
    gr.Markdown("Realiza tu consulta preguntando por temas o introduciendo los hechos del caso")

    # Single-line query input; its submit event is wired to the search below.
    query_box = gr.Textbox(
        placeholder="Escribe tu consulta y presiona Enter...",
        lines=1,
        show_label=False,
    )

    # Results table, editable so the checkbox column can be ticked.
    # NOTE(review): the second column is declared "number" while
    # semantic_search fills it from 'Fecha de Resolución'; confirm that field
    # is numeric. The five widths add up to 83%, not 100%.
    results_table = gr.Dataframe(
        label="Resultados",
        datatype=["number", "number", "str", "markdown", "bool"],
        column_widths=["7%", "7%", "55%", "7%", "7%"],
        interactive=True,
        wrap=True,
        max_height=600,
    )

    # Export button in its own row (positioned through the #export-row CSS id).
    with gr.Row(elem_id="export-row"):
        export_button = gr.Button(
            "📥 Exportar a Excel",
            variant="primary",
            size="sm",
            elem_id="export-btn",
        )

    # File component that receives the exported workbook path.
    # NOTE(review): it is created with visible=False; confirm the user can
    # actually reach the generated file.
    file_output = gr.File(label="", visible=False)

    # Events
    # Submitting the query fills the table and writes the function's second
    # return value back into the query box.
    query_box.submit(
        fn=semantic_search,
        inputs=[query_box],
        outputs=[results_table, query_box],
    )

    # Clicking export sends the current table to the exporter.
    export_button.click(
        fn=export_to_excel,
        inputs=[results_table],
        outputs=[file_output],
    )

# ===== 6. Launch =====
# Stylesheet passed to the app at launch.
# NOTE(review): the .svelte-* selectors look like build-generated class names;
# confirm they still match after any Gradio upgrade.
_CUSTOM_CSS = """
    /* Bordes gruesos */
    .svelte-u825rv td, 
    .svelte-u825rv th {
        border: 3px solid #888888 !important;
        padding: 12px !important;
    }
    
    .svelte-u825rv table {
        border-collapse: collapse !important;
    }
    
    /* Headers - targetear el span interno */
    .svelte-u825rv th .svelte-8fgf56.multiline.text {
        white-space: normal !important;
        word-wrap: break-word !important;
        text-align: center !important;
        display: block !important;
        line-height: 1.4 !important;
    }
    
    /* Centrar el header button también */
    .svelte-u825rv th .header-button {
        text-align: center !important;
        justify-content: center !important;
    }
    
    /* Checkbox con más sombra */
    input[type="checkbox"] {
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3) !important;
        transform: scale(1.2);
    }
    
    /* Botón */
    #export-row { 
        justify-content: flex-end; 
        margin-top: 10px;
    }
    
    #export-btn { 
        max-width: 200px;
        min-width: 150px;
    }
    """

# Start the app with the custom theme and stylesheet.
demo.launch(theme=tema, css=_CUSTOM_CSS)