# NOTE: the three lines below are web-page scrape residue (uploader avatar
# caption, commit message, and commit hash) — not valid Python. Kept as
# comments so the file parses.
# geronimo-pericoli's picture
# Update app.py
# cf1692c verified
# --- Runtime setup -----------------------------------------------------------
import os
import ast
import gradio as gr
from llama_index.core import (
    StorageContext,
    load_index_from_storage,
)
import pandas as pd
import nest_asyncio
# Patch the running loop so nested asyncio use works (Gradio and the
# llama-index workflow both drive asyncio in the same process).
nest_asyncio.apply()
from logging_manager import LoggingManager
# OpenAI API Key:
OPENAI_API_KEY = os.environ.get('openai')
# HuggingFace Token:
HF_TOKEN = os.environ.get('hf')
# Cohere Rerank Token:
COHERE_TOKEN = os.environ.get('cohere')
# Context: executes configuration code stored in the 'context' Space secret.
# It is expected to define the models (gpt_4_1*), retriever_*_tool objects,
# RAGWorkflow, ProgressEvent/StopEvent, and helper functions used below.
# SECURITY NOTE(review): exec() of an environment variable runs arbitrary
# code, and exec(None) raises TypeError if the secret is unset — the secret
# must be trusted and present.
exec(os.environ.get('context'))
### LOG MANAGER ###
# Audit/interaction logger that persists to a HF dataset repository.
logger = LoggingManager(
    repo_name="pharma-IA",
    project_id="logs-dataintegrity",
    hf_token=HF_TOKEN
)
# Context: second stage of exec()'d configuration (same caveats as above).
exec(os.environ.get('context2'))
# Interface: custom CSS injected into gr.Blocks.
# NOTE(review): the #component-N ids are Gradio's auto-generated element ids,
# which depend on component creation order — fragile if the layout changes.
css = """
#component-2 * {
font-size: small !important;
}
#component-13 textarea {
background: transparent !important;
}
#component-16 * {
font-size: small !important;
}
#btn_select {
font-size: x-small;
width:140px;
margin:auto;
}
#component-21 {
padding: 0 !important;
}
#component-21 span{
padding: 4px 0 0 8px !important;
font-size: small !important;
}
#component-21 p {
padding: 0 0 0 14px !important;
}
#component-21 input {
caret-color: var(--neutral-400) !important;
pointer-events: auto !important;
}
#component-21 .secondary-wrap {
background: var(--neutral-200) !important;
border-radius: 4px !important;
}
#component-21 .icon-wrap {
padding: 0 !important;
width: 36px !important;
}
#select_list {
padding: 0 !important;
}
#select_list label.selected {
background: var(--secondary-200) !important;
border-color: var(--secondary-400) !important;
}
#select_list label.selected input {
background-color: var(--secondary-400) !important;
border-color: var(--secondary-400) !important;
}
.message-row.bubble.user-row .user{
background-color: var(--secondary-200) !important;
border-color: var(--secondary-400) !important;
}
#markdown table {
font-size: x-small !important;
}
table {
font-size: x-small !important;
padding: 2px 3px !important;
}
.html-container {
padding: 0 !important;
margin: 0 !important;
}
"""
# (label, retriever tool) pairs; the retriever_*_tool objects are created by
# the exec()'d context code above. The last two entries are excluded from the
# initial selection below.
choices_with_tools = [
    ("EMA", retriever_1_tool),
    ("FDA", retriever_2_tool),
    ("Validación de Software", retriever_3_tool),
    ("Integridad de datos Internacional", retriever_4_tool),
    ("Estudios Clínicos", retriever_5_tool),
    ("Sistema MES", retriever_6_tool),
    ("Sistema LIMS", retriever_7_tool),
    ("Validación de Software e Integridad de Datos – Argentina", retriever_8_tool),
    ("Validación de Software e Integridad de Datos – Perú", retriever_9_tool),
    ("Validación de Software e Integridad de Datos – México", retriever_10_tool),
    ("Validación de Software e Integridad de Datos – Colombia", retriever_11_tool),
    ("Validación de Software e Integridad de Datos – Ecuador", retriever_12_tool),
    ("Validación de Software e Integridad de Datos – Paraguay", retriever_13_tool),
    ("Validación de Software e Integridad de Datos – Brasil", retriever_14_tool),
    ("Herramienta de comparación", retriever_summary_tool),
]
# Labels selected by default in the dropdown (names only, for the UI).
selected_choices = ["EMA", "FDA", "Validación de Software", "Integridad de datos Internacional", "Estudios Clínicos", "Sistema MES", "Sistema LIMS"]
choice_labels = [label for label, _ in choices_with_tools]
# Mutable module-level state shared between the chat callback and the
# reference / evaluation panels (updated by llm_response and get_evals).
result_evals = ""
result_metadata = ""
result_texts = ""
import asyncio

async def process_query(message):
    """Run the RAG workflow for *message* and stream status tuples back.

    Yields ``(kind, text, payload)`` triples:
      - ``("progress", accumulated_text, None)`` while the workflow streams,
      - ``("final", answer_text, result_dict)`` when a StopEvent arrives,
      - ``("error", message, None)`` if anything raises.
    """
    try:
        # Keep only the retriever tools whose label is currently selected.
        active_tools = [
            tool
            for label, tool in choices_with_tools
            if label in selected_choices
        ]
        workflow = RAGWorkflow(timeout=200)
        handler = workflow.run(
            query=str(message),
            llm_multimodal=gpt_4_1_multimodal,
            llm_selector=gpt_4_1,
            llm_history=gpt_4_1_mini,
            llm_kg=gpt_4_1_mini,
            retrieve_tools=active_tools,
            bm25_top_k=2,
            max_tools=3,
            rerank_limit=5,
            stream=True
        )
        accumulated = ""
        async for event in handler.stream_events():
            if isinstance(event, ProgressEvent):
                print("ProgressEvent:", event.msg, flush=True)
                accumulated += event.msg
                yield ("progress", accumulated, None)  # (type, response, final_dict)
                # Brief pause so the UI can repaint between chunks.
                await asyncio.sleep(0.01)
            elif isinstance(event, StopEvent):
                print("StopEvent:", event.result["response"], flush=True)
                yield ("final", event.result["response"], event.result)
    except Exception as e:
        print(f"Error: {e}", flush=True)
        yield ("error", f"Error: {e}", None)
async def llm_response(message, history, profile: gr.OAuthProfile | None):
    """Gradio ChatInterface callback: stream the model's answer for *message*.

    Streams every progress/final/error text chunk to the chat, then — if a
    final result was produced — updates the shared reference state and
    persists the interaction through the logger.
    """
    global result_texts, result_metadata, selected_choices, chat_history
    global kg_source_nodes, final_response, query

    # Fall back to a placeholder identity when the user is not logged in.
    if profile is None:
        user_name = "Usuario no ingresado"
    else:
        user_name = profile.name

    chat_history = logger.get_user_history(user_name, 2)

    answer_payload = None
    async for kind, text, payload in process_query(message):
        if kind == "final":
            answer_payload = payload
        yield text

    # Streaming done: post-process the final result if we got one.
    if answer_payload is None:
        return
    try:
        final_response = answer_payload
        query = str(message)
        # Extract metadata and raw texts from the response for the panels.
        metadata_lines = extraer_informacion_metadata(answer_payload, kg_source_nodes, max_results=20) or []
        result_metadata = "\n".join(metadata_lines)
        result_texts = extraer_textos_metadata(answer_payload, max_results=20) or []
        # Persist the conversation and its node references to the dataset.
        logger.save_interaction(message, answer_payload["response"], user_name)
        logger.save_node_references(message, answer_payload["source_nodes"], kg_source_nodes)
    except Exception as e:
        print(f"Error processing final data: {e}", flush=True)
# Root UI container; `css` (defined above) customizes component styling.
with gr.Blocks(theme=gr.themes.Base(), css=css) as demo:
# Función para actualizar las choices seleccionadas
def update_selected_choices(choices):
global selected_choices
print("Selección actualizada: " + str(choices))
selected_choices = list(set(choices))
# Alternar la selección
def toggle_all(selected):
if len(selected) == len(choice_labels):
update_selected_choices([])
return []
else:
update_selected_choices(choice_labels)
return choice_labels
    # References: push the latest extracted metadata/texts into the UI panels.
    def get_ref():
        """Return updated components: source metadata (Markdown) and raw texts (HTML)."""
        return {simple_ref: gr.Markdown(result_metadata), texts: gr.HTML(str(result_texts))}
# Logs
def get_logs(selected_month):
df = logger.get_audit_trail(selected_month) # Carga el DataFrame
# Eliminar las columnas que no queremos mostrar (incluyendo las nuevas)
columns_to_hide = ['Document Nodes', 'KG Nodes', 'response_node_ids', 'kg_node_ids']
df = df.drop(columns=[col for col in columns_to_hide if col in df.columns], errors='ignore')
# Transformar la columna "User Message" para mostrar solo el texto y los archivos
if "User Message" in df.columns:
def format_user_message(msg):
try:
if isinstance(msg, str):
data = ast.literal_eval(msg)
text = data.get("text", "")
files = data.get("files", [])
if files:
return f"{text} (Adjunto: {', '.join(files)})"
return text
return msg
except:
return msg # Si hay error al parsear, devolver el mensaje original
df["User Message"] = df["User Message"].apply(format_user_message)
# Manejar valores NaN en la columna "Feedback"
if "Feedback" in df.columns:
df["Feedback"] = df["Feedback"].apply(lambda x: "-" if pd.isna(x) else x)
# Resto del código permanece igual...
# Preprocesar el texto Markdown para manejar correctamente los saltos de línea
def preprocess_markdown(text):
if not isinstance(text, str):
return text
# Reemplazar \n por dos espacios seguidos de \n (requerido por Markdown)
text = text.replace('\n', ' \n')
return text
# Función para envolver imágenes en enlaces y controlar tamaño
def process_images(html_content):
from bs4 import BeautifulSoup
soup = BeautifulSoup(html_content, 'html.parser')
for img in soup.find_all('img'):
# Crear enlace padre que abre en nueva pestaña
parent_link = soup.new_tag('a', href=img['src'], target='_blank')
img.wrap(parent_link)
# Aplicar estilos de tamaño máximo
img['style'] = "max-width: 300px; max-height: 300px; width: auto; height: auto; display: block;"
# Añadir indicador de que es clickeable
img['title'] = "Click para ver imagen completa"
return str(soup)
# Convierte la columna "Response" a Markdown con preprocesamiento
if "Response" in df.columns:
df["Response"] = df["Response"].apply(
lambda x: f'<div class="markdown-content">{preprocess_markdown(x)}</div>' if isinstance(x, str) else x
)
# Generar estilos CSS dinámicos para las columnas
column_styles = []
for col in df.columns:
if col == "Response":
# Dar más espacio a la columna Response
column_styles.append(f'.col-{col} {{ min-width: 60%; max-width: 70%; }}')
else:
# Columnas normales con ancho automático pero con máximo
column_styles.append(f'.col-{col} {{ width: auto; max-width: 30%; }}')
# Convierte el DataFrame en HTML con clases específicas por columna
table_html = df.to_html(index=False, escape=False, classes="table",
formatters={col: lambda x: f'<div class="col-{col}">{x}</div>'
for col in df.columns},
na_rep="-")
# Procesar las imágenes en el HTML generado
table_html = process_images(table_html)
# Genera el HTML completo
html_content = f"""
<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<script src="https://cdn.jsdelivr.net/npm/markdown-it/dist/markdown-it.min.js"></script>
<style>
body {{
font-family: "IBM Plex Sans", "Helvetica Neue", Arial, sans-serif;
margin: 0;
padding: 10px;
overflow-x: hidden;
font-size: x-small;
}}
.container {{
width: 100%;
overflow-x: auto;
box-sizing: border-box;
}}
.table {{
width: 100%;
border-collapse: collapse;
table-layout: auto;
}}
.table th, .table td {{
border: 1px solid #ddd;
padding: 4px;
text-align: left;
vertical-align: top;
}}
.table th {{
background-color: #f2f2f2;
position: sticky;
top: 0;
}}
/* Estilos dinámicos para columnas */
{''.join(column_styles)}
.markdown-content {{
max-width: 100%;
overflow-wrap: break-word;
}}
.markdown-content p {{
margin: 0;
padding: 0;
}}
/* Estilo para todas las celdas */
.table td > div {{
overflow: hidden;
text-overflow: ellipsis;
max-width: 100%;
word-break: break-word;
}}
/* Estilos para imágenes */
.markdown-content a > img {{
max-width: 300px;
max-height: 300px;
width: auto;
height: auto;
display: block;
margin: 5px 0;
cursor: pointer;
border: 1px solid #ddd;
border-radius: 4px;
}}
.markdown-content a > img:hover {{
opacity: 0.9;
box-shadow: 0 0 5px rgba(0,0,0,0.2);
}}
@media screen and (max-width: 600px) {{
.table {{
font-size: xx-small;
}}
.table th, .table td {{
padding: 2px;
}}
/* En móviles hacemos que Respuesta ocupe casi todo */
.col-Respuesta {{
min-width: 80% !important;
max-width: 90% !important;
}}
/* Otras columnas más pequeñas */
.table td > div:not(.col-Respuesta) {{
max-width: 20% !important;
}}
/* Imágenes más pequeñas en móvil */
.markdown-content a > img {{
max-width: 200px !important;
max-height: 200px !important;
}}
}}
</style>
</head>
<body>
<div class="container">
{table_html}
</div>
<script>
document.addEventListener("DOMContentLoaded", function () {{
var md = window.markdownit({{
breaks: true,
linkify: true
}});
// Procesar el markdown y luego las imágenes
document.querySelectorAll(".markdown-content").forEach(el => {{
const rendered = md.render(el.textContent);
el.innerHTML = rendered;
// Asegurar que todas las imágenes tengan el tratamiento correcto
el.querySelectorAll('img').forEach(img => {{
if (!img.parentElement.matches('a')) {{
const wrapper = document.createElement('a');
wrapper.href = img.src;
wrapper.target = '_blank';
img.parentNode.insertBefore(wrapper, img);
wrapper.appendChild(img);
}}
img.style.maxWidth = '300px';
img.style.maxHeight = '300px';
img.style.width = 'auto';
img.style.height = 'auto';
img.title = 'Click para ver imagen completa';
}});
}});
}});
</script>
</body>
</html>
"""
# Escapar comillas dobles para el iframe
html_content_escaped = html_content.replace('"', '&quot;')
return f'<iframe srcdoc="{html_content_escaped}" width="100%" height="500px" style="border:none; overflow:hidden;"></iframe>'
    # Graph: refresh the knowledge-graph visualization panel.
    def get_graph():
        """Return the knowledge-graph iframe produced by draw_graph()."""
        iframe_grafo = draw_graph()
        return {grafo: gr.HTML(iframe_grafo)}
# Evaluaciones
def get_evals():
global result_evals
global final_response
global query
# Verificar si 'final_response' está vacío
if not final_response:
if result_evals:
# Extraer solo el texto de la consulta
try:
# Si query es string, intentar convertir a dict
if isinstance(query, str):
import ast
query_dict = ast.literal_eval(query)
query_text = query_dict.get('text', 'Consulta sin texto')
elif isinstance(query, dict):
query_text = query.get('text', 'Consulta sin texto')
else:
query_text = str(query)
except:
query_text = str(query) # Fallback si hay error en la conversión
return {evals: gr.HTML(f"""
<div style="display: flex; justify-content: space-around; align-items: center; width: 100%; flex-direction: column; text-align: center; margin: 10px 0;">
{result_evals}
<p style="font-size: 10px;">Esta evaluación corresponde a la consulta: <strong>{query_text}</strong></p>
</div>
""")}
gr.Info("Se necesita una respuesta completa para iniciar la evaluación.")
return {evals: gr.HTML(f"""
<div style="display: flex; justify-content: space-around; align-items: center; width: 100%; flex-direction: column; text-align: center; margin: 6px 0;">
Se necesita una respuesta completa para iniciar la evaluación.</div>
""")}
# Ejecuta la evaluación si final_response está disponible
result_evals = evaluate()
# Reiniciar 'final_response' después de la evaluación
final_response = ""
# Extraer solo el texto de la consulta para el resultado final
try:
# Si query es string, intentar convertir a dict
if isinstance(query, str):
import ast
query_dict = ast.literal_eval(query)
query_text = query_dict.get('text', 'Consulta sin texto')
elif isinstance(query, dict):
query_text = query.get('text', 'Consulta sin texto')
else:
query_text = str(query)
except:
query_text = str(query) # Fallback si hay error en la conversión
# Devolver el resultado de la evaluación
return {
evals: gr.HTML(f"""
<div style="display: flex; justify-content: space-around; align-items: center; width: 100%; flex-direction: column; text-align: center; margin: 10px 0;">
{result_evals}
<p style="font-size: 10px;">Esta evaluación corresponde a la consulta: <strong>{query_text}</strong></p>
</div>
"""),
eval_accord: gr.Accordion(elem_classes="accordion", label="Evaluaciones", open=True)
}
    # -----------------------------------------------------------------------
    # Page layout and event wiring.
    # -----------------------------------------------------------------------
    gr.Markdown("## PharmaWise 5.0 - Data Integrity")
    with gr.Row():
        with gr.Column(scale=4):
            # Main chat area; llm_response streams answers into this chatbot.
            chatbot=gr.Chatbot(show_label=False, min_height="660px", show_copy_button=True, show_share_button=False)
            chat_interface = gr.ChatInterface(
                fn=llm_response,
                fill_height=True,
                multimodal=True,
                chatbot=chatbot
            )
        with gr.Column(scale=1):
            gr.LoginButton(value="Ingresar", size="sm", min_width=220, icon=None, logout_value="Salir ({})")
            # Dropdown: which knowledge bases feed retrieval.
            dropdown = gr.Dropdown(
                choices=choice_labels,  # full list of options
                value=selected_choices,  # only these start selected
                label="Base de datos del conocimiento",
                elem_classes="dpdown",
                multiselect=True,
                info="Seleccionar los documentos que se deben considerar para generar tu respuesta."
            )
            dropdown.select(fn=update_selected_choices, inputs=dropdown)
            # Button toggling select-all / clear-all.
            toggle_button = gr.Button("Seleccionar todo", elem_id="btn_select")
            toggle_button.click(fn=toggle_all, inputs=dropdown, outputs=dropdown)
            btn_eval = gr.Button(value="Evaluar")
            # Static "Fuentes" card (inline SVG icon + styles).
            # NOTE(review): onclick uses window.open('') — the target URL is
            # empty, so it opens a blank tab; confirm the intended link.
            gr.HTML("""
<div style="display: flex; justify-content: center; flex-wrap: wrap;">
<div class="file-card" onclick="window.open('', '_blank')">
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"
viewBox="0 0 512 512" xml:space="preserve" width="22px" height="22px" style="flex-shrink: 0;">
<path style="fill:#B3404A;" d="M437.456,512H21.212c-8.166,0-14.786-6.621-14.786-14.786V256.915c0-8.165,6.62-14.786,14.786-14.786
s14.786,6.621,14.786,14.786v225.512h386.671v-32.939c0-8.165,6.62-14.786,14.786-14.786s14.786,6.621,14.786,14.786v47.725
C452.242,505.379,445.622,512,437.456,512z"/>
<g>
<polygon style="fill:#F4B2B0;" points="21.212,177.092 21.212,172.3 176.068,14.786 176.068,177.092 "/>
<rect x="196.524" y="219.426" style="fill:#F4B2B0;" width="294.274" height="163.712"/>
</g>
<g>
<path style="fill:#B3404A;" d="M490.791,204.634h-38.549V14.786c0-8.165-6.62-14.786-14.786-14.786H176.068
c-0.067,0-0.132,0.009-0.198,0.01c-0.359,0.004-0.717,0.022-1.075,0.053c-0.12,0.01-0.241,0.021-0.361,0.034
c-0.41,0.046-0.816,0.105-1.22,0.185c-0.031,0.006-0.061,0.009-0.092,0.015c-0.432,0.089-0.858,0.2-1.28,0.325
c-0.111,0.033-0.22,0.071-0.33,0.106c-0.322,0.105-0.642,0.22-0.958,0.347c-0.108,0.044-0.217,0.086-0.324,0.132
c-0.807,0.346-1.585,0.766-2.326,1.257c-0.102,0.067-0.2,0.139-0.3,0.207c-0.274,0.191-0.541,0.392-0.803,0.603
c-0.099,0.08-0.198,0.157-0.294,0.24c-0.339,0.287-0.67,0.586-0.985,0.906L10.668,161.935c-0.346,0.352-0.671,0.719-0.977,1.1
c-0.182,0.226-0.342,0.463-0.509,0.696c-0.112,0.157-0.234,0.309-0.34,0.47c-0.194,0.294-0.364,0.599-0.534,0.903
c-0.062,0.112-0.133,0.219-0.192,0.333c-0.166,0.315-0.308,0.639-0.449,0.963c-0.05,0.114-0.108,0.225-0.155,0.34
c-0.126,0.312-0.231,0.628-0.336,0.946c-0.046,0.139-0.099,0.274-0.14,0.413c-0.087,0.294-0.152,0.591-0.22,0.889
c-0.04,0.172-0.087,0.34-0.121,0.515c-0.053,0.274-0.084,0.55-0.121,0.825c-0.027,0.201-0.062,0.399-0.081,0.6
c-0.025,0.268-0.03,0.535-0.04,0.801c-0.007,0.191-0.028,0.38-0.028,0.571v4.792c0,8.165,6.62,14.786,14.786,14.786h154.855
c8.166,0,14.786-6.621,14.786-14.786V29.572h231.816v175.062H196.518c-8.166,0-14.786,6.621-14.786,14.786v163.705
c0,8.165,6.62,14.786,14.786,14.786h294.272c8.166,0,14.786-6.621,14.786-14.786V219.421
C505.577,211.256,498.957,204.634,490.791,204.634z M51.772,162.308l47.938-48.76l61.571-62.63v111.39L51.772,162.308
L51.772,162.308z M476.005,368.339h-264.7V234.207h264.7V368.339z"/>
<path style="fill:#B3404A;" d="M246.08,260.736c0-3.2,2.925-6.015,7.375-6.015h26.322c16.785,0,30.008,7.934,30.008,29.433v0.64
c0,21.499-13.733,29.689-31.28,29.689h-12.589v27.641c0,4.096-4.959,6.142-9.919,6.142s-9.919-2.048-9.919-6.142L246.08,260.736
L246.08,260.736z M265.916,272.124v27.002h12.589c7.121,0,11.444-4.096,11.444-12.797v-1.406c0-8.703-4.323-12.797-11.444-12.797
h-12.589V272.124z"/>
<path style="fill:#B3404A;" d="M349.586,254.721c17.548,0,31.282,8.19,31.282,30.202v33.145c0,22.011-13.733,30.201-31.282,30.201
h-22.507c-5.214,0-8.647-2.815-8.647-6.014v-81.518c0-3.2,3.433-6.015,8.647-6.015h22.507V254.721z M338.269,272.124v58.739h11.317
c7.121,0,11.444-4.096,11.444-12.796v-33.145c0-8.703-4.323-12.797-11.444-12.797h-11.317V272.124z"/>
<path style="fill:#B3404A;" d="M393.458,260.863c0-4.096,4.323-6.142,8.647-6.142h44.125c4.196,0,5.977,4.479,5.977,8.574
c0,4.735-2.162,8.83-5.977,8.83h-32.935v21.628h19.201c3.815,0,5.977,3.711,5.977,7.806c0,3.456-1.78,7.55-5.977,7.55h-19.201
v33.016c0,4.096-4.959,6.142-9.919,6.142c-4.959,0-9.919-2.048-9.919-6.142V260.863z"/>
</g>
</svg>
<div class="file-name">Fuentes</div>
</div>
</div>
<style>
.file-card {
display: flex;
align-items: center; /* Centra verticalmente */
justify-content: center; /* Centra horizontalmente */
background-color: #e4e4e7;
border-radius: 5px; /* Bordes completamente redondeados */
padding: 7px 20px;
width: 100%;
transition: transform 0.3s ease, box-shadow 0.3s ease;
cursor: pointer;
max-width: 600px; /* Limitar el ancho máximo */
margin-bottom: 15px;
}
.file-card .file-name {
font-size: x-small;
font-weight: 600;
color: #333;
margin-left: 10px;
}
.file-card:hover {
background-color: #D4D4D8;
}
</style>
""")
    with gr.Row():
        with gr.Column(scale=1):
            eval_accord = gr.Accordion(elem_classes="accordion", label="Evaluaciones", open=False)
            with eval_accord:
                evals = gr.HTML()
                gr.Markdown("""| **Evaluador** | **Qué mide** | **Ejemplo de uso** | **Diferencias clave** |
|-----------------------|-------------------------------------------------------|-------------------------------------------------------|--------------------------------------------------------|
| **Groundedness** | Qué tan fundamentada está la respuesta en el contexto. | ¿La respuesta está respaldada por el contexto proporcionado? | Se enfoca en la relación entre la respuesta y el contexto. |
| **Answer Relevance** | Qué tan relevante es la respuesta para la consulta. | ¿La respuesta es pertinente a lo que el usuario preguntó? | Se centra en la relevancia de la respuesta ante la consulta. |
| **Context Relevance** | Qué tan relevante es el contexto recuperado para la consulta. | ¿El contexto obtenido es relevante para la consulta del usuario? | Se enfoca en la pertinencia del contexto en relación con la consulta. |
""")
        with gr.Column(scale=1):
            with gr.Accordion(elem_classes="accordion", label="Referencias", open=False):
                simple_ref = gr.Markdown()
            with gr.Accordion(elem_classes="accordion", label="Audit trail", open=False):
                with gr.Row():
                    with gr.Row():
                        available_months = logger.get_available_log_months()
                        default_month = available_months[-1] if available_months else None
                        # NOTE(review): this rebinds `dropdown`, shadowing the
                        # knowledge-base dropdown above. The earlier wiring
                        # already captured the old object, but btn_logs.click
                        # below uses this month selector — confirm intended.
                        dropdown = gr.Dropdown(choices=available_months, label="Seleccionar mes", show_label=False, container=False, value=default_month)
                        btn_logs = gr.Button(value="Actualizar")
                    with gr.Column():
                        gr.Markdown()
                    with gr.Column():
                        gr.Markdown()
                # HTML placeholder for the audit-trail table (fixed-width response column).
                logs_df = gr.HTML()
    with gr.Row():
        grafo = gr.HTML(label="Grafo")
    with gr.Accordion(elem_classes="accordion", label="Referencias ampliadas", open=False):
        texts = gr.HTML()
    # Event wiring: refresh graph/references after each chat turn; buttons
    # drive the logs and evaluation panels; likes go to the logger.
    chatbot.change(fn=get_graph, outputs=[grafo])
    chatbot.change(fn=get_ref, outputs=[simple_ref, texts])
    btn_logs.click(fn=get_logs, inputs=[dropdown], outputs=[logs_df])
    btn_eval.click(fn=get_evals, outputs=[evals, eval_accord])
    chatbot.like(logger.record_feedback, None, None)
# Enable request queuing (required for streaming generator callbacks) and start.
demo.queue()
demo.launch(debug=True)