import gradio as gr
import json
import asyncio
import os
import sys
import re
FLOATING_CARD_JS = ''
# Note: showCiteCard/closeCiteCard JS and MathJax are now globally loaded via THEME_JS in app.py
import time
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from dotenv import load_dotenv
_project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
load_dotenv(os.path.join(_project_root, ".env"))
from backend.pipeline import ResearchPipeline
from backend.tools.search_engine import search
from backend.tools.graph_generator import generator as graph_generator
from modules.graph_module import generate_interactive_graph
from backend.synthesis import PROVIDERS
from backend.prompts.profiles import AGENT_PROFILES
from .utils import format_results_for_dataframe, format_error
# Default model name; it is not referenced in the visible part of this file.
DEFAULT_MODEL = "mistral-small-2506"
# Search groups
GROUPS = ["all", "latam", "global", "tesis", "iberoamerica", "peru", "brasil", "ecuador", "mexico", "ai_ml"]
# Individual sources
INDIVIDUAL_SOURCES = [
    "alicia", "renati", "lareferencia", "bdtd", "rraae",
    "semantic", "openalex", "pubmed", "arxiv", "crossref",
    "dblp", "scopus", "zenodo", "openaire", "doaj",
    "core", "redalyc", "serpapi"
]
# Every selectable value: the groups first, then the individual sources.
ALL_SOURCES = GROUPS + INDIVIDUAL_SOURCES
# ─── Module-level pipeline reference for stop/pause/resume ───
# Assigned by the research handlers while a run is in progress and reset to
# None in their ``finally`` clause.
_active_pipeline = None
def _control_stop():
    """Stop the running pipeline, if any, and return the status HTML to show.

    Returns the "error" status with the user-stopped message when a pipeline
    was active, otherwise the "idle" status.
    """
    pipeline = _active_pipeline
    if not pipeline:
        return _build_status_html("idle")
    pipeline.stop()
    return _build_status_html("error", "⛔ Detenido por el usuario")
def _control_pause():
    """Pause the running pipeline, if any, and return the status HTML to show.

    Returns the "running" status with the paused hint when a pipeline was
    active, otherwise the "idle" status.
    """
    pipeline = _active_pipeline
    if not pipeline:
        return _build_status_html("idle")
    pipeline.pause()
    return _build_status_html("running", "⏸️ Pausado — haz clic en Reanudar")
def _control_resume():
    """Resume the paused pipeline, if any, and return the status HTML to show.

    Returns the "running" status with the resumed message when a pipeline was
    active, otherwise the "idle" status.
    """
    pipeline = _active_pipeline
    if not pipeline:
        return _build_status_html("idle")
    pipeline.resume()
    return _build_status_html("running", "▶️ Reanudado")
def _build_controls_html(state="idle"):
    """Return the control-bar message for the given pipeline state.

    Recognised states are "idle", "paused" and "stopped"; any other value
    falls through to the "running" message.
    """
    # NOTE(review): the literals below contain only text, no markup, although
    # the function name says HTML — confirm nothing was lost from them.
    if state == "idle":
        return '''
⏹️ Pipeline inactivo
'''
    if state == "paused":
        return '''
⏸️ Pipeline pausado
Haz clic en ▶ Reanudar para continuar
'''
    if state == "stopped":
        return '''
⛔ Pipeline detenido
'''
    # running
    return '''
⚡ Pipeline activo — usa los botones para controlar
'''
# Pipeline phases shown by _build_progress_html. Each entry carries the phase
# id, its label and icon, the percentage displayed for it, and a colour.
# NOTE(review): _detect_phase can return 8, which has no entry here;
# _build_progress_html falls back to the last entry for unknown ids.
PHASES = [
    {"id": -1, "label": "Verificación de Fuentes", "icon": "🏥", "pct": 0, "color": "#6b7280"},
    {"id": 0, "label": "Optimización de Queries", "icon": "🧠", "pct": 5, "color": "#8b5cf6"},
    {"id": 1, "label": "Búsqueda Iterativa", "icon": "🔍", "pct": 15, "color": "#3b82f6"},
    {"id": 2, "label": "Detección de Vacíos", "icon": "🔎", "pct": 35, "color": "#06b6d4"},
    {"id": 3, "label": "Búsqueda de Rescate", "icon": "🚑", "pct": 45, "color": "#f59e0b"},
    {"id": 4, "label": "Plan Maestro", "icon": "📋", "pct": 55, "color": "#10b981"},
    {"id": 5, "label": "Redacción de Secciones", "icon": "✍️", "pct": 65, "color": "#a855f7"},
    {"id": 6, "label": "Validación y Corrección", "icon": "✅", "pct": 90, "color": "#22c55e"},
    {"id": 7, "label": "Completado", "icon": "🎉", "pct": 100,"color": "#10b981"},
]
# ─── Source badge colors (matching search_tab.py) ───
# Not referenced in the visible part of this file.
SOURCE_COLORS = {
    "pubmed": "#3b82f6", "semantic_scholar": "#8b5cf6", "openalex": "#06b6d4",
    "crossref": "#f59e0b", "arxiv": "#ef4444", "doaj": "#10b981",
    "zenodo": "#6366f1", "dblp": "#ec4899", "openaire": "#14b8a6",
    "core": "#f97316", "scielo": "#22c55e", "redalyc": "#a855f7",
    "latindex": "#0ea5e9", "dialnet": "#e11d48", "la_referencia": "#84cc16",
}
# Colour per GRADE level. Lookups in this file use the upper-cased text that
# precedes " - " in the GRADE column, with "#6b7280" as the fallback colour.
GRADE_COLORS = {
    "1A": "#10b981", "1B": "#22c55e", "2A": "#3b82f6", "2B": "#60a5fa",
    "3A": "#f59e0b", "3B": "#fbbf24", "4": "#f97316", "5": "#ef4444", "6": "#6b7280",
}
def update_models(prov_name):
    """Return a Gradio update listing the models of the chosen provider.

    Unknown provider names fall back to the "mistral" entry of ``PROVIDERS``.
    The first model of the list becomes the selected value.
    """
    provider_cfg = PROVIDERS.get(prov_name, PROVIDERS["mistral"])
    model_choices = provider_cfg["models"]
    return gr.update(choices=model_choices, value=model_choices[0])
def _build_progress_html(phase_id, extra=""):
    """Build the progress indicator for a pipeline phase.

    Args:
        phase_id: ``id`` of an entry in ``PHASES``. An id that matches no
            entry falls back to the last entry of ``PHASES``.
        extra: Optional detail text placed after the phase dots.

    Returns:
        A string with the phase icon, label, a "Fase X de N" counter, the
        percentage, the accumulated dots string and the optional extra text.
    """
    phase = next((p for p in PHASES if p["id"] == phase_id), PHASES[-1])
    pct = phase["pct"]
    label = phase["label"]
    icon = phase["icon"]
    color = phase["color"]
    # Build phase dots
    # NOTE(review): dot_color, dot_size, glow, border, color and pulse_anim are
    # computed but never interpolated, and the per-dot literal is empty — the
    # markup that used them appears to be missing; confirm against history.
    dots_html = ""
    for p in PHASES:
        # The pre-flight phase (negative id) gets no dot.
        if p["id"] < 0:
            continue
        is_done = p["pct"] <= pct and pct > 0
        is_active = p["id"] == phase_id
        dot_color = p["color"] if is_done else "rgba(255,255,255,0.1)"
        dot_size = "10px" if is_active else "8px"
        glow = f"box-shadow:0 0 8px {p['color']}60;" if is_active else ""
        border = f"border:2px solid {p['color']};" if is_active else ""
        dots_html += f''''''
    extra_html = f'''{extra}
''' if extra else ""
    # Pulse only while the run is strictly between 0% and 100%.
    pulse_anim = "animation:pulse 2s infinite;" if pct < 100 and pct > 0 else ""
    # The counter clamps the pre-flight phase (-1) to 0 and excludes it from
    # the total.
    return f'''
{icon}
{label}
Fase {max(0, phase_id + 1)} de {len(PHASES) - 1}
{pct}%
{dots_html}
{extra_html}
'''
def _build_status_html(state="idle", extra=""):
    """Build the status indicator for a pipeline state.

    Args:
        state: One of "idle", "running", "done" or "error"; any other value
            is rendered as "idle".
        extra: Optional detail text shown after the icon and label.

    Returns:
        A string with the state's icon and label followed by ``extra``.
    """
    # NOTE(review): the "color", "bg" and "border" entries and the ``pulse``
    # local are never interpolated into the returned literal — the markup that
    # used them appears to be missing; confirm against history.
    configs = {
        "idle": {"color": "#6b7280", "icon": "⏹️", "label": "Inactivo", "bg": "rgba(107,114,128,0.08)", "border": "rgba(107,114,128,0.2)"},
        "running": {"color": "#8b5cf6", "icon": "⚡", "label": "En ejecución...", "bg": "rgba(139,92,246,0.08)", "border": "rgba(139,92,246,0.3)"},
        "done": {"color": "#10b981", "icon": "✅", "label": "Completado", "bg": "rgba(16,185,129,0.08)", "border": "rgba(16,185,129,0.3)"},
        "error": {"color": "#ef4444", "icon": "❌", "label": "Error", "bg": "rgba(239,68,68,0.08)", "border": "rgba(239,68,68,0.3)"},
    }
    cfg = configs.get(state, configs["idle"])
    pulse = "animation:pulse 2s infinite;" if state == "running" else ""
    extra_html = f'{extra}' if extra else ""
    return f'''
{cfg['icon']} {cfg['label']}
{extra_html}
'''
def _parse_sections_from_report(report_md):
    """Split a report into a ``{section title: section body}`` mapping.

    A section starts at a Markdown ``##``/``###`` heading or at a LaTeX
    ``\\section``/``\\subsection``/``\\subsubsection`` command that begins the
    line. Leading emoji and whitespace are removed from the title. Text that
    precedes the first heading is discarded, and a later section with the
    same title replaces the earlier one.

    Args:
        report_md: Report text, or a falsy value.

    Returns:
        Dict of stripped section bodies keyed by title, in order of
        appearance; empty when ``report_md`` is falsy or has no headings.
    """
    parsed = {}
    if not report_md:
        return parsed

    heading = None
    body = []
    for raw_line in report_md.split("\n"):
        # Markdown heading first, LaTeX sectioning command as the fallback.
        header = (
            re.match(r'^#{2,3}\s+(.+)', raw_line)
            or re.match(r'\\(?:sub)*section\{(.+?)\}', raw_line)
        )
        if header is None:
            body.append(raw_line)
            continue

        # A new heading closes the section collected so far.
        if heading:
            parsed[heading] = "\n".join(body).strip()

        cleaned = re.sub(
            r'^[🔬📝📊🔎🚑📋✍️✅🎉🏥🧠🔍\s]+', '', header.group(1).strip()
        ).strip()
        # An all-emoji title keeps the previous heading (or a placeholder).
        heading = cleaned or heading or "Sin título"
        body = []

    if heading:
        parsed[heading] = "\n".join(body).strip()
    return parsed
def _build_references_html(docs_df, report_md=""):
    """Build the references list from the documents table.

    Args:
        docs_df: DataFrame read through the "Autores", "Año", "Título",
            "Fuente" and "GRADE" columns; ``None`` or an empty frame yields a
            placeholder message.
        report_md: Report text; numbers found as ``[n]`` are treated as cited
            reference numbers.

    Returns:
        The placeholder string, or the accumulated reference entries followed
        by ``FLOATING_CARD_JS``.
    """
    if docs_df is None or docs_df.empty:
        return "_Sin referencias disponibles aún..._"
    import json as _json
    import re
    import math
    import base64
    # Extract cited indices from report_md
    cited_indices = set()
    if report_md:
        for match in re.finditer(r'\[(\d+)\]', report_md):
            cited_indices.add(int(match.group(1)))
    # NOTE(review): has_text_produced, color, data_b64, is_cited and
    # initial_display are computed but not used by the visible literals — the
    # markup that consumed them appears to be missing; confirm against history.
    has_text_produced = bool(report_md.strip())
    html = ''
    html += ''
    for idx, row in docs_df.iterrows():
        # Reference numbers are 1-based; this assumes the frame has a default
        # integer index — TODO confirm.
        num = idx + 1
        autores = str(row.get("Autores", ""))
        año = str(row.get("Año", ""))
        titulo = str(row.get("Título", ""))
        fuente = str(row.get("Fuente", ""))
        grade = str(row.get("GRADE", ""))
        # Surname = last word of each comma-separated author; entries that
        # contain "..." are skipped.
        parts = [a.strip() for a in autores.split(",")]
        surnames = [p.split()[-1] for p in parts if p and "..." not in p]
        if len(surnames) == 1:
            cite_text = f"{surnames[0]} ({año})"
        elif len(surnames) == 2:
            cite_text = f"{surnames[0]} y {surnames[1]} ({año})"
        elif len(surnames) > 2:
            cite_text = f"{surnames[0]} et al. ({año})"
        else:
            cite_text = f"Sin Autor ({año})"
        level_key = grade.split(" - ")[0].strip().upper() if grade else "UNKNOWN"
        color = GRADE_COLORS.get(level_key, "#6b7280")
        import math
        import base64
        # Replace NaN cells with "" so the row serialises to valid JSON, then
        # base64-encode the JSON payload.
        found = {k: ("" if (isinstance(v, float) and math.isnan(v)) else v) for k, v in row.to_dict().items()}
        data_json = _json.dumps(found, ensure_ascii=False)
        data_b64 = base64.b64encode(data_json.encode('utf-8')).decode('utf-8')
        is_cited = str(num in cited_indices).lower()
        # Only the first ten entries are initially visible.
        initial_display = "flex" if idx < 10 else "none"
        html += f'''
[{num}]
[{num}] {cite_text}.
{titulo}
{fuente}
{grade}
'''
    # NOTE(review): the two single-quoted literals below are split across
    # physical lines, which is not valid Python — they look damaged and must be
    # restored from history before this module can be imported.
    html += '
'
    html += ''
    html += '
'
    html += FLOATING_CARD_JS
    return html
def _build_stats_html(report_md, docs_df):
    """Build the statistics panel for the current report and documents.

    Computes the number of documents, the number of sections found by
    ``_parse_sections_from_report``, the whitespace-separated word count of
    the report, and the value counts of the "GRADE" column when present.

    Args:
        report_md: Report text (may be empty or ``None``).
        docs_df: Documents DataFrame, or ``None``.

    Returns:
        A string combining the stat cards and the GRADE distribution.
    """
    # NOTE(review): ``pd`` is imported but not used in this function.
    import pandas as pd
    total_docs = len(docs_df) if docs_df is not None and not docs_df.empty else 0
    sections = _parse_sections_from_report(report_md)
    total_sections = len(sections)
    word_count = len(report_md.split()) if report_md else 0
    grade_data = {}
    if docs_df is not None and not docs_df.empty and "GRADE" in docs_df.columns:
        grade_data = docs_df["GRADE"].value_counts().to_dict()
    # Build stat cards
    stats = [
        ("📄", "Documentos", str(total_docs), "#3b82f6"),
        ("📑", "Secciones", str(total_sections), "#8b5cf6"),
        ("📝", "Palabras", f"{word_count:,}", "#10b981"),
    ]
    # NOTE(review): the card literal below interpolates none of icon, label,
    # val or color — the card markup appears to be missing; confirm against
    # history.
    cards_html = ""
    for icon, label, val, color in stats:
        cards_html += f'''
'''
    # GRADE distribution badges
    grade_html = ""
    if grade_data:
        grade_badges = ""
        # Most frequent GRADE label first.
        for label, count in sorted(grade_data.items(), key=lambda x: -x[1]):
            level_key = label.split(" - ")[0].strip() if " - " in label else label
            color = GRADE_COLORS.get(level_key.upper(), "#6b7280")
            grade_badges += f'''
{label}: {count}
'''
        grade_html = f'''
🏅 Distribución GRADE
{grade_badges}
'''
    return f'''
{cards_html}
{grade_html}
'''
def _generate_graph_from_df(df):
    """Return the result of ``generate_interactive_graph`` for ``df``."""
    graph = generate_interactive_graph(df)
    return graph
def _detect_phase(report_md):
    """Guess the pipeline phase from keywords in the report text.

    The text is lower-cased and checked against keyword groups from the
    latest phase to the earliest; the first group that matches wins.

    Args:
        report_md: Report or progress text, or a falsy value.

    Returns:
        An int from 0 to 8; 0 when the text is empty or nothing matches.
    """
    if not report_md:
        return 0
    lowered = report_md.lower()

    def mentions(*needles):
        return any(needle in lowered for needle in needles)

    # Completion needs the word plus one of the final summary markers.
    if (mentions("completado") and mentions("secciones:", "docs citados:")) or mentions("fase 8"):
        return 8
    if mentions("reporte final", "generando reporte"):
        return 7
    if mentions("grade", "clasificación grade"):
        return 6
    # Validation keywords are ignored when the text also mentions recovery.
    if mentions("validación", "validate", "ara+") and not mentions("recuperación"):
        return 6

    keyword_rules = (
        (5, ("redactando", "redacción", "writing")),
        (4, ("plan maestro", "master plan", "fase 4")),
        (3, ("rescate", "rescue", "fase 3")),
        (2, ("detección de vacíos", "gap detection", "fase 2")),
        (1, ("ronda", "buscando", "búsqueda")),
        (0, ("optimiz", "query")),
    )
    for phase, needles in keyword_rules:
        if mentions(*needles):
            return phase
    return 0
# _refs_to_markdown removed, handled by _build_references_html
# Accent colours for section cards; _build_section_cards_html picks one per
# card by position, wrapping around when there are more cards than colours.
SECTION_COLORS = [
    "#8b5cf6", "#3b82f6", "#06b6d4", "#10b981", "#f59e0b",
    "#ef4444", "#ec4899", "#6366f1", "#14b8a6", "#f97316",
]
def _build_section_cards_html(sections_map, is_done=False):
    """Build one card per report section.

    Args:
        sections_map: Mapping of section title to section text, as returned
            by ``_parse_sections_from_report``.
        is_done: When true every section is marked complete; otherwise a
            section counts as complete once it has more than 50 words.

    Returns:
        A placeholder message when ``sections_map`` is empty, otherwise the
        concatenated cards.
    """
    if not sections_map:
        return '''
📑
Las secciones aparecerán aquí durante la ejecución...
'''
    cards = ""
    for i, (title, content) in enumerate(sections_map.items()):
        color = SECTION_COLORS[i % len(SECTION_COLORS)]
        word_count = len(content.split()) if content else 0
        status_icon = "✅" if (is_done or word_count > 50) else "⏳"
        sec_id = f"sec_{i}"
        # Escape content for display
        # NOTE(review): as written these replace() calls map "<" to "<" and
        # ">" to ">" (and '"' to '"' below), so nothing is escaped — the
        # entity names appear to have been lost. The card literal also uses
        # none of the values computed in this loop. Restore both from history.
        content_preview = content[:300].replace("<", "<").replace(">", ">") if content else ""
        content_full = content.replace("<", "<").replace(">", ">") if content else ""
        # Copy section button
        content_escaped = content.replace("'", "\\'").replace("\n", "\\n").replace('"', '"') if content else ""
        cards += f'''
'''
    return f'''
{cards}
'''
# ══════════════════════════════════════════════════════════════
# INTERACTIVE CITATIONS (Floating Card on Click)
# ══════════════════════════════════════════════════════════════
def _build_docs_index(docs_df):
    """Build a citation lookup from the documents table.

    Keys are ``"<surname>_<year>"`` for each of the first three authors of a
    row, plus ``"<surname1>_<surname2>_<year>"`` for rows with two or more
    authors. Surnames are lower-cased; a surname is the last word of an
    author's name.

    Args:
        docs_df: Object exposing ``to_dict(orient="records")`` (a DataFrame).
            Rows are read through the Spanish column names ("Título",
            "Autores", "Año", "DOI", "Fuente", "GRADE", "PDF URL") with the
            lower-case English names as fallbacks. "Autores" may be a list of
            names or a comma-separated string.

    Returns:
        Dict mapping each key to a dict with "title", "authors", "year",
        "doi", "source", "grade" and "pdf_url". Empty when ``docs_df`` is
        ``None``, empty, or has no ``to_dict``.
    """
    import math

    index = {}
    if docs_df is None or (hasattr(docs_df, 'empty') and docs_df.empty):
        return index
    rows = docs_df.to_dict(orient="records") if hasattr(docs_df, 'to_dict') else []

    for row in rows:
        authors_raw = row.get("Autores", row.get("authors", ""))
        year_raw = row.get("Año", row.get("year", ""))

        # A year column holding a NaN is stored as floats, so 2020 arrives as
        # 2020.0; normalise it so keys match the "(Author, 2020)" citations.
        if isinstance(year_raw, float):
            if math.isnan(year_raw):
                year = ""
            elif year_raw.is_integer():
                year = str(int(year_raw))
            else:
                year = str(year_raw)
        else:
            year = str(year_raw)

        # Extract surname(s) from at most the first three authors.
        if isinstance(authors_raw, list):
            # Blank or non-string entries are dropped instead of crashing.
            names = [a.strip() for a in authors_raw[:3] if isinstance(a, str) and a.strip()]
            surnames = [name.split()[-1] for name in names]
            authors_display = ", ".join(names)
        elif isinstance(authors_raw, str) and authors_raw:
            parts = [a.strip() for a in authors_raw.split(",")]
            surnames = [p.split()[-1] for p in parts[:3] if p]
            authors_display = authors_raw
        else:
            surnames = []
            authors_display = ""

        details = {
            "title": row.get("Título", row.get("title", "")),
            "authors": authors_display,
            "year": year,
            "doi": row.get("DOI", row.get("doi", "")),
            "source": row.get("Fuente", row.get("source", "")),
            "grade": row.get("GRADE", row.get("grade", "")),
            "pdf_url": row.get("PDF URL", row.get("pdf_url", "")),
        }

        # Single-surname keys: the first row that claims a key keeps it.
        for surname in surnames:
            key = f"{surname.lower()}_{year}"
            if key not in index:
                index[key] = dict(details)

        # Combined key for multi-author rows: a later row overwrites an
        # earlier one (unlike the single-surname keys above).
        if len(surnames) >= 2:
            combined = "_".join(s.lower() for s in surnames[:2]) + f"_{year}"
            index[combined] = dict(details)
    return index
def _latex_to_html(text):
    """Convert common LaTeX commands to HTML for browser rendering.

    Applies a fixed sequence of regex and string substitutions to ``text``
    and returns the result; a falsy ``text`` is returned unchanged.
    """
    # NOTE(review): several literals in this function look damaged — the
    # heading replacements are split across physical lines (invalid Python),
    # the inline-formatting replacements emit only the captured text, and one
    # pattern below is truncated. Restore them from history before relying on
    # this function.
    if not text:
        return text
    # --- Structural commands ---
    # \section{Title} -> Title
    text = re.sub(r'\\section\*?\{(.+?)\}', r'\1
', text)
    # \subsection{Title} -> Title
    text = re.sub(r'\\subsection\*?\{(.+?)\}', r'\1
', text)
    # \subsubsection{Title} -> Title
    text = re.sub(r'\\subsubsection\*?\{(.+?)\}', r'\1
', text)
    # --- Inline formatting ---
    # \textbf{bold} -> bold
    text = re.sub(r'\\textbf\{(.+?)\}', r'\1', text)
    # \textit{italic} -> italic
    text = re.sub(r'\\textit\{(.+?)\}', r'\1', text)
    # \emph{text} -> text
    text = re.sub(r'\\emph\{(.+?)\}', r'\1', text)
    # \underline{text} -> text
    text = re.sub(r'\\underline\{(.+?)\}', r'\1', text)
    # --- Fix model hallucinative curly braces for taxonomy ---
    # Convert {Word} to *Word* for markdown italics, ignoring {{BIB:ID}} and existing LaTeX commands
    # NOTE(review): the pattern r'(?' is incomplete and makes re.sub raise
    # re.error on every call; the original pattern and its replacement (and
    # probably the \begin{itemize} handling) are missing here.
    text = re.sub(r'(?', text)
    text = re.sub(r'\\end\{itemize\}', '', text)
    text = re.sub(r'\\item\s*', '', text)
    # Fix stray "itemize" text that might remain if not paired
    text = re.sub(r'(?im)^\s*itemize\s*$', '', text)
    # Fix math units where AI writes $$g/ml instead of \mu g/ml
    text = text.replace('$$g/ml', 'µg/ml')
    text = text.replace('$$g', 'µg')
    # --- Scientific notation translation catalogue for the frontend ---
    # 1. Scientific notation (x10^n or x 10^{n})
    text = re.sub(r'(?i)x\s*10\^\{([^}]+)\}', r'× 10\1', text)
    text = re.sub(r'(?i)x\s*10\^([0-9\-]+)', r'× 10\1', text)
    # 2. Chemistry and common subscripts (CO2, H2O, NO3-)
    # Matches an upper-case letter (optionally followed by a lower-case one), then _ and a number. Example: CO_2 -> CO2
    text = re.sub(r'([A-Z][a-z]?)_([0-9]+)', r'\1\2', text)
    # Braced variant: CO_{2} -> CO2
    text = re.sub(r'([A-Z][a-z]?)_\{([0-9]+)\}', r'\1\2', text)
    # 3. Stand-alone superscripts without $ (e.g. m^2 or cm^{3})
    text = re.sub(r'([a-zA-Z]+)\^\{([0-9\-]+)\}', r'\1\2', text)
    text = re.sub(r'([a-zA-Z]+)\^([0-9\-]+)', r'\1\2', text)
    # 4. Temperatures (25 oC, 25oC, 25°C)
    text = re.sub(r'\b([0-9]+)\s*[oO]C\b', r'\1 °C', text)
    # 5. Common hand-typed mathematical symbols
    text = text.replace('+/-', '±')
    text = text.replace('>=', '≥')
    text = text.replace('<=', '≤')
    # 6. Micrograms typed with 'u' (ug/ml)
    text = re.sub(r'\bug/ml\b', 'µg/ml', text)
    text = re.sub(r'\bug/L\b', 'µg/L', text)
    text = re.sub(r'\bug\b', 'µg', text)
    # --------------------------------------------------------
    text = re.sub(r'\\end\{enumerate\}', '', text)
    text = re.sub(r'\\item\s*', '', text)
    # --- Escaped characters ---
    text = text.replace(r'\%', '%')
    text = text.replace(r'\&', '&')
    text = text.replace(r'\#', '#')
    text = text.replace(r'\_', '_')
    text = text.replace(r'\$', '$')
    # --- Remove pure LaTeX boilerplate ---
    text = re.sub(r'\\begin\{document\}', '', text)
    text = re.sub(r'\\end\{document\}', '', text)
    text = re.sub(r'\\begin\{abstract\}', '', text)
    text = re.sub(r'\\end\{abstract\}', '', text)
    text = re.sub(r'\\maketitle', '', text)
    text = re.sub(r'\\documentclass\{[^}]*\}', '', text)
    text = re.sub(r'\\usepackage\{[^}]*\}', '', text)
    text = re.sub(r'\\title\{[^}]*\}', '', text)
    text = re.sub(r'\\author\{[^}]*\}', '', text)
    text = re.sub(r'\\date\{[^}]*\}', '', text)
    # --- Citations: \cite{key} is rewritten to [key] ---
    text = re.sub(r'\\cite\{([^}]+)\}', r'[\1]', text)
    # --- Paragraph breaks: double newlines ---
    # NOTE(review): as written, runs of two or more newlines are removed
    # outright, which joins paragraphs — the replacement text looks lost.
    text = re.sub(r'\n{2,}', '', text)
    # --- Clean leftover backslash commands that are not math ---
    # But preserve $...$ and $$...$$ for MathJax
    text = re.sub(r'\\(?:noindent|newpage|clearpage|vspace\{[^}]*\}|hspace\{[^}]*\}|par)\b', '', text)
    return text
def _make_citations_interactive(report_md, docs_df):
    """Convert LaTeX/Markdown report to HTML with clickable [[n]] citations and MathJax math rendering.

    Steps, in order: pre-process LaTeX with ``_latex_to_html``; render the
    remaining Markdown with the ``markdown`` package; replace
    ``[[n]] {{BIB:ID}}`` markers; replace bare ``[[n]]`` markers; then wrap
    "(Author, Year)" citations that resolve in the index built by
    ``_build_docs_index``.

    Args:
        report_md: Report text; a falsy value yields a placeholder message.
        docs_df: Documents DataFrame used to resolve citation numbers
            (1-based row positions) and author/year citations; may be
            ``None``.

    Returns:
        The processed report body followed by ``FLOATING_CARD_JS``.
    """
    # NOTE(review): several literals in this function are split across
    # physical lines (invalid Python), and the citation replacements interpolate
    # none of tooltip, cid or data_b64 — the markup appears to be missing.
    # Restore it from history.
    import markdown as md_lib
    import json as _json
    if not report_md:
        return '
Haz clic en el botón para ver el progreso en tiempo real...
'
    # Build docs index
    docs_index = _build_docs_index(docs_df)
    # --- Phase 0: LaTeX to HTML pre-processing ---
    processed = _latex_to_html(report_md)
    # Convert remaining Markdown to HTML
    try:
        html_body = md_lib.markdown(
            processed,
            extensions=['tables', 'fenced_code', 'nl2br'],
        )
    except Exception:
        # Best-effort fallback when the Markdown renderer fails.
        html_body = processed.replace("\n\n", "").replace("\n", "
")
    html_body = f"
{html_body}
"
    # One-element list so the nested replace callbacks can increment it.
    cite_id_counter = [0]
    # 1. First pass: Replace [[n]] {{BIB:ID}} markers with interactive citations
    bib_pattern = re.compile(r'(?:\[\[(\d+)\]\]\s*)?\{\{BIB:([\w\.\-/]+)\}\}')
    def replace_bib(match):
        idx_str = match.group(1)
        # bib_id is captured but not used below.
        bib_id = match.group(2)
        # Try to resolve by index first
        if idx_str and docs_df is not None and not docs_df.empty:
            try:
                # Citation numbers are 1-based row positions.
                idx = int(idx_str) - 1
                if 0 <= idx < len(docs_df):
                    row = docs_df.iloc[idx]
                    autores = str(row.get("Autores", ""))
                    año = str(row.get("Año", ""))
                    parts = [a.strip() for a in autores.split(",")]
                    surnames = [p.split()[-1] for p in parts if p and "..." not in p]
                    # Every branch yields the same "[n]" text.
                    if len(surnames) == 1:
                        cite_text = f"[{idx+1}]"
                    elif len(surnames) == 2:
                        cite_text = f"[{idx+1}]"
                    elif len(surnames) > 2:
                        cite_text = f"[{idx+1}]"
                    else:
                        cite_text = f"[{idx+1}]"
                    # Build tooltip with author info
                    if len(surnames) >= 1:
                        if len(surnames) == 1:
                            tooltip = f"{surnames[0]} ({año})"
                        elif len(surnames) == 2:
                            tooltip = f"{surnames[0]} y {surnames[1]} ({año})"
                        else:
                            tooltip = f"{surnames[0]} et al. ({año})"
                    else:
                        tooltip = f"Fuente {idx+1} ({año})"
                    cite_id_counter[0] += 1
                    cid = cite_id_counter[0]
                    import math
                    import base64
                    # NaN cells become "" so the row serialises to valid JSON.
                    found = {k: ("" if (isinstance(v, float) and math.isnan(v)) else v) for k, v in row.to_dict().items()}
                    data_json = _json.dumps(found, ensure_ascii=False)
                    data_b64 = base64.b64encode(data_json.encode('utf-8')).decode('utf-8')
                    return f'{cite_text}'
            except Exception:
                # Any failure falls through to the plain rendering below.
                pass
        # Fallback: show the [[n]] as a simple superscript
        if idx_str:
            return f'[{idx_str}]'
        # A {{BIB:ID}} marker with no number is removed.
        return ""
    html_body = bib_pattern.sub(replace_bib, html_body)
    # 1b. Also handle bare [[n]] without {{BIB:ID}} — common in some model outputs
    bare_bracket_pattern = re.compile(r'\[\[(\d+)\]\]')
    def replace_bare_bracket(match):
        idx_str = match.group(1)
        if docs_df is not None and not docs_df.empty:
            try:
                idx = int(idx_str) - 1
                if 0 <= idx < len(docs_df):
                    row = docs_df.iloc[idx]
                    autores = str(row.get("Autores", ""))
                    año = str(row.get("Año", ""))
                    parts = [a.strip() for a in autores.split(",")]
                    surnames = [p.split()[-1] for p in parts if p and "..." not in p]
                    if len(surnames) >= 1:
                        if len(surnames) == 1:
                            tooltip = f"{surnames[0]} ({año})"
                        elif len(surnames) == 2:
                            tooltip = f"{surnames[0]} y {surnames[1]} ({año})"
                        else:
                            tooltip = f"{surnames[0]} et al. ({año})"
                    else:
                        tooltip = f"Fuente {idx+1}"
                    cite_id_counter[0] += 1
                    cid = cite_id_counter[0]
                    import math
                    import base64
                    found = {k: ("" if (isinstance(v, float) and math.isnan(v)) else v) for k, v in row.to_dict().items()}
                    data_json = _json.dumps(found, ensure_ascii=False)
                    data_b64 = base64.b64encode(data_json.encode('utf-8')).decode('utf-8')
                    return f'[{idx_str}]'
            except Exception:
                # Any failure falls through to the plain rendering below.
                pass
        return f'[{idx_str}]'
    html_body = bare_bracket_pattern.sub(replace_bare_bracket, html_body)
    # 2. Second pass: Find and wrap existing manual APA citations: (Author, Year)
    citation_pattern = re.compile(
        r'\(([A-ZÁÉÍÓÚÑ][a-záéíóúñ]+(?:\s*(?:&|&|y|et\s+al\.?|,\s*[A-ZÁÉÍÓÚÑ][a-záéíóúñ]+))*)\s*,\s*(\d{4}|s\.f\.)\)'
    )
    def replace_citation(match):
        full_match = match.group(0)
        authors_part = match.group(1)
        year_part = match.group(2)
        author_names = re.split(r'\s*(?:&|&|y|,)\s*', authors_part)
        author_names = [a.strip().replace("et al.", "").strip() for a in author_names if a.strip()]
        found = None
        # Try each author's "surname_year" key first.
        for a in author_names:
            surname = a.split()[-1].lower() if a else ""
            key = f"{surname}_{year_part}"
            if key in docs_index:
                found = docs_index[key]
                break
        # Then the combined key of the first two authors.
        if not found and len(author_names) >= 2:
            combined = "_".join(a.split()[-1].lower() for a in author_names[:2]) + f"_{year_part}"
            if combined in docs_index:
                found = docs_index[combined]
        # Citations that match no document are returned as they were.
        if not found:
            return f'{full_match}'
        cite_id_counter[0] += 1
        cid = cite_id_counter[0]
        import math
        import base64
        found_clean = {k: ("" if (isinstance(v, float) and math.isnan(v)) else v) for k, v in found.items()}
        data_json = _json.dumps(found_clean, ensure_ascii=False)
        data_b64 = base64.b64encode(data_json.encode('utf-8')).decode('utf-8')
        return f'{full_match}'
    html_body = citation_pattern.sub(replace_citation, html_body)
    # Build the floating card container + JS + MathJax
    floating_card_js = FLOATING_CARD_JS
    return f'''
{html_body}
{floating_card_js}
'''
# ══════════════════════════════════════════════════════════════
# RESEARCH HANDLER
# ══════════════════════════════════════════════════════════════
async def research_handler(
    query, provider, search_model, synthesis_model, translation_model,
    profile, depth, iterations, include_validation, sources,
    enable_dme=True, synthesis_strategy="auto",
    year_start="", year_end="", university="",
    infinite_output=True, max_continuation=5,
    grade_mode="original", geo_context="Automático"
):
    """Run the research pipeline and stream UI updates.

    Async generator. Every yielded item is an 8-tuple, in this order: status
    HTML, progress HTML, report view, documents DataFrame, section cards
    HTML, references HTML, stats HTML, raw accumulated report text.

    The query and the provider's API key are validated first; on failure a
    single error tuple is yielded and the generator ends. Otherwise a
    Project/ResearchJob pair is recorded when an "admin" user exists, the
    pipeline is published in ``_active_pipeline`` so the stop/pause/resume
    controls can reach it, and one tuple is yielded per pipeline update,
    followed by a final "done", "stopped" or "error" tuple. The pipeline is
    always closed and ``_active_pipeline`` reset at the end.

    Args:
        query: Research topic; must contain non-whitespace text.
        provider: Key into ``PROVIDERS``; its "env_key" names the environment
            variable that holds the API key.
        depth, iterations, max_continuation: Converted with ``int()`` before
            being passed to the pipeline.
        sources: Sources to search; a falsy value means ``["all"]``.
        year_start, year_end, university: Empty values are passed as ``None``.
        Remaining arguments are forwarded to ``ResearchPipeline`` or its
        ``run`` method unchanged.
    """
    global _active_pipeline
    import pandas as pd

    empty_df = pd.DataFrame(columns=["Título", "Autores", "Año", "DOI", "Fuente", "GRADE", "PDF URL"])
    ref_md = "_Sin referencias disponibles aún..._"
    stats_html = _build_stats_html("", empty_df)

    if not query or not query.strip():
        gr.Warning("Ingrese un tema de investigación")
        yield (
            _build_status_html("error", "Sin consulta"), _build_progress_html(-1),
            "**Error:** Ingrese un tema de investigación.", empty_df,
            "", ref_md, stats_html, "",
        )
        return

    api_key = os.getenv(PROVIDERS.get(provider, {}).get("env_key", ""), "")
    if not api_key:
        env_key = PROVIDERS.get(provider, {}).get("env_key", "?")
        gr.Warning(f"No hay API key para {provider}. Configure {env_key} en .env")
        yield (
            _build_status_html("error", "API key faltante"), _build_progress_html(-1),
            f"**Error:** No hay API key para {provider}. Configure `{env_key}` en .env",
            empty_df, "", ref_md, stats_html, "",
        )
        return

    # Register the run in the database. The import stays here so the
    # validation errors above work even when the database layer is unavailable.
    from datetime import datetime
    from backend.database.models import SessionLocal, User, Project, ResearchJob

    job_id = None
    db = SessionLocal()
    try:
        user = db.query(User).filter(User.username == "admin").first()
        if user:
            project = Project(title=f"Investigación: {query[:50]}", owner_id=user.id)
            db.add(project)
            db.commit()
            db_job = ResearchJob(project_id=project.id, query=query, status="running")
            db.add(db_job)
            db.commit()
            db.refresh(db_job)
            # Keep only the id: the ORM object is detached once the session closes.
            job_id = db_job.id
    finally:
        db.close()

    def _update_job(status, report=None):
        """Persist the job outcome; does nothing when no job row was created."""
        if job_id is None:
            return
        session = SessionLocal()
        try:
            job = session.query(ResearchJob).get(job_id)
            if job:
                job.status = status
                if report is not None:
                    job.report_md = report
                    job.completed_at = datetime.utcnow()
                session.commit()
        finally:
            session.close()

    search_sources = sources if sources else ["all"]
    pipeline = ResearchPipeline(
        provider=provider, search_model=search_model,
        synthesis_model=synthesis_model, translation_model=translation_model,
        api_key=api_key,
    )
    _active_pipeline = pipeline
    accumulated_report = ""
    accumulated_df = empty_df
    current_phase = -1
    try:
        async for report_md, docs_df in pipeline.run(
            query=query.strip(), sources=search_sources, profile=profile,
            depth=int(depth), iterations=int(iterations),
            include_validation=include_validation,
            enable_dme=enable_dme, synthesis_strategy=synthesis_strategy,
            year_start=year_start or None, year_end=year_end or None,
            university=university or None, grade_mode=grade_mode,
            geo_context=geo_context,
            infinite_output=infinite_output,
            max_continuation_passes=int(max_continuation),
        ):
            accumulated_report = report_md
            # Updates without documents keep the last non-empty table.
            if docs_df is not None and not docs_df.empty:
                accumulated_df = docs_df
            current_phase = _detect_phase(report_md)
            sections_map = _parse_sections_from_report(accumulated_report)
            last_key = list(sections_map)[-1] if sections_map else ""
            extra = f"{len(accumulated_df)} docs" if len(accumulated_df) else ""
            if current_phase == 5 and last_key:
                extra = f"Redactando: {last_key}"
            progress_html = _build_progress_html(current_phase, extra)
            # Use the accumulated table so the references panel does not blank
            # out on updates that carry no documents.
            ref_md = _build_references_html(accumulated_df, accumulated_report)
            stats_html = _build_stats_html(accumulated_report, accumulated_df)
            sections_content = _build_section_cards_html(sections_map)
            paused_label = " ⏸️" if pipeline.is_paused else ""
            yield (
                _build_status_html("running", f"Fase {current_phase}{paused_label}"),
                progress_html, _make_citations_interactive(accumulated_report, accumulated_df), accumulated_df,
                sections_content, ref_md, stats_html, accumulated_report,
            )

        sections_map = _parse_sections_from_report(accumulated_report)
        sections_content = _build_section_cards_html(sections_map, is_done=True)
        ref_md = _build_references_html(accumulated_df, accumulated_report)
        stats_html = _build_stats_html(accumulated_report, accumulated_df)
        yield (
            _build_status_html("done", f"{len(accumulated_df)} docs | {len(sections_map)} secciones"),
            _build_progress_html(7), _make_citations_interactive(accumulated_report, accumulated_df), accumulated_df,
            sections_content, ref_md, stats_html, accumulated_report,
        )
        _update_job("completed", accumulated_report)
    except (StopAsyncIteration, asyncio.CancelledError):
        # Pipeline was stopped by the user.
        # NOTE(review): the job row keeps status "running" on this path.
        sections_map = _parse_sections_from_report(accumulated_report)
        sections_content = _build_section_cards_html(sections_map, is_done=True)
        # accumulated_df is always bound, even if the pipeline never yielded.
        ref_md = _build_references_html(accumulated_df, accumulated_report)
        stats_html = _build_stats_html(accumulated_report, accumulated_df)
        yield (
            _build_status_html("error", "⛔ Detenido por el usuario"),
            _build_progress_html(current_phase, "Detenido"),
            _make_citations_interactive(accumulated_report + "\n\n---\n⛔ **Pipeline detenido por el usuario**", accumulated_df),
            accumulated_df, sections_content, ref_md, stats_html,
            accumulated_report,
        )
    except Exception as e:
        _update_job("error")
        yield (
            _build_status_html("error", str(e)[:60]),
            _build_progress_html(current_phase),
            _make_citations_interactive(f"**Error:** {str(e)}", accumulated_df), accumulated_df, "", ref_md, stats_html,
            accumulated_report,
        )
    finally:
        _active_pipeline = None
        await pipeline.close()
# ══════════════════════════════════════════════════════════════
# SUPER RESEARCH HANDLER
# ══════════════════════════════════════════════════════════════
async def super_research_handler(
    query, provider, search_model, synthesis_model, translation_model,
    profile, depth, rounds, include_validation, sources,
    enable_dme=True, synthesis_strategy="auto",
    year_start="", year_end="", university="",
    infinite_output=True, max_continuation=5,
    grade_mode="original", geo_context="Automático"
):
    """Run the "super research" variant of the pipeline and stream UI updates.

    Async generator. Every yielded item is an 8-tuple, in this order: status
    HTML, progress HTML, report view, documents DataFrame, section cards
    HTML, references HTML, stats HTML, raw accumulated report text.

    The query and the provider's API key are validated first; on failure a
    single error tuple is yielded and the generator ends. Otherwise a
    Project/ResearchJob pair is recorded when an "admin" user exists, the
    pipeline is published in ``_active_pipeline`` so the stop/pause/resume
    controls can reach it, and one tuple is yielded per pipeline update,
    followed by a final "done", "stopped" or "error" tuple. The pipeline is
    always closed and ``_active_pipeline`` reset at the end.

    Args:
        query: Research topic; must contain non-whitespace text.
        provider: Key into ``PROVIDERS``; its "env_key" names the environment
            variable that holds the API key.
        rounds: Passed to the pipeline as ``iterations`` after ``int()``.
        depth, max_continuation: Converted with ``int()`` before being passed
            to the pipeline.
        sources: Sources to search; a falsy value means ``["all"]``.
        year_start, year_end, university: Empty values are passed as ``None``.
        Remaining arguments are forwarded to ``ResearchPipeline`` or its
        ``run`` method unchanged.
    """
    global _active_pipeline
    import pandas as pd

    empty_df = pd.DataFrame(columns=["Título", "Autores", "Año", "DOI", "Fuente", "GRADE", "PDF URL"])
    ref_md = "_Sin referencias disponibles aún..._"
    stats_html = _build_stats_html("", empty_df)

    if not query or not query.strip():
        gr.Warning("Ingrese un tema de investigación")
        yield (
            _build_status_html("error", "Sin consulta"), _build_progress_html(-1),
            "**Error:** Ingrese un tema de investigación.", empty_df,
            "", ref_md, stats_html, "",
        )
        return

    api_key = os.getenv(PROVIDERS.get(provider, {}).get("env_key", ""), "")
    if not api_key:
        env_key = PROVIDERS.get(provider, {}).get("env_key", "?")
        gr.Warning(f"No hay API key para {provider}. Configure {env_key} en .env")
        yield (
            _build_status_html("error", "API key faltante"), _build_progress_html(-1),
            f"**Error:** No hay API key para {provider}. Configure `{env_key}` en .env",
            empty_df, "", ref_md, stats_html, "",
        )
        return

    # Register the run in the database. The import stays here so the
    # validation errors above work even when the database layer is unavailable.
    from datetime import datetime
    from backend.database.models import SessionLocal, User, Project, ResearchJob

    job_id = None
    db = SessionLocal()
    try:
        user = db.query(User).filter(User.username == "admin").first()
        if user:
            project = Project(title=f"Super Inv: {query[:50]}", owner_id=user.id)
            db.add(project)
            db.commit()
            db_job = ResearchJob(project_id=project.id, query=query, status="running")
            db.add(db_job)
            db.commit()
            db.refresh(db_job)
            # Keep only the id: the ORM object is detached once the session closes.
            job_id = db_job.id
    finally:
        db.close()

    def _update_job(status, report=None):
        """Persist the job outcome; does nothing when no job row was created."""
        if job_id is None:
            return
        session = SessionLocal()
        try:
            job = session.query(ResearchJob).get(job_id)
            if job:
                job.status = status
                if report is not None:
                    job.report_md = report
                    job.completed_at = datetime.utcnow()
                session.commit()
        finally:
            session.close()

    search_sources = sources if sources else ["all"]
    pipeline = ResearchPipeline(
        provider=provider, search_model=search_model,
        synthesis_model=synthesis_model, translation_model=translation_model,
        api_key=api_key,
    )
    _active_pipeline = pipeline
    accumulated_report = ""
    accumulated_df = empty_df
    current_phase = -1
    try:
        async for report_md, docs_df in pipeline.run(
            query=query.strip(), sources=search_sources, profile=profile,
            depth=int(depth), iterations=int(rounds),
            include_validation=include_validation,
            enable_dme=enable_dme, synthesis_strategy=synthesis_strategy,
            year_start=year_start or None, year_end=year_end or None,
            university=university or None, grade_mode=grade_mode,
            geo_context=geo_context,
            infinite_output=infinite_output,
            max_continuation_passes=int(max_continuation),
        ):
            accumulated_report = report_md
            # Updates without documents keep the last non-empty table.
            if docs_df is not None and not docs_df.empty:
                accumulated_df = docs_df
            current_phase = _detect_phase(report_md)
            sections_map = _parse_sections_from_report(accumulated_report)
            last_key = list(sections_map)[-1] if sections_map else ""
            extra = f"{len(accumulated_df)} docs" if len(accumulated_df) else ""
            if current_phase == 5 and last_key:
                extra = f"Redactando: {last_key}"
            progress_html = _build_progress_html(current_phase, extra)
            # Use the accumulated table so the references panel does not blank
            # out on updates that carry no documents.
            ref_md = _build_references_html(accumulated_df, accumulated_report)
            stats_html = _build_stats_html(accumulated_report, accumulated_df)
            sections_content = _build_section_cards_html(sections_map)
            paused_label = " ⏸️" if pipeline.is_paused else ""
            yield (
                _build_status_html("running", f"Fase {current_phase}{paused_label}"),
                progress_html, _make_citations_interactive(accumulated_report, accumulated_df), accumulated_df,
                sections_content, ref_md, stats_html, accumulated_report,
            )

        sections_map = _parse_sections_from_report(accumulated_report)
        sections_content = _build_section_cards_html(sections_map, is_done=True)
        ref_md = _build_references_html(accumulated_df, accumulated_report)
        stats_html = _build_stats_html(accumulated_report, accumulated_df)
        yield (
            _build_status_html("done", f"{len(accumulated_df)} docs | {len(sections_map)} secciones"),
            _build_progress_html(7), _make_citations_interactive(accumulated_report, accumulated_df), accumulated_df,
            sections_content, ref_md, stats_html, accumulated_report,
        )
        _update_job("completed", accumulated_report)
    except (StopAsyncIteration, asyncio.CancelledError):
        # Pipeline was stopped by the user.
        # NOTE(review): the job row keeps status "running" on this path.
        sections_map = _parse_sections_from_report(accumulated_report)
        sections_content = _build_section_cards_html(sections_map, is_done=True)
        # accumulated_df is always bound, even if the pipeline never yielded.
        ref_md = _build_references_html(accumulated_df, accumulated_report)
        stats_html = _build_stats_html(accumulated_report, accumulated_df)
        yield (
            _build_status_html("error", "⛔ Detenido por el usuario"),
            _build_progress_html(current_phase, "Detenido"),
            _make_citations_interactive(accumulated_report + "\n\n---\n⛔ **Pipeline detenido por el usuario**", accumulated_df),
            accumulated_df, sections_content, ref_md, stats_html,
            accumulated_report,
        )
    except Exception as e:
        _update_job("error")
        yield (
            _build_status_html("error", str(e)[:60]),
            _build_progress_html(current_phase),
            _make_citations_interactive(f"**Error:** {str(e)}", accumulated_df), accumulated_df, "", ref_md, stats_html,
            accumulated_report,
        )
    finally:
        _active_pipeline = None
        await pipeline.close()
# ══════════════════════════════════════════════════════════════
# SYNTHESIS HANDLER
# ══════════════════════════════════════════════════════════════
async def synthesis_handler(
    query, docs_text, provider, search_model, synthesis_model,
    translation_model, profile, include_validation,
    enable_dme=True, synthesis_strategy="auto",
    grade_mode="original", geo_context="Automático",
):
    """Stream a synthesis report built from user-supplied documents.

    Async generator used as a Gradio event handler. Every yield is an
    8-tuple, in the order of the outputs it is wired to:
    (status HTML, progress HTML, report, documents table, section cards,
    references, stats HTML, raw report for the state component).

    Args:
        query: Topic/title of the synthesis; must be non-blank.
        docs_text: Pasted list of documents; must be non-blank.
        provider: Key into ``PROVIDERS``; its ``env_key`` names the
            environment variable that holds the API key.
        search_model, synthesis_model, translation_model: Model names
            forwarded to ``ResearchPipeline``.
        profile, include_validation, enable_dme, synthesis_strategy,
        grade_mode, geo_context: Forwarded unchanged to ``pipeline.run``.

    Yields:
        One 8-tuple per pipeline update, a final "done" tuple, or a single
        "error" tuple when validation fails or the pipeline raises.
    """
    import pandas as pd

    empty_df = pd.DataFrame(columns=["Título", "Autores", "Año", "DOI", "Fuente", "GRADE", "PDF URL"])
    ref_md = "_Sin referencias disponibles aún..._"
    stats_html = _build_stats_html("", empty_df)

    # ── Input validation: each failure emits one error frame and stops ──
    if not query or not query.strip():
        gr.Warning("Ingrese un tema/título")
        yield (
            _build_status_html("error", "Sin consulta"), _build_progress_html(-1),
            "**Error:** Ingrese un tema o título para la síntesis.", empty_df,
            "", ref_md, stats_html, "",
        )
        return
    if not docs_text or not docs_text.strip():
        gr.Warning("Ingrese al menos 5 documentos")
        yield (
            _build_status_html("error", "Sin documentos"), _build_progress_html(-1),
            "**Error:** Pegue la lista de documentos en el campo de texto.", empty_df,
            "", ref_md, stats_html, "",
        )
        return

    api_key = os.getenv(PROVIDERS.get(provider, {}).get("env_key", ""), "")
    if not api_key:
        env_key = PROVIDERS.get(provider, {}).get("env_key", "?")
        gr.Warning(f"No hay API key para {provider}. Configure {env_key} en .env")
        yield (
            _build_status_html("error", "API key faltante"), _build_progress_html(-1),
            f"**Error:** No hay API key para {provider}. Configure `{env_key}` en .env",
            empty_df, "", ref_md, stats_html, "",
        )
        return

    pipeline = ResearchPipeline(
        provider=provider, search_model=search_model,
        synthesis_model=synthesis_model, translation_model=translation_model,
        api_key=api_key,
    )
    accumulated_report = ""
    current_phase = 0
    try:
        # sources=[] and iterations=0: no search step is requested, the
        # pipeline works from the pasted docs_text only.
        async for report_md, docs_df in pipeline.run(
            query=query.strip(), sources=[], profile=profile,
            iterations=0, include_validation=include_validation,
            docs_text=docs_text, enable_dme=enable_dme,
            synthesis_strategy=synthesis_strategy,
            grade_mode=grade_mode, geo_context=geo_context,
        ):
            accumulated_report = report_md
            detected_phase = _detect_phase(report_md)
            if detected_phase != current_phase:
                current_phase = detected_phase
            sections_map = _parse_sections_from_report(accumulated_report)
            sections_content = _build_section_cards_html(sections_map)
            ref_md = _build_references_html(docs_df, accumulated_report)
            stats_html = _build_stats_html(accumulated_report, empty_df)
            yield (
                _build_status_html("running", "Sintetizando"),
                _build_progress_html(current_phase), accumulated_report, empty_df,
                sections_content, ref_md, stats_html, accumulated_report
            )
        # Final frame: rebuild the derived views with the cards marked done.
        sections_map = _parse_sections_from_report(accumulated_report)
        sections_content = _build_section_cards_html(sections_map, is_done=True)
        ref_md = _build_references_html(docs_df, accumulated_report)
        stats_html = _build_stats_html(accumulated_report, empty_df)
        yield (
            _build_status_html("done", "Síntesis completada"),
            _build_progress_html(7), accumulated_report, empty_df,
            sections_content, ref_md, stats_html, accumulated_report
        )
    except Exception as e:
        # The error frame must carry all 8 outputs like every other frame;
        # the last slot keeps whatever report text was produced before the
        # failure so the state component is not left short one value.
        yield (
            _build_status_html("error", str(e)[:60]),
            _build_progress_html(current_phase),
            f"**Error:** {str(e)}", empty_df, "", ref_md, stats_html,
            accumulated_report,
        )
    finally:
        await pipeline.close()
# ══════════════════════════════════════════════════════════════
# HELPER: Build a premium tab section (shared layout)
# ══════════════════════════════════════════════════════════════
def _build_research_panel(prefix, title, subtitle, btn_label, handler_fn, is_super=False):
    """Build the shared two-column research panel and return its components.

    Must be called inside an active Gradio layout context. Creates the
    controls column (query, provider/model pickers, search parameters,
    advanced options, run and stop/pause/resume buttons) and the results
    column (report, references, sections, stats, documents, graph, export
    tabs), wires the internal events (model refresh, graph, exports,
    pipeline controls) and leaves the main run button unbound for the caller.

    Args:
        prefix, title, subtitle, handler_fn: Accepted for the caller's
            convenience; not referenced anywhere in this body.
        btn_label: Caption of the main run button.
        is_super: Selects the header icon and the "Rondas" slider (2-5,
            default 3) instead of the "Iteraciones" slider (1-5, default 1).

    Returns:
        A 28-tuple: index 0 is the run button, indices 1-19 are the input
        components (query ... geo) and indices 20-27 are the output
        components (status, progress, report, docs, sections, refs, stats,
        report_md_state).

    NOTE(review): the ``with`` nesting below was reconstructed from the
    statement order; confirm it against the intended layout.
    """
    # ─── Header banner ───
    gr.HTML(f'''
{"🚀" if is_super else "🔬"}
Pipeline v2.0
''')
    with gr.Row():
        # ─── LEFT: Controls ───
        with gr.Column(scale=2):
            status = gr.HTML(_build_status_html("idle"))
            progress = gr.HTML(_build_progress_html(-1, "Esperando consulta..."))
            gr.HTML('''''')
            query = gr.Textbox(
                label="",
                placeholder="Ej: Impacto de la IA en la educación superior en Perú",
                lines=3, show_label=False,
                elem_classes=["glass-input-wrapper"]
            )
            with gr.Row():
                prov = gr.Dropdown(
                    choices=list(PROVIDERS.keys()), value="mistral",
                    label="⚡ Proveedor IA", scale=1,
                )
            # One model dropdown per pipeline role; all start on the mistral list.
            with gr.Accordion("🤖 Modelos por Rol", open=False):
                search_m = gr.Dropdown(
                    choices=PROVIDERS["mistral"]["models"],
                    value=DEFAULT_MODEL, label="🔍 Búsqueda",
                )
                synth_m = gr.Dropdown(
                    choices=PROVIDERS["mistral"]["models"],
                    value=DEFAULT_MODEL, label="📝 Síntesis",
                )
                trans_m = gr.Dropdown(
                    choices=PROVIDERS["mistral"]["models"],
                    value=DEFAULT_MODEL, label="🌐 Traducción",
                )
            # Changing the provider updates the three model dropdowns
            # (update_models is defined elsewhere in this file).
            prov.change(
                fn=update_models, inputs=[prov],
                outputs=[search_m, synth_m, trans_m],
            )
            with gr.Accordion("📚 Parámetros de Búsqueda", open=False):
                # Every group and individual source is pre-selected.
                src = gr.CheckboxGroup(
                    choices=ALL_SOURCES, value=ALL_SOURCES, label="Fuentes", show_label=False,
                )
                gr.HTML('''
all = todas
latam = Latinoamérica
global = PubMed+ArXiv+OpenAlex
''')
                with gr.Row():
                    prof = gr.Dropdown(
                        choices=list(AGENT_PROFILES.keys()),
                        value="auto", label="🎭 Perfil",
                    )
                    dep = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="📏 Profundidad")
                # Same variable either way so the returned tuple keeps one slot
                # for the iteration count.
                if is_super:
                    iters = gr.Slider(minimum=2, maximum=5, value=3, step=1, label="🔄 Rondas")
                else:
                    iters = gr.Slider(minimum=1, maximum=5, value=1, step=1, label="🔄 Iteraciones")
            with gr.Accordion("🔧 Opciones Avanzadas", open=False):
                geo = gr.Textbox(value="Automático", label="📍 Contexto Geográfico (País/Universidad)", placeholder="Ej: Perú, Universidad Nacional del Santa")
                val = gr.Checkbox(value=True, label="🔬 Validación de citas (ARA+)")
                dme = gr.Checkbox(value=True, label="🔧 DME: Reparación + Enriquecimiento")
                strat = gr.Radio(
                    choices=["lineal", "jerárquica", "auto"],
                    value="jerárquica", label="📐 Estrategia de Síntesis",
                )
                grade_mode = gr.Radio(
                    choices=["original", "keywords", "llm", "oxford", "hybrid"],
                    value="original", label="📊 Algoritmo GRADE",
                    info="original: Beta SX | keywords: Rápido | llm: IA Preciso | oxford: CEBM | hybrid: Mixto",
                )
                with gr.Row():
                    yr_s = gr.Textbox(label="📅 Año inicio", placeholder="2020")
                    yr_e = gr.Textbox(label="📅 Año fin", placeholder="2025")
                uni = gr.Textbox(label="🏛️ Universidad", placeholder="Ej: UNMSM")
                inf_out = gr.Checkbox(value=True, label="♾️ Output Infinito")
                max_cont = gr.Slider(minimum=1, maximum=10, value=5, step=1, label="🔁 Max Continuaciones")
            # Main run button; its click event is bound by the caller.
            btn = gr.Button(
                btn_label, variant="primary", size="lg",
                elem_classes=["ejecutar-btn"]
            )
            # ─── Control Buttons (Stop/Pause/Resume) ───
            with gr.Row():
                pause_btn = gr.Button(
                    "⏸️ Pausar", size="sm", variant="secondary",
                    elem_classes=["control-btn-pause"]
                )
                resume_btn = gr.Button(
                    "▶️ Reanudar", size="sm", variant="secondary",
                    elem_classes=["control-btn-resume"]
                )
                stop_btn = gr.Button(
                    "⛔ Detener", size="sm", variant="stop",
                    elem_classes=["control-btn-stop"]
                )
        # ─── RIGHT: Results ───
        with gr.Column(scale=3):
            with gr.Tabs():
                with gr.TabItem("📄 Informe"):
                    report = gr.HTML(_make_citations_interactive("", None))
                with gr.TabItem("📚 Referencias"):
                    refs = gr.HTML("_Las referencias aparecerán durante la ejecución..._")
                with gr.TabItem("📑 Secciones"):
                    sections = gr.HTML(_build_section_cards_html({}))
                with gr.TabItem("📊 Estadísticas"):
                    stats = gr.HTML(_build_stats_html("", None))
                with gr.TabItem("📋 Documentos"):
                    docs = gr.Dataframe(
                        headers=["Título", "Autores", "Año", "DOI", "Fuente", "GRADE", "PDF URL"],
                        label="Documentos Encontrados", wrap=True,
                    )
                with gr.TabItem("🌐 Grafo"):
                    graph_btn = gr.Button("🌐 Generar Grafo de Relaciones", size="sm", elem_classes=["ejecutar-btn"])
                    graph_html = gr.HTML('''
🌐
Haz clic en el botón para generar el grafo.
''')
                    # The graph is built on demand from the documents table.
                    graph_btn.click(fn=_generate_graph_from_df, inputs=[docs], outputs=[graph_html])
                # Holds the raw Markdown report (last handler output); read by
                # the export callbacks and passed to the chat tabs below.
                report_md_state = gr.State("")
                with gr.TabItem("📥 Exportar"):
                    gr.HTML('''
📥 Exportar Resultados
Descarga el informe y los documentos en distintos formatos.
''')
                    with gr.Row():
                        export_md_btn = gr.Button("📄 Markdown (.md)", size="sm", variant="secondary")
                        export_bib_btn = gr.Button("📚 BibTeX (.bib)", size="sm", variant="secondary")
                    with gr.Row():
                        export_docx_btn = gr.Button("📝 Word (.docx)", size="sm", variant="secondary")
                        export_zip_btn = gr.Button("📦 ZIP (Workspace)", size="sm", variant="primary")
                    export_file = gr.File(label="Archivo generado", visible=True)
                    from backend.tools.export_utils import export_markdown, export_bibtex, export_zip, export_docx
                    # Each callback clears the file slot when there is nothing to
                    # export; the query text doubles as the export name, with a
                    # fixed fallback when it is empty.
                    def _do_export_md(report_state, q):
                        if not report_state: return gr.update(value=None)
                        return export_markdown(report_state, q or "research")
                    def _do_export_bib(docs_df, q):
                        if docs_df is None or docs_df.empty: return gr.update(value=None)
                        return export_bibtex(docs_df, q or "references")
                    def _do_export_docx(report_state, q):
                        if not report_state: return gr.update(value=None)
                        path = export_docx(report_state, q or "research")
                        # export_docx may return a falsy value; treat it as "no file".
                        return path if path else gr.update(value=None)
                    def _do_export_zip(report_state, docs_df, q):
                        if not report_state: return gr.update(value=None)
                        import pandas as pd
                        # A report without a documents table is still exported.
                        if docs_df is None:
                            docs_df = pd.DataFrame()
                        return export_zip(report_state, docs_df, q or "research")
                    export_md_btn.click(fn=_do_export_md, inputs=[report_md_state, query], outputs=[export_file])
                    export_bib_btn.click(fn=_do_export_bib, inputs=[docs, query], outputs=[export_file])
                    export_docx_btn.click(fn=_do_export_docx, inputs=[report_md_state, query], outputs=[export_file])
                    export_zip_btn.click(fn=_do_export_zip, inputs=[report_md_state, docs, query], outputs=[export_file])
                # Create chat tabs
                from modules.chat_tab import create_chat_tabs
                create_chat_tabs(report_md_state, docs, prov, synth_m)
    # Wire control buttons
    # They act on the module-level active pipeline and only refresh the status.
    stop_btn.click(fn=_control_stop, outputs=[status])
    pause_btn.click(fn=_control_pause, outputs=[status])
    resume_btn.click(fn=_control_resume, outputs=[status])
    # Return all components needed for event binding
    # Layout: [0] run button, [1:20] handler inputs, [20:28] handler outputs.
    return (btn, query, prov, search_m, synth_m, trans_m, prof, dep, iters,
            val, src, dme, strat, yr_s, yr_e, uni, inf_out, max_cont, grade_mode, geo,
            status, progress, report, docs, sections, refs, stats, report_md_state)
# ══════════════════════════════════════════════════════════════
# UI TAB
# ══════════════════════════════════════════════════════════════
def create_research_tab():
    """Create the top-level "Research" tab with its three sub-tabs.

    The Research and Super Research sub-tabs are built by
    ``_build_research_panel`` and bound to ``research_handler`` and
    ``super_research_handler``. The Synthesis sub-tab is laid out inline
    and bound to ``synthesis_handler``.

    NOTE(review): the ``with`` nesting below was reconstructed from the
    statement order; confirm it against the intended layout.
    """
    with gr.Tab("🔬 Research", id="research"):
        gr.HTML('''''')
        with gr.Tabs():
            # ─── RESEARCH ───
            with gr.TabItem("🔬 Research"):
                r = _build_research_panel(
                    "r", "Research Pipeline",
                    "Búsqueda iterativa + síntesis con IA en tiempo real",
                    "🚀 Ejecutar Research", research_handler, is_super=False
                )
                # r[0] is the run button; r[1:20] are the 19 input components
                # and r[20:28] the 8 output components, in the order returned
                # by _build_research_panel.
                # NOTE(review): assumes research_handler takes its parameters
                # in that same order — confirm against its signature.
                r[0].click(
                    fn=research_handler,
                    inputs=list(r[1:20]),
                    outputs=list(r[20:28]),
                )
            # ─── SUPER RESEARCH ───
            with gr.TabItem("🚀 Super Research"):
                s = _build_research_panel(
                    "s", "Super Research Pipeline",
                    "Investigación profunda multi-ronda con validación cruzada",
                    "⚡ Ejecutar Super Research", super_research_handler, is_super=True
                )
                # Same slicing convention as the Research tab above.
                s[0].click(
                    fn=super_research_handler,
                    inputs=list(s[1:20]),
                    outputs=list(s[20:28]),
                )
            # ─── SYNTHESIS ───
            with gr.TabItem("📝 Síntesis"):
                gr.HTML('''
📝
Síntesis de Documentos
Generar informe a partir de documentos proporcionados
''')
                with gr.Row():
                    # Left column: inputs and options.
                    with gr.Column(scale=2):
                        y_status = gr.HTML(_build_status_html("idle"))
                        y_progress = gr.HTML(_build_progress_html(-1, "Esperando consulta..."))
                        gr.HTML('''''')
                        y_query = gr.Textbox(
                            label="", show_label=False,
                            placeholder="Ej: Marco teórico sobre gestión del conocimiento",
                            lines=2, elem_classes=["glass-input-wrapper"]
                        )
                        gr.HTML('''''')
                        # Free-text list of documents pasted by the user.
                        y_docs = gr.Textbox(
                            label="", show_label=False,
                            placeholder="[1] García (2023) - Gestión del conocimiento en Perú\n[2] Smith (2022) - Knowledge management systems\n[3] López (2024) - Bases de datos académicas",
                            lines=8, elem_classes=["glass-input-wrapper"]
                        )
                        y_provider = gr.Dropdown(
                            choices=list(PROVIDERS.keys()), value="mistral",
                            label="⚡ Proveedor IA",
                        )
                        with gr.Accordion("🤖 Modelos por Rol", open=False):
                            y_search_model = gr.Dropdown(
                                choices=PROVIDERS["mistral"]["models"],
                                value=DEFAULT_MODEL, label="🔍 Búsqueda",
                            )
                            y_synthesis_model = gr.Dropdown(
                                choices=PROVIDERS["mistral"]["models"],
                                value=DEFAULT_MODEL, label="📝 Síntesis",
                            )
                            y_translation_model = gr.Dropdown(
                                choices=PROVIDERS["mistral"]["models"],
                                value=DEFAULT_MODEL, label="🌐 Traducción",
                            )
                        # Changing the provider updates the three model dropdowns
                        # (update_models is defined elsewhere in this file).
                        y_provider.change(
                            fn=update_models, inputs=[y_provider],
                            outputs=[y_search_model, y_synthesis_model, y_translation_model],
                        )
                        with gr.Accordion("🔧 Opciones Avanzadas", open=False):
                            with gr.Row():
                                y_profile = gr.Dropdown(
                                    choices=list(AGENT_PROFILES.keys()),
                                    value="auto", label="🎭 Perfil",
                                )
                                y_validation = gr.Checkbox(value=True, label="🔬 Validación ARA+")
                            y_geo = gr.Textbox(value="Automático", label="📍 Contexto Geográfico (País/Universidad)", placeholder="Ej: Perú, Universidad Nacional del Santa")
                            y_enable_dme = gr.Checkbox(value=True, label="🔧 DME")
                            y_synthesis_strategy = gr.Radio(
                                choices=["lineal", "jerárquica", "auto"],
                                value="jerárquica", label="📐 Estrategia",
                            )
                            y_grade_mode = gr.Radio(
                                choices=["original", "keywords", "llm", "oxford", "hybrid"],
                                value="original", label="📊 Algoritmo GRADE",
                            )
                        y_btn = gr.Button(
                            "📝 Ejecutar Síntesis", variant="primary", size="lg",
                            elem_classes=["ejecutar-btn"]
                        )
                    # Right column: result views.
                    with gr.Column(scale=3):
                        with gr.Tabs():
                            with gr.TabItem("📄 Informe"):
                                y_report = gr.HTML(_make_citations_interactive("", None))
                            with gr.TabItem("📚 Referencias"):
                                # NOTE(review): this is gr.Markdown while the shared
                                # panel uses gr.HTML for its references view; both
                                # receive _build_references_html output — confirm
                                # which component type is intended.
                                y_refs = gr.Markdown("_Las referencias aparecerán aquí..._")
                            with gr.TabItem("📑 Secciones"):
                                y_sections = gr.HTML(_build_section_cards_html({}))
                            with gr.TabItem("📊 Estadísticas"):
                                y_stats = gr.HTML(_build_stats_html("", None))
                            with gr.TabItem("📋 Documentos"):
                                y_docs_out = gr.Dataframe(
                                    headers=["Título", "Autores", "Año", "DOI", "Fuente", "GRADE", "PDF URL"],
                                    label="Documentos", wrap=True,
                                )
                # Receives the raw report text (last value of each handler yield).
                y_report_md_state = gr.State("")
                # Inputs follow synthesis_handler's parameter order; outputs are
                # the 8 values it yields per update.
                y_btn.click(
                    fn=synthesis_handler,
                    inputs=[
                        y_query, y_docs, y_provider, y_search_model,
                        y_synthesis_model, y_translation_model, y_profile,
                        y_validation, y_enable_dme, y_synthesis_strategy,
                        y_grade_mode, y_geo,
                    ],
                    outputs=[
                        y_status, y_progress, y_report, y_docs_out,
                        y_sections, y_refs, y_stats, y_report_md_state,
                    ],
                )