Spaces:

C2MV
/

letxinet

Runtime error

App Files Files Community

letxinet / modules /research_tab.py

C2MV

Initial upload for Build Small Hackathon

68fb5e2 verified 20 days ago

Raw

History Blame Contribute Delete

83.9 kB

	import gradio as gr
	import json
	import asyncio
	import os
	import sys
	import re


	FLOATING_CARD_JS = ''
	# Note: showCiteCard/closeCiteCard JS and MathJax are now globally loaded via THEME_JS in app.py


	import time

	sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

	from dotenv import load_dotenv
	_project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
	load_dotenv(os.path.join(_project_root, ".env"))

	from backend.pipeline import ResearchPipeline
	from backend.tools.search_engine import search
	from backend.tools.graph_generator import generator as graph_generator
	from modules.graph_module import generate_interactive_graph
	from backend.synthesis import PROVIDERS
	from backend.prompts.profiles import AGENT_PROFILES
	from .utils import format_results_for_dataframe, format_error

	# Default model identifier.
	DEFAULT_MODEL = "mistral-small-2506"

	# Search groups
	GROUPS = ["all", "latam", "global", "tesis", "iberoamerica", "peru", "brasil", "ecuador", "mexico", "ai_ml"]

	# Individual sources
	INDIVIDUAL_SOURCES = [
	"alicia", "renati", "lareferencia", "bdtd", "rraae",
	"semantic", "openalex", "pubmed", "arxiv", "crossref",
	"dblp", "scopus", "zenodo", "openaire", "doaj",
	"core", "redalyc", "serpapi"
	]

	# Every selectable target: the groups first, followed by the individual sources.
	ALL_SOURCES = GROUPS + INDIVIDUAL_SOURCES
	# ─── Module-level pipeline reference for stop/pause/resume ───
	# Read by _control_stop / _control_pause / _control_resume; a falsy value
	# (the initial None) makes those handlers report the "idle" status.
	_active_pipeline = None


	def _control_stop():
	    """Halt the active pipeline, if any, and return the status badge HTML.

	    Returns the "idle" badge when no pipeline is registered in
	    ``_active_pipeline``; otherwise calls ``stop()`` on it and returns the
	    "error" badge carrying the user-stopped message.
	    """
	    pipeline = _active_pipeline
	    if not pipeline:
	        return _build_status_html("idle")
	    pipeline.stop()
	    return _build_status_html("error", "⛔ Detenido por el usuario")


	def _control_pause():
	    """Pause the active pipeline, if any, and return the status badge HTML.

	    Returns the "idle" badge when no pipeline is registered in
	    ``_active_pipeline``; otherwise calls ``pause()`` on it and returns the
	    "running" badge carrying the paused hint.
	    """
	    pipeline = _active_pipeline
	    if not pipeline:
	        return _build_status_html("idle")
	    pipeline.pause()
	    return _build_status_html("running", "⏸️ Pausado — haz clic en Reanudar")


	def _control_resume():
	    """Resume the active pipeline, if any, and return the status badge HTML.

	    Returns the "idle" badge when no pipeline is registered in
	    ``_active_pipeline``; otherwise calls ``resume()`` on it and returns the
	    "running" badge carrying the resumed message.
	    """
	    pipeline = _active_pipeline
	    if not pipeline:
	        return _build_status_html("idle")
	    pipeline.resume()
	    return _build_status_html("running", "▶️ Reanudado")


	def _build_controls_html(state="idle"):
	"""Build the control buttons bar matching Next.js AgentView"""
	if state == "idle":
	return '''
	<div style="display:flex; gap:8px; align-items:center; padding:8px 0;">
	<span style="font-size:12px; color:var(--text-muted, #9ca3af);">
	⏹️ Pipeline inactivo
	</span>
	</div>'''

	if state == "paused":
	return '''
	<div style="
	display:flex; gap:8px; align-items:center; padding:10px 16px;
	background:rgba(245,158,11,0.08); border:1px solid rgba(245,158,11,0.3);
	border-radius:12px; animation:pulse 2s infinite;
	">
	<span style="width:8px;height:8px;border-radius:50%;background:#f59e0b;box-shadow:0 0 8px rgba(245,158,11,0.5);"></span>
	<span style="font-size:13px; font-weight:600; color:#f59e0b;">⏸️ Pipeline pausado</span>
	<span style="font-size:11px; color:var(--text-muted, #9ca3af); margin-left:8px;">Haz clic en ▶ Reanudar para continuar</span>
	</div>'''

	if state == "stopped":
	return '''
	<div style="
	display:flex; gap:8px; align-items:center; padding:10px 16px;
	background:rgba(239,68,68,0.08); border:1px solid rgba(239,68,68,0.3);
	border-radius:12px;
	">
	<span style="width:8px;height:8px;border-radius:50%;background:#ef4444;"></span>
	<span style="font-size:13px; font-weight:600; color:#ef4444;">⛔ Pipeline detenido</span>
	</div>'''

	# running
	return '''
	<div style="
	display:flex; gap:8px; align-items:center; padding:8px 0;
	">
	<span style="font-size:12px; color:var(--text-muted, #9ca3af);">
	⚡ Pipeline activo — usa los botones para controlar
	</span>
	</div>'''


	# Ordered pipeline phases used by the progress bar. Each entry carries the
	# phase id, its display label and icon, the percentage shown while that
	# phase is current, and its accent color. _build_progress_html skips the
	# entries with a negative id when it draws the phase dots and falls back to
	# the last entry when asked for an id that is not listed here.
	PHASES = [
	{"id": -1, "label": "Verificación de Fuentes", "icon": "🏥", "pct": 0, "color": "#6b7280"},
	{"id": 0, "label": "Optimización de Queries", "icon": "🧠", "pct": 5, "color": "#8b5cf6"},
	{"id": 1, "label": "Búsqueda Iterativa", "icon": "🔍", "pct": 15, "color": "#3b82f6"},
	{"id": 2, "label": "Detección de Vacíos", "icon": "🔎", "pct": 35, "color": "#06b6d4"},
	{"id": 3, "label": "Búsqueda de Rescate", "icon": "🚑", "pct": 45, "color": "#f59e0b"},
	{"id": 4, "label": "Plan Maestro", "icon": "📋", "pct": 55, "color": "#10b981"},
	{"id": 5, "label": "Redacción de Secciones", "icon": "✍️", "pct": 65, "color": "#a855f7"},
	{"id": 6, "label": "Validación y Corrección", "icon": "✅", "pct": 90, "color": "#22c55e"},
	{"id": 7, "label": "Completado", "icon": "🎉", "pct": 100,"color": "#10b981"},
	]

	# ─── Source badge colors (matching search_tab.py) ───
	# Maps a source key to a hex color.
	SOURCE_COLORS = {
	"pubmed": "#3b82f6", "semantic_scholar": "#8b5cf6", "openalex": "#06b6d4",
	"crossref": "#f59e0b", "arxiv": "#ef4444", "doaj": "#10b981",
	"zenodo": "#6366f1", "dblp": "#ec4899", "openaire": "#14b8a6",
	"core": "#f97316", "scielo": "#22c55e", "redalyc": "#a855f7",
	"latindex": "#0ea5e9", "dialnet": "#e11d48", "la_referencia": "#84cc16",
	}

	# Badge color per GRADE level. Callers in this file look it up with the
	# upper-cased part of the GRADE label that precedes " - " and fall back to
	# "#6b7280" when the level is not listed.
	GRADE_COLORS = {
	"1A": "#10b981", "1B": "#22c55e", "2A": "#3b82f6", "2B": "#60a5fa",
	"3A": "#f59e0b", "3B": "#fbbf24", "4": "#f97316", "5": "#ef4444", "6": "#6b7280",
	}


	def update_models(prov_name):
	    """Return a Gradio update listing the models of the given provider.

	    Unknown provider names fall back to the "mistral" entry of PROVIDERS.
	    The first model of the list becomes the selected value.
	    """
	    provider_cfg = PROVIDERS.get(prov_name, PROVIDERS["mistral"])
	    available = provider_cfg["models"]
	    return gr.update(choices=available, value=available[0])


	def _build_progress_html(phase_id, extra=""):
	    """Build the glassmorphic progress bar for the given pipeline phase.

	    Args:
	        phase_id: Id of the current phase as listed in PHASES. An id that
	            is not listed resolves to the last PHASES entry.
	        extra: Optional detail line shown under the dots. It is inserted
	            into the markup verbatim (not escaped).

	    Returns:
	        An HTML string with the phase header, percentage pill, bar and one
	        dot per non-negative phase.
	    """
	    phase = next((p for p in PHASES if p["id"] == phase_id), PHASES[-1])
	    # Use the id of the phase that was actually resolved: with an unknown
	    # phase_id the label/percent come from the fallback entry, so the
	    # "Fase N de M" counter and the active dot must follow it too
	    # (otherwise e.g. phase_id=8 renders "Fase 9 de 8").
	    current_id = phase["id"]
	    pct = phase["pct"]
	    label = phase["label"]
	    icon = phase["icon"]
	    color = phase["color"]

	    # Build phase dots
	    dots = []
	    for p in PHASES:
	        if p["id"] < 0:
	            continue
	        is_done = p["pct"] <= pct and pct > 0
	        is_active = p["id"] == current_id
	        dot_color = p["color"] if is_done else "rgba(255,255,255,0.1)"
	        dot_size = "10px" if is_active else "8px"
	        glow = f"box-shadow:0 0 8px {p['color']}60;" if is_active else ""
	        border = f"border:2px solid {p['color']};" if is_active else ""
	        dots.append(f'''<div title="{p['icon']} {p['label']}" style="
	width:{dot_size}; height:{dot_size}; border-radius:50%;
	background:{dot_color}; {glow} {border}
	transition:all 0.3s ease; cursor:pointer;
	"></div>''')
	    dots_html = "".join(dots)

	    extra_html = f'''<div style="font-size:11px; color:var(--text-muted, #9ca3af); margin-top:6px;
	padding:4px 10px; border-radius:6px; background:rgba(139,92,246,0.06);
	border:1px solid rgba(139,92,246,0.15);
	">{extra}</div>''' if extra else ""

	    # The icon only pulses while the pipeline is strictly between 0% and 100%.
	    pulse_anim = "animation:pulse 2s infinite;" if 0 < pct < 100 else ""

	    return f'''
	<div style="
	background: var(--glass, rgba(17,24,39,0.6));
	backdrop-filter: blur(16px);
	border: 1px solid var(--glass-border, rgba(255,255,255,0.08));
	border-radius: 14px; padding: 16px 20px;
	font-family: Inter, system-ui, sans-serif;
	">
	<div style="display:flex; justify-content:space-between; align-items:center; margin-bottom:10px;">
	<div style="display:flex; align-items:center; gap:10px;">
	<span style="
	font-size:20px; width:36px; height:36px; display:flex; align-items:center; justify-content:center;
	background:linear-gradient(135deg, {color}20, {color}10);
	border:1px solid {color}40; border-radius:10px; {pulse_anim}
	">{icon}</span>
	<div>
	<div style="font-size:14px; font-weight:700; color:var(--text, #fff);">
	{label}
	</div>
	<div style="font-size:11px; color:var(--text-muted, #9ca3af);">
	Fase {max(0, current_id + 1)} de {len(PHASES) - 1}
	</div>
	</div>
	</div>
	<span style="
	font-size:13px; font-weight:700; color:{color};
	padding:4px 12px; border-radius:20px;
	background:{color}15; border:1px solid {color}30;
	">{pct}%</span>
	</div>
	<div style="background:rgba(255,255,255,0.05); border-radius:8px; height:8px; overflow:hidden; margin-bottom:10px;">
	<div style="width:{pct}%; height:100%; border-radius:8px;
	background:linear-gradient(90deg, {color}, {color}cc);
	transition:width 0.8s cubic-bezier(0.23,1,0.32,1);
	box-shadow:0 0 12px {color}40;
	"></div>
	</div>
	<div style="display:flex; gap:6px; align-items:center; justify-content:center;">
	{dots_html}
	</div>
	{extra_html}
	</div>'''


	def _build_status_html(state="idle", extra=""):
	"""Build a premium status indicator"""
	configs = {
	"idle": {"color": "#6b7280", "icon": "⏹️", "label": "Inactivo", "bg": "rgba(107,114,128,0.08)", "border": "rgba(107,114,128,0.2)"},
	"running": {"color": "#8b5cf6", "icon": "⚡", "label": "En ejecución...", "bg": "rgba(139,92,246,0.08)", "border": "rgba(139,92,246,0.3)"},
	"done": {"color": "#10b981", "icon": "✅", "label": "Completado", "bg": "rgba(16,185,129,0.08)", "border": "rgba(16,185,129,0.3)"},
	"error": {"color": "#ef4444", "icon": "❌", "label": "Error", "bg": "rgba(239,68,68,0.08)", "border": "rgba(239,68,68,0.3)"},
	}
	cfg = configs.get(state, configs["idle"])
	pulse = "animation:pulse 2s infinite;" if state == "running" else ""
	extra_html = f'<span style="color:var(--text-muted, #9ca3af); margin-left:8px; font-size:12px;">{extra}</span>' if extra else ""

	return f'''
	<div style="
	display:inline-flex; align-items:center; gap:10px;
	background:{cfg['bg']}; border:1px solid {cfg['border']};
	border-radius:10px; padding:8px 16px;
	backdrop-filter:blur(12px); {pulse}
	">
	<span style="
	width:8px; height:8px; border-radius:50%;
	background:{cfg['color']}; box-shadow:0 0 8px {cfg['color']}60;
	"></span>
	<span style="font-size:13px; font-weight:600; color:{cfg['color']};">
	{cfg['icon']} {cfg['label']}
	</span>
	{extra_html}
	</div>'''


	def _parse_sections_from_report(report_md):
	if not report_md:
	return {}
	sections = {}
	current = None
	current_lines = []
	for line in report_md.split("\n"):
	# Match Markdown headers: ## Title or ### Title
	m = re.match(r'^#{2,3}\s+(.+)', line)
	# Match LaTeX headers: \section{Title}, \subsection{Title}, \subsubsection{Title}
	if not m:
	m = re.match(r'\\(?:sub)*section\{(.+?)\}', line)
	if m:
	if current:
	sections[current] = "\n".join(current_lines).strip()
	title = m.group(1).strip()
	title = re.sub(r'^[🔬📝📊🔎🚑📋✍️✅🎉🏥🧠🔍\s]+', '', title).strip()
	if not title:
	title = current or "Sin título"
	current = title
	current_lines = []
	else:
	current_lines.append(line)
	if current:
	sections[current] = "\n".join(current_lines).strip()
	return sections



	def _build_references_html(docs_df, report_md=""):
	if docs_df is None or docs_df.empty:
	return "_Sin referencias disponibles aún..._"

	import json as _json
	import re
	import math
	import base64

	# Extract cited indices from report_md
	cited_indices = set()
	if report_md:
	for match in re.finditer(r'\[(\d+)\]', report_md):
	cited_indices.add(int(match.group(1)))

	has_text_produced = bool(report_md.strip())

	html = '<div style="display:flex; justify-content:space-between; align-items:center; margin-bottom:16px; padding-bottom:12px; border-bottom:1px solid rgba(255,255,255,0.1);">'
	html += '<div id="refs-stats" style="font-size:13px; color:#9ca3af; font-weight:500;"></div>'

	# Filters
	html += '<div style="display:flex; gap:12px; align-items:center;">'
	if has_text_produced:
	html += '''
	<label style="display:flex; align-items:center; gap:6px; font-size:13px; color:#d1d5db; cursor:pointer;">
	<input type="checkbox" id="refs-filter-cited" onchange="document.getElementById('refs-container').setAttribute('data-page', '1'); initRefsPagination()" style="accent-color:var(--accent, #8b5cf6); width:16px; height:16px;">
	Solo citados en texto
	</label>
	'''
	else:
	html += '<input type="checkbox" id="refs-filter-cited" style="display:none;">'

	html += '</div></div>'

	html += '<div id="refs-container" data-page="1" style="display:flex; flex-direction:column; gap:12px; min-height:400px;">'

	for idx, row in docs_df.iterrows():
	num = idx + 1
	autores = str(row.get("Autores", ""))
	año = str(row.get("Año", ""))
	titulo = str(row.get("Título", ""))
	fuente = str(row.get("Fuente", ""))
	grade = str(row.get("GRADE", ""))

	parts = [a.strip() for a in autores.split(",")]
	surnames = [p.split()[-1] for p in parts if p and "..." not in p]

	if len(surnames) == 1:
	cite_text = f"{surnames[0]} ({año})"
	elif len(surnames) == 2:
	cite_text = f"{surnames[0]} y {surnames[1]} ({año})"
	elif len(surnames) > 2:
	cite_text = f"{surnames[0]} et al. ({año})"
	else:
	cite_text = f"Sin Autor ({año})"

	level_key = grade.split(" - ")[0].strip().upper() if grade else "UNKNOWN"
	color = GRADE_COLORS.get(level_key, "#6b7280")

	import math
	import base64
	found = {k: ("" if (isinstance(v, float) and math.isnan(v)) else v) for k, v in row.to_dict().items()}
	data_json = _json.dumps(found, ensure_ascii=False)
	data_b64 = base64.b64encode(data_json.encode('utf-8')).decode('utf-8')

	is_cited = str(num in cited_indices).lower()
	initial_display = "flex" if idx < 10 else "none"

	html += f'''
	<div class="ref-item" data-cited="{is_cited}" style="display:{initial_display}; padding:14px; border-radius:10px; background:var(--glass, rgba(17,24,39,0.4)); border:1px solid var(--glass-border, rgba(255,255,255,0.06)); gap:14px; align-items:flex-start; transition: all 0.2s;">
	<div style="font-weight:800; color:var(--accent); min-width:32px; font-size:16px;">[{num}]</div>
	<div style="flex-grow:1;">
	<div style="margin-bottom:6px; line-height:1.4;">
	<span class="cite-link" data-cite-b64="{data_b64}" onclick="showCiteCard(this, {idx})" style="font-weight:700; font-size:15px; cursor:pointer; color:var(--accent, #8b5cf6);">
	[{num}] {cite_text}.
	</span> <span style="font-style:italic; font-size:15px; opacity:0.9;">{titulo}</span>
	</div>
	<div style="display:flex; gap:8px; margin-top:8px; align-items:center; flex-wrap:wrap;">
	<span style="font-size:11px; font-weight:600; padding:3px 10px; border-radius:12px; background:rgba(255,255,255,0.08);">{fuente}</span>
	<span style="font-size:11px; font-weight:600; padding:3px 10px; border-radius:12px; background:{color}15; border:1px solid {color}40; color:{color};">{grade}</span>
	</div>
	</div>
	</div>
	'''

	html += '</div>'
	html += '<div id="refs-pagination" style="display:flex; justify-content:center; align-items:center; margin-top:24px; padding-top:16px; border-top:1px solid rgba(255,255,255,0.05); gap:4px;"></div>'

	html += '<img src="data:image/gif;base64,R0lGODlhAQABAIAAAP///wAAACH5BAEAAAAALAAAAAABAAEAAAICRAEAOw==" onload="if(window.initRefsPagination) window.initRefsPagination();" style="display:none;">'

	html += FLOATING_CARD_JS
	return html


	def _build_stats_html(report_md, docs_df):
	    """Build the stats dashboard: document/section/word counts plus GRADE badges.

	    Args:
	        report_md: Report text; sections are counted with
	            ``_parse_sections_from_report`` and words by whitespace split.
	        docs_df: DataFrame of documents, or None. When it has a "GRADE"
	            column, one badge per distinct value is shown, most frequent
	            first.

	    Returns:
	        An HTML string with the three stat cards and, if any GRADE values
	        exist, the distribution row.
	    """
	    has_docs = docs_df is not None and not docs_df.empty
	    total_docs = len(docs_df) if has_docs else 0
	    sections = _parse_sections_from_report(report_md)
	    total_sections = len(sections)
	    word_count = len(report_md.split()) if report_md else 0

	    grade_data = {}
	    if has_docs and "GRADE" in docs_df.columns:
	        grade_data = docs_df["GRADE"].value_counts().to_dict()

	    # Build stat cards
	    stats = [
	        ("📄", "Documentos", str(total_docs), "#3b82f6"),
	        ("📑", "Secciones", str(total_sections), "#8b5cf6"),
	        ("📝", "Palabras", f"{word_count:,}", "#10b981"),
	    ]

	    cards = []
	    for icon, label, val, color in stats:
	        cards.append(f'''
	<div class="stat-card" style="
	position:relative; overflow:hidden;
	">
	<div style="
	position:absolute; top:0; left:0; right:0; height:3px;
	background:linear-gradient(90deg, {color}, {color}60);
	"></div>
	<div style="font-size:24px; margin-bottom:6px; margin-top:4px;">{icon}</div>
	<div style="
	font-size:26px; font-weight:800; color:{color};
	letter-spacing:-0.5px; line-height:1;
	">{val}</div>
	<div style="
	font-size:11px; color:var(--text-muted, #9ca3af); margin-top:6px;
	font-weight:500; text-transform:uppercase; letter-spacing:0.5px;
	">{label}</div>
	</div>''')
	    cards_html = "".join(cards)

	    # GRADE distribution badges
	    grade_html = ""
	    if grade_data:
	        badges = []
	        for label, count in sorted(grade_data.items(), key=lambda x: -x[1]):
	            # GRADE values are not guaranteed to be strings (e.g. a bare
	            # numeric level); coerce before parsing the level prefix.
	            label_text = str(label)
	            level_key = label_text.split(" - ")[0].strip() if " - " in label_text else label_text
	            color = GRADE_COLORS.get(level_key.upper(), "#6b7280")
	            badges.append(f'''<span style="
	display:inline-flex; align-items:center; gap:5px;
	padding:4px 10px; border-radius:20px; font-size:11px; font-weight:600;
	background:{color}15; border:1px solid {color}40; color:{color};
	">
	<span style="width:6px;height:6px;border-radius:50%;background:{color};"></span>
	{label}: {count}
	</span>''')
	        grade_badges = "".join(badges)

	        grade_html = f'''
	<div style="margin-top:12px; padding-top:12px; border-top:1px solid var(--glass-border, rgba(255,255,255,0.06));">
	<div style="font-size:12px; font-weight:600; color:var(--text-muted, #9ca3af); margin-bottom:8px;">
	🏅 Distribución GRADE
	</div>
	<div style="display:flex; flex-wrap:wrap; gap:6px;">
	{grade_badges}
	</div>
	</div>'''

	    return f'''
	<div style="
	background:var(--glass, rgba(17,24,39,0.6));
	backdrop-filter:blur(16px);
	border:1px solid var(--glass-border, rgba(255,255,255,0.08));
	border-radius:14px; padding:16px 20px;
	">
	<div style="display:grid; grid-template-columns:repeat(auto-fit, minmax(120px, 1fr)); gap:12px;">
	{cards_html}
	</div>
	{grade_html}
	</div>'''



	def _generate_graph_from_df(df):
	    """Pass ``df`` to ``generate_interactive_graph`` and return its result unchanged."""
	    graph = generate_interactive_graph(df)
	    return graph


	def _detect_phase(report_md):
	if not report_md:
	return 0
	text = report_md.lower()
	if ("completado" in text and ("secciones:" in text or "docs citados:" in text)) or "fase 8" in text:
	return 8
	if "reporte final" in text or "generando reporte" in text:
	return 7
	if "grade" in text or "clasificación grade" in text:
	return 6
	if ("validación" in text or "validate" in text or "ara+" in text) and "recuperación" not in text:
	return 6
	if "redactando" in text or "redacción" in text or "writing" in text:
	return 5
	if "plan maestro" in text or "master plan" in text or "fase 4" in text:
	return 4
	if "rescate" in text or "rescue" in text or "fase 3" in text:
	return 3
	if "detección de vacíos" in text or "gap detection" in text or "fase 2" in text:
	return 2
	if "ronda" in text or "buscando" in text or "búsqueda" in text:
	return 1
	if "optimiz" in text or "query" in text:
	return 0
	return 0


	# _refs_to_markdown removed, handled by _build_references_html


	SECTION_COLORS = [
	"#8b5cf6", "#3b82f6", "#06b6d4", "#10b981", "#f59e0b",
	"#ef4444", "#ec4899", "#6366f1", "#14b8a6", "#f97316",
	]

	def _build_section_cards_html(sections_map, is_done=False):
	"""Build glassmorphic expandable section cards"""
	if not sections_map:
	return '''<div style="
	text-align:center; padding:40px 20px; color:#6b7280;
	">
	<div style="font-size:36px; margin-bottom:10px; opacity:0.5;">📑</div>
	<div style="font-size:13px;">Las secciones aparecerán aquí durante la ejecución...</div>
	</div>'''

	cards = ""
	for i, (title, content) in enumerate(sections_map.items()):
	color = SECTION_COLORS[i % len(SECTION_COLORS)]
	word_count = len(content.split()) if content else 0
	status_icon = "✅" if (is_done or word_count > 50) else "⏳"
	sec_id = f"sec_{i}"

	# Escape content for display
	content_preview = content[:300].replace("<", "<").replace(">", ">") if content else ""
	content_full = content.replace("<", "<").replace(">", ">") if content else ""

	# Copy section button
	content_escaped = content.replace("'", "\\'").replace("\n", "\\n").replace('"', '"') if content else ""

	cards += f'''
	<div class="section-card" style="animation:slideIn 0.3s ease {i * 0.06}s both;">
	<!-- Color accent -->
	<div style="height:3px; background:linear-gradient(90deg, {color}, {color}80);"></div>

	<!-- Header (clickable to expand) -->
	<div class="section-card-header" onclick="
	var body=document.getElementById('{sec_id}_body');
	var arrow=document.getElementById('{sec_id}_arrow');
	if(body.style.display==='none'){{body.style.display='block';arrow.textContent='▲';}}
	else{{body.style.display='none';arrow.textContent='▼';}}
	">
	<div style="display:flex; align-items:center; gap:10px;">
	<div style="
	width:28px; height:28px; border-radius:8px;
	background:linear-gradient(135deg, {color}25, {color}10);
	border:1px solid {color}40;
	display:flex; align-items:center; justify-content:center;
	font-size:12px; font-weight:700; color:{color};
	">{i+1}</div>
	<div>
	<div style="font-size:13px; font-weight:600; color:var(--text, #fff);">{title}</div>
	<div style="font-size:11px; color:var(--text-muted, #9ca3af); margin-top:2px;">
	{status_icon} {word_count} palabras
	</div>
	</div>
	</div>
	<div style="display:flex; align-items:center; gap:8px;">
	<button onclick="
	event.stopPropagation();
	navigator.clipboard.writeText('{content_escaped}');
	this.textContent='✅ Copiado';
	var btn=this;
	setTimeout(function(){{btn.textContent='📋';}},1500);
	" style="
	background:rgba(139,92,246,0.08); border:1px solid rgba(139,92,246,0.2);
	color:#8b5cf6; border-radius:6px; padding:4px 8px;
	font-size:11px; cursor:pointer; transition:all 0.2s;
	" title="Copiar sección">📋</button>
	<span id="{sec_id}_arrow" style="color:var(--text-muted, #9ca3af); font-size:12px;">▼</span>
	</div>
	</div>

	<!-- Body (collapsed by default) -->
	<div id="{sec_id}_body" class="section-card-body" style="display:none;">
	<div style="
	font-size:13px; line-height:1.7; color:var(--text, #e5e7eb);
	padding-top:12px; white-space:pre-wrap;
	">{content_full}</div>
	</div>
	</div>'''

	return f'''<div style="max-height:650px; overflow-y:auto; padding-right:4px;">
	{cards}
	</div>'''


	# ══════════════════════════════════════════════════════════════
	# INTERACTIVE CITATIONS (Floating Card on Click)
	# ══════════════════════════════════════════════════════════════

	def _build_docs_index(docs_df):
	"""Build a lookup dict: author_year_key -> paper details."""
	import pandas as pd
	index = {}
	if docs_df is None or (hasattr(docs_df, 'empty') and docs_df.empty):
	return index

	rows = docs_df.to_dict(orient="records") if hasattr(docs_df, 'to_dict') else []
	for row in rows:
	title = row.get("Título", row.get("title", ""))
	authors_raw = row.get("Autores", row.get("authors", ""))
	year = str(row.get("Año", row.get("year", "")))
	doi = row.get("DOI", row.get("doi", ""))
	source = row.get("Fuente", row.get("source", ""))
	grade = row.get("GRADE", row.get("grade", ""))
	pdf_url = row.get("PDF URL", row.get("pdf_url", ""))

	# Extract surname(s)
	if isinstance(authors_raw, list):
	surnames = [a.split()[-1] for a in authors_raw[:3] if a]
	authors_display = ", ".join(authors_raw[:3])
	elif isinstance(authors_raw, str) and authors_raw:
	parts = [a.strip() for a in authors_raw.split(",")]
	surnames = [p.split()[-1] for p in parts[:3] if p]
	authors_display = authors_raw
	else:
	surnames = []
	authors_display = ""

	# Build keys: "surname_year", "surname1_surname2_year" etc.
	for s in surnames:
	key = f"{s.lower()}_{year}"
	if key not in index:
	index[key] = {
	"title": title, "authors": authors_display, "year": year,
	"doi": doi, "source": source, "grade": grade, "pdf_url": pdf_url,
	}
	# Combined key for multi-author
	if len(surnames) >= 2:
	combined = "_".join(s.lower() for s in surnames[:2]) + f"_{year}"
	index[combined] = {
	"title": title, "authors": authors_display, "year": year,
	"doi": doi, "source": source, "grade": grade, "pdf_url": pdf_url,
	}

	return index


	def _latex_to_html(text):
	"""Convert common LaTeX commands to HTML for browser rendering."""
	if not text:
	return text

	# --- Structural commands ---
	# \section{Title} -> <h2>Title</h2>
	text = re.sub(r'\\section\*?\{(.+?)\}', r'<h2>\1</h2>', text)
	# \subsection{Title} -> <h3>Title</h3>
	text = re.sub(r'\\subsection\*?\{(.+?)\}', r'<h3>\1</h3>', text)
	# \subsubsection{Title} -> <h4>Title</h4>
	text = re.sub(r'\\subsubsection\*?\{(.+?)\}', r'<h4>\1</h4>', text)

	# --- Inline formatting ---
	# \textbf{bold} -> <strong>bold</strong>
	text = re.sub(r'\\textbf\{(.+?)\}', r'<strong>\1</strong>', text)
	# \textit{italic} -> <em>italic</em>
	text = re.sub(r'\\textit\{(.+?)\}', r'<em>\1</em>', text)
	# \emph{text} -> <em>text</em>
	text = re.sub(r'\\emph\{(.+?)\}', r'<em>\1</em>', text)
	# \underline{text} -> <u>text</u>
	text = re.sub(r'\\underline\{(.+?)\}', r'<u>\1</u>', text)

	# --- Fix model hallucinative curly braces for taxonomy ---
	# Convert {Word} to Word for markdown italics, ignoring {{BIB:ID}} and existing LaTeX commands
	text = re.sub(r'(?<![\\\{])\{([^{}\n]+)\}(?!\})', r'\1', text)

	# --- List environments ---
	# Capture blocks between "itemize" and "itemize"
	def fix_itemize_block(match):
	content = match.group(1).strip()
	lines = content.split('\n')
	fixed_lines = []
	for line in lines:
	line = line.strip()
	if not line:
	continue
	if line.startswith('-'):
	fixed_lines.append(f"\\item {line[1:].strip()}")
	elif not line.startswith('\\item'):
	fixed_lines.append(f"\\item {line}")
	else:
	fixed_lines.append(line)
	return "\\begin{itemize}\n" + "\n".join(fixed_lines) + "\n\\end{itemize}"

	text = re.sub(r'(?ims)^\sitemize\s$(.?)(^\sitemize\s*$)', fix_itemize_block, text)

	# \begin{itemize}...\end{itemize}
	text = re.sub(r'\\begin\{itemize\}', '<ul>', text)
	text = re.sub(r'\\end\{itemize\}', '</ul>', text)
	text = re.sub(r'\\item\s*', '<li>', text)

	# Fix stray "itemize" text that might remain if not paired
	text = re.sub(r'(?im)^\sitemize\s$', '', text)

	# Fix math units where AI writes $$g/ml instead of \mu g/ml
	text = text.replace('$$g/ml', 'µg/ml')
	text = text.replace('$$g', 'µg')

	# --- CATALOGO DE TRADUCCION CIENTIFICA PARA FRONTEND ---
	# 1. Notacion cientifica (x10^n o x 10^{n})
	text = re.sub(r'(?i)x\s*10\^\{([^}]+)\}', r'× 10<sup>\1</sup>', text)
	text = re.sub(r'(?i)x\s*10\^([0-9\-]+)', r'× 10<sup>\1</sup>', text)

	# 2. Quimica y Subindices comunes (CO2, H2O, NO3-)
	# Busca una letra mayuscula (opcional minuscula) seguida de _ y un numero. Ejemplo: CO_2 -> CO<sub>2</sub>
	text = re.sub(r'([A-Z][a-z]?)_([0-9]+)', r'\1<sub>\2</sub>', text)
	# Variante para {}: CO_{2} -> CO<sub>2</sub>
	text = re.sub(r'([A-Z][a-z]?)_\{([0-9]+)\}', r'\1<sub>\2</sub>', text)

	# 3. Superindices aislados sin $ (e.g. m^2 o cm^{3})
	text = re.sub(r'([a-zA-Z]+)\^\{([0-9\-]+)\}', r'\1<sup>\2</sup>', text)
	text = re.sub(r'([a-zA-Z]+)\^([0-9\-]+)', r'\1<sup>\2</sup>', text)

	# 4. Temperaturas (25 oC, 25oC, 25°C)
	text = re.sub(r'\b([0-9]+)\s*[oO]C\b', r'\1 °C', text)

	# 5. Simbolos matematicos comunes escritos a mano
	text = text.replace('+/-', '±')
	text = text.replace('>=', '≥')
	text = text.replace('<=', '≤')

	# 6. Microgramos escritos con 'u' (ug/ml)
	text = re.sub(r'\bug/ml\b', 'µg/ml', text)
	text = re.sub(r'\bug/L\b', 'µg/L', text)
	text = re.sub(r'\bug\b', 'µg', text)
	# --------------------------------------------------------

	text = re.sub(r'\\end\{enumerate\}', '</ol>', text)
	text = re.sub(r'\\item\s*', '<li>', text)

	# --- Escaped characters ---
	text = text.replace(r'\%', '%')
	text = text.replace(r'\&', '&')
	text = text.replace(r'\#', '#')
	text = text.replace(r'\_', '_')
	text = text.replace(r'\$', '$')

	# --- Remove pure LaTeX boilerplate ---
	text = re.sub(r'\\begin\{document\}', '', text)
	text = re.sub(r'\\end\{document\}', '', text)
	text = re.sub(r'\\begin\{abstract\}', '', text)
	text = re.sub(r'\\end\{abstract\}', '', text)
	text = re.sub(r'\\maketitle', '', text)
	text = re.sub(r'\\documentclass\{[^}]*\}', '', text)
	text = re.sub(r'\\usepackage\{[^}]*\}', '', text)
	text = re.sub(r'\\title\{[^}]*\}', '', text)
	text = re.sub(r'\\author\{[^}]*\}', '', text)
	text = re.sub(r'\\date\{[^}]*\}', '', text)

	# --- Citations: \cite{key} -> leave as-is for downstream processing ---
	text = re.sub(r'\\cite\{([^}]+)\}', r'[\1]', text)

	# --- Paragraph breaks: double newlines ---
	text = re.sub(r'\n{2,}', '</p><p>', text)

	# --- Clean leftover backslash commands that are not math ---
	# But preserve $...$ and $$...$$ for MathJax
	text = re.sub(r'\\(?:noindent\|newpage\|clearpage\|vspace\{[^}]\}\|hspace\{[^}]\}\|par)\b', '', text)

	return text


	def _make_citations_interactive(report_md, docs_df):
	"""Convert LaTeX/Markdown report to HTML with clickable [[n]] citations and MathJax math rendering."""
	import markdown as md_lib
	import json as _json

	if not report_md:
	return '<div style="color:#9ca3af; padding:20px;">Haz clic en el botón para ver el progreso en tiempo real...</div>'

	# Build docs index
	docs_index = _build_docs_index(docs_df)

	# --- Phase 0: LaTeX to HTML pre-processing ---
	processed = _latex_to_html(report_md)

	# Convert remaining Markdown to HTML
	try:
	html_body = md_lib.markdown(
	processed,
	extensions=['tables', 'fenced_code', 'nl2br'],
	)
	except Exception:
	html_body = processed.replace("\n\n", "</p><p>").replace("\n", "<br>")
	html_body = f"<p>{html_body}</p>"

	cite_id_counter = [0]

	# 1. First pass: Replace [[n]] {{BIB:ID}} markers with interactive citations
	bib_pattern = re.compile(r'(?:\[\[(\d+)\]\]\s*)?\{\{BIB:([\w\.\-/]+)\}\}')
	def replace_bib(match):
	idx_str = match.group(1)
	bib_id = match.group(2)

	# Try to resolve by index first
	if idx_str and docs_df is not None and not docs_df.empty:
	try:
	idx = int(idx_str) - 1
	if 0 <= idx < len(docs_df):
	row = docs_df.iloc[idx]
	autores = str(row.get("Autores", ""))
	año = str(row.get("Año", ""))

	parts = [a.strip() for a in autores.split(",")]
	surnames = [p.split()[-1] for p in parts if p and "..." not in p]

	if len(surnames) == 1:
	cite_text = f"[{idx+1}]"
	elif len(surnames) == 2:
	cite_text = f"[{idx+1}]"
	elif len(surnames) > 2:
	cite_text = f"[{idx+1}]"
	else:
	cite_text = f"[{idx+1}]"

	# Build tooltip with author info
	if len(surnames) >= 1:
	if len(surnames) == 1:
	tooltip = f"{surnames[0]} ({año})"
	elif len(surnames) == 2:
	tooltip = f"{surnames[0]} y {surnames[1]} ({año})"
	else:
	tooltip = f"{surnames[0]} et al. ({año})"
	else:
	tooltip = f"Fuente {idx+1} ({año})"

	cite_id_counter[0] += 1
	cid = cite_id_counter[0]

	import math
	import base64
	found = {k: ("" if (isinstance(v, float) and math.isnan(v)) else v) for k, v in row.to_dict().items()}
	data_json = _json.dumps(found, ensure_ascii=False)
	data_b64 = base64.b64encode(data_json.encode('utf-8')).decode('utf-8')
	return f'<span class="cite-link" data-cite-b64="{data_b64}" onclick="showCiteCard(this, {cid})" id="cite_{cid}" title="{tooltip}">{cite_text}</span>'
	except Exception:
	pass

	# Fallback: show the [[n]] as a simple superscript
	if idx_str:
	return f'<sup class="cite-inline">[{idx_str}]</sup>'
	return ""

	html_body = bib_pattern.sub(replace_bib, html_body)

	# 1b. Also handle bare [[n]] without {{BIB:ID}} — common in some model outputs
	bare_bracket_pattern = re.compile(r'\[\[(\d+)\]\]')
	def replace_bare_bracket(match):
	idx_str = match.group(1)
	if docs_df is not None and not docs_df.empty:
	try:
	idx = int(idx_str) - 1
	if 0 <= idx < len(docs_df):
	row = docs_df.iloc[idx]
	autores = str(row.get("Autores", ""))
	año = str(row.get("Año", ""))
	parts = [a.strip() for a in autores.split(",")]
	surnames = [p.split()[-1] for p in parts if p and "..." not in p]

	if len(surnames) >= 1:
	if len(surnames) == 1:
	tooltip = f"{surnames[0]} ({año})"
	elif len(surnames) == 2:
	tooltip = f"{surnames[0]} y {surnames[1]} ({año})"
	else:
	tooltip = f"{surnames[0]} et al. ({año})"
	else:
	tooltip = f"Fuente {idx+1}"

	cite_id_counter[0] += 1
	cid = cite_id_counter[0]
	import math
	import base64
	found = {k: ("" if (isinstance(v, float) and math.isnan(v)) else v) for k, v in row.to_dict().items()}
	data_json = _json.dumps(found, ensure_ascii=False)
	data_b64 = base64.b64encode(data_json.encode('utf-8')).decode('utf-8')
	return f'<span class="cite-link" data-cite-b64="{data_b64}" onclick="showCiteCard(this, {cid})" id="cite_{cid}" title="{tooltip}">[{idx_str}]</span>'
	except Exception:
	pass
	return f'<sup>[{idx_str}]</sup>'

	html_body = bare_bracket_pattern.sub(replace_bare_bracket, html_body)

	# 2. Second pass: Find and wrap existing manual APA citations: (Author, Year)
	citation_pattern = re.compile(
	r'$([A-ZÁÉÍÓÚÑ][a-záéíóúñ]+(?:\s(?:&\|&\|y\|et\s+al\.?\|,\s[A-ZÁÉÍÓÚÑ][a-záéíóúñ]+)))\s,\s*(\d{4}\|s\.f\.)$'
	)

	def replace_citation(match):
	full_match = match.group(0)
	authors_part = match.group(1)
	year_part = match.group(2)

	author_names = re.split(r'\s(?:&\|&\|y\|,)\s', authors_part)
	author_names = [a.strip().replace("et al.", "").strip() for a in author_names if a.strip()]

	found = None
	for a in author_names:
	surname = a.split()[-1].lower() if a else ""
	key = f"{surname}_{year_part}"
	if key in docs_index:
	found = docs_index[key]
	break

	if not found and len(author_names) >= 2:
	combined = "_".join(a.split()[-1].lower() for a in author_names[:2]) + f"_{year_part}"
	if combined in docs_index:
	found = docs_index[combined]

	if not found:
	return f'<span class="cite-inline">{full_match}</span>'

	cite_id_counter[0] += 1
	cid = cite_id_counter[0]

	import math
	import base64
	found_clean = {k: ("" if (isinstance(v, float) and math.isnan(v)) else v) for k, v in found.items()}
	data_json = _json.dumps(found_clean, ensure_ascii=False)
	data_b64 = base64.b64encode(data_json.encode('utf-8')).decode('utf-8')
	return f'<span class="cite-link" data-cite-b64="{data_b64}" onclick="showCiteCard(this, {cid})" id="cite_{cid}">{full_match}</span>'

	html_body = citation_pattern.sub(replace_citation, html_body)

	# Build the floating card container + JS + MathJax
	floating_card_js = FLOATING_CARD_JS

	return f'''<div class="report-interactive" style="
	font-family:'Inter',sans-serif; font-size:14px; line-height:1.75;
	color:var(--text, #e5e7eb); max-height:700px; overflow-y:auto; padding:4px 8px 4px 4px;
	">
	<style>
	.report-interactive h1 {{ font-size:1.5rem; font-weight:700; margin:1.2em 0 0.6em; color:#f3f4f6; border-bottom:1px solid rgba(255,255,255,0.08); padding-bottom:8px; }}
	.report-interactive h2 {{ font-size:1.25rem; font-weight:600; margin:1em 0 0.5em; color:#e5e7eb; }}
	.report-interactive h3 {{ font-size:1.1rem; font-weight:600; margin:0.8em 0 0.4em; color:#d1d5db; }}
	.report-interactive h4 {{ font-size:1rem; font-weight:500; margin:0.6em 0 0.3em; color:#c084fc; }}
	.report-interactive p {{ margin:0.5em 0; text-align:justify; font-size: 15px; }}
	.report-interactive hr {{ border:none; border-top:1px solid rgba(255,255,255,0.06); margin:1.5em 0; }}
	.report-interactive em {{ color:#c084fc; font-style: italic; }}
	.report-interactive strong {{ color:#f3f4f6; }}
	.report-interactive a {{ color:#818cf8; text-decoration:underline; }}
	.report-interactive ul, .report-interactive ol {{ padding-left:1.5em; margin:0.5em 0; font-size: 15px; }}
	.report-interactive li {{ margin:0.3em 0; }}
	.report-interactive blockquote {{ margin:1em 0; padding:8px 16px; color:#9ca3af; font-style: italic; border-left: 3px solid rgba(255,255,255,0.2); }}
	.cite-link {{
	color:#a78bfa; cursor:pointer; font-weight:600;
	border-bottom:1px dashed rgba(167,139,250,0.4);
	transition:all 0.15s ease; padding:0 2px; border-radius:2px;
	font-size:0.85em;
	}}
	.cite-link:hover {{
	background:rgba(139,92,246,0.15); color:#c4b5fd;
	border-bottom-color:rgba(167,139,250,0.7);
	box-shadow:0 0 8px rgba(139,92,246,0.2);
	}}
	.cite-inline {{
	color:#9ca3af; font-style:italic; font-size:0.85em;
	}}
	/* MathJax rendered equations */
	.MathJax {{ font-size:1.05em !important; }}
	</style>
	{html_body}
	{floating_card_js}
	</div>'''


	# ══════════════════════════════════════════════════════════════
	# RESEARCH HANDLER
	# ══════════════════════════════════════════════════════════════


	async def research_handler(
	    query, provider, search_model, synthesis_model, translation_model,
	    profile, depth, iterations, include_validation, sources,
	    enable_dme=True, synthesis_strategy="auto",
	    year_start="", year_end="", university="",
	    infinite_output=True, max_continuation=5,
	    grade_mode="original", geo_context="Automático"
	):
	    """Run the research pipeline and stream UI updates for the Research tab.

	    This is an async generator. Every yielded item is an 8-tuple in the
	    order: status HTML, progress HTML, report (interactive HTML, or a plain
	    error string on validation failure), documents DataFrame, section cards
	    HTML, references HTML, stats HTML, raw report markdown.

	    A blank query or a missing provider API key yields a single error tuple
	    and returns. Otherwise a ``ResearchJob`` row is recorded when an
	    ``admin`` user exists, the pipeline is published in the module-level
	    ``_active_pipeline`` so the stop/pause/resume controls can reach it,
	    and one tuple is yielded per pipeline update, followed by a final
	    "done", "stopped" or "error" tuple.
	    """
	    global _active_pipeline

	    import pandas as pd
	    from datetime import datetime

	    empty_df = pd.DataFrame(columns=["Título", "Autores", "Año", "DOI", "Fuente", "GRADE", "PDF URL"])
	    ref_md = "_Sin referencias disponibles aún..._"
	    stats_html = _build_stats_html("", empty_df)

	    if not query or not query.strip():
	        gr.Warning("Ingrese un tema de investigación")
	        yield (
	            _build_status_html("error", "Sin consulta"), _build_progress_html(-1),
	            "Error: Ingrese un tema de investigación.", empty_df,
	            "", ref_md, stats_html, "",
	        )
	        return

	    api_key = os.getenv(PROVIDERS.get(provider, {}).get("env_key", ""), "")
	    if not api_key:
	        env_key = PROVIDERS.get(provider, {}).get("env_key", "?")
	        gr.Warning(f"No hay API key para {provider}. Configure {env_key} en .env")
	        yield (
	            _build_status_html("error", "API key faltante"), _build_progress_html(-1),
	            f"Error: No hay API key para {provider}. Configure `{env_key}` en .env",
	            empty_df, "", ref_md, stats_html, "",
	        )
	        return

	    # Register the job in the database. The session is always closed, even
	    # when a query/commit raises, and the job id is captured while the
	    # session is still open so no detached instance is touched later.
	    from backend.database.models import SessionLocal, User, Project, ResearchJob
	    job_id = None
	    db = SessionLocal()
	    try:
	        user = db.query(User).filter(User.username == "admin").first()
	        if user:
	            project = Project(title=f"Investigación: {query[:50]}", owner_id=user.id)
	            db.add(project)
	            db.commit()
	            db_job = ResearchJob(project_id=project.id, query=query, status="running")
	            db.add(db_job)
	            db.commit()
	            db.refresh(db_job)
	            job_id = db_job.id
	    finally:
	        db.close()

	    def _finish_job(status, report=None):
	        """Persist the final job status (and the report, when one is given)."""
	        if job_id is None:
	            return
	        session = SessionLocal()
	        try:
	            job = session.query(ResearchJob).get(job_id)
	            if job:
	                job.status = status
	                if report is not None:
	                    job.report_md = report
	                    job.completed_at = datetime.utcnow()
	                session.commit()
	        finally:
	            session.close()

	    search_sources = sources if sources else ["all"]
	    pipeline = ResearchPipeline(
	        provider=provider, search_model=search_model,
	        synthesis_model=synthesis_model, translation_model=translation_model,
	        api_key=api_key,
	    )
	    _active_pipeline = pipeline

	    accumulated_report = ""
	    accumulated_df = empty_df
	    current_phase = -1

	    try:
	        async for report_md, docs_df in pipeline.run(
	            query=query.strip(), sources=search_sources, profile=profile,
	            depth=int(depth), iterations=int(iterations),
	            include_validation=include_validation,
	            enable_dme=enable_dme, synthesis_strategy=synthesis_strategy,
	            year_start=year_start or None, year_end=year_end or None,
	            university=university or None, grade_mode=grade_mode,
	            geo_context=geo_context,
	            infinite_output=infinite_output,
	            max_continuation_passes=int(max_continuation),
	        ):
	            accumulated_report = report_md
	            # Keep the last non-empty document table; an update may carry
	            # no documents at all.
	            if docs_df is not None and not docs_df.empty:
	                accumulated_df = docs_df

	            current_phase = _detect_phase(report_md)
	            sections_map = _parse_sections_from_report(accumulated_report)
	            last_key = list(sections_map.keys())[-1] if sections_map else ""
	            extra = f"{len(accumulated_df)} docs" if len(accumulated_df) else ""
	            if current_phase == 5 and last_key:
	                extra = f"Redactando: {last_key}"

	            progress_html = _build_progress_html(current_phase, extra)
	            # References are built from the accumulated table, like the
	            # stats and the interactive citations.
	            ref_md = _build_references_html(accumulated_df, accumulated_report)
	            stats_html = _build_stats_html(accumulated_report, accumulated_df)
	            sections_content = _build_section_cards_html(sections_map)

	            paused_label = " ⏸️" if pipeline.is_paused else ""
	            yield (
	                _build_status_html("running", f"Fase {current_phase}{paused_label}"),
	                progress_html, _make_citations_interactive(accumulated_report, accumulated_df), accumulated_df,
	                sections_content, ref_md, stats_html, accumulated_report,
	            )

	        sections_map = _parse_sections_from_report(accumulated_report)
	        sections_content = _build_section_cards_html(sections_map, is_done=True)
	        ref_md = _build_references_html(accumulated_df, accumulated_report)
	        stats_html = _build_stats_html(accumulated_report, accumulated_df)

	        yield (
	            _build_status_html("done", f"{len(accumulated_df)} docs | {len(sections_map)} secciones"),
	            _build_progress_html(7), _make_citations_interactive(accumulated_report, accumulated_df), accumulated_df,
	            sections_content, ref_md, stats_html, accumulated_report,
	        )

	        _finish_job("completed", accumulated_report)

	    except (StopAsyncIteration, asyncio.CancelledError):
	        # Pipeline was stopped by the user: show whatever was produced so far.
	        # NOTE(review): the job row keeps status "running" on this path —
	        # confirm whether a dedicated status should be stored.
	        sections_map = _parse_sections_from_report(accumulated_report)
	        sections_content = _build_section_cards_html(sections_map, is_done=True)
	        ref_md = _build_references_html(accumulated_df, accumulated_report)
	        stats_html = _build_stats_html(accumulated_report, accumulated_df)
	        yield (
	            _build_status_html("error", "⛔ Detenido por el usuario"),
	            _build_progress_html(current_phase, "Detenido"),
	            _make_citations_interactive(accumulated_report + "\n\n---\n⛔ Pipeline detenido por el usuario", accumulated_df),
	            accumulated_df, sections_content, ref_md, stats_html,
	            accumulated_report,
	        )
	    except Exception as e:
	        _finish_job("error")
	        yield (
	            _build_status_html("error", str(e)[:60]),
	            _build_progress_html(current_phase),
	            _make_citations_interactive(f"Error: {str(e)}", accumulated_df), accumulated_df, "", ref_md, stats_html,
	            accumulated_report,
	        )
	    finally:
	        # Only clear the shared reference if it still points at this run, so
	        # a run started from another tab keeps its stop/pause controls.
	        if _active_pipeline is pipeline:
	            _active_pipeline = None
	        await pipeline.close()


	# ══════════════════════════════════════════════════════════════
	# SUPER RESEARCH HANDLER
	# ══════════════════════════════════════════════════════════════

	async def super_research_handler(
	    query, provider, search_model, synthesis_model, translation_model,
	    profile, depth, rounds, include_validation, sources,
	    enable_dme=True, synthesis_strategy="auto",
	    year_start="", year_end="", university="",
	    infinite_output=True, max_continuation=5,
	    grade_mode="original", geo_context="Automático"
	):
	    """Run the multi-round research pipeline for the Super Research tab.

	    This is an async generator. Every yielded item is an 8-tuple in the
	    order: status HTML, progress HTML, report (interactive HTML, or a plain
	    error string on validation failure), documents DataFrame, section cards
	    HTML, references HTML, stats HTML, raw report markdown.

	    ``rounds`` is passed to the pipeline as its ``iterations`` argument.
	    A blank query or a missing provider API key yields a single error tuple
	    and returns. Otherwise a ``ResearchJob`` row is recorded when an
	    ``admin`` user exists, the pipeline is published in the module-level
	    ``_active_pipeline`` so the stop/pause/resume controls can reach it,
	    and one tuple is yielded per pipeline update, followed by a final
	    "done", "stopped" or "error" tuple.
	    """
	    global _active_pipeline

	    import pandas as pd
	    from datetime import datetime

	    empty_df = pd.DataFrame(columns=["Título", "Autores", "Año", "DOI", "Fuente", "GRADE", "PDF URL"])
	    ref_md = "_Sin referencias disponibles aún..._"
	    stats_html = _build_stats_html("", empty_df)

	    if not query or not query.strip():
	        gr.Warning("Ingrese un tema de investigación")
	        yield (
	            _build_status_html("error", "Sin consulta"), _build_progress_html(-1),
	            "Error: Ingrese un tema de investigación.", empty_df,
	            "", ref_md, stats_html, "",
	        )
	        return

	    api_key = os.getenv(PROVIDERS.get(provider, {}).get("env_key", ""), "")
	    if not api_key:
	        env_key = PROVIDERS.get(provider, {}).get("env_key", "?")
	        gr.Warning(f"No hay API key para {provider}. Configure {env_key} en .env")
	        yield (
	            _build_status_html("error", "API key faltante"), _build_progress_html(-1),
	            f"Error: No hay API key para {provider}. Configure `{env_key}` en .env",
	            empty_df, "", ref_md, stats_html, "",
	        )
	        return

	    # Register the job in the database. The session is always closed, even
	    # when a query/commit raises, and the job id is captured while the
	    # session is still open so no detached instance is touched later.
	    from backend.database.models import SessionLocal, User, Project, ResearchJob
	    job_id = None
	    db = SessionLocal()
	    try:
	        user = db.query(User).filter(User.username == "admin").first()
	        if user:
	            project = Project(title=f"Super Inv: {query[:50]}", owner_id=user.id)
	            db.add(project)
	            db.commit()
	            db_job = ResearchJob(project_id=project.id, query=query, status="running")
	            db.add(db_job)
	            db.commit()
	            db.refresh(db_job)
	            job_id = db_job.id
	    finally:
	        db.close()

	    def _finish_job(status, report=None):
	        """Persist the final job status (and the report, when one is given)."""
	        if job_id is None:
	            return
	        session = SessionLocal()
	        try:
	            job = session.query(ResearchJob).get(job_id)
	            if job:
	                job.status = status
	                if report is not None:
	                    job.report_md = report
	                    job.completed_at = datetime.utcnow()
	                session.commit()
	        finally:
	            session.close()

	    search_sources = sources if sources else ["all"]
	    pipeline = ResearchPipeline(
	        provider=provider, search_model=search_model,
	        synthesis_model=synthesis_model, translation_model=translation_model,
	        api_key=api_key,
	    )
	    _active_pipeline = pipeline

	    accumulated_report = ""
	    accumulated_df = empty_df
	    current_phase = -1

	    try:
	        async for report_md, docs_df in pipeline.run(
	            query=query.strip(), sources=search_sources, profile=profile,
	            depth=int(depth), iterations=int(rounds),
	            include_validation=include_validation,
	            enable_dme=enable_dme, synthesis_strategy=synthesis_strategy,
	            year_start=year_start or None, year_end=year_end or None,
	            university=university or None, grade_mode=grade_mode,
	            geo_context=geo_context,
	            infinite_output=infinite_output,
	            max_continuation_passes=int(max_continuation),
	        ):
	            accumulated_report = report_md
	            # Keep the last non-empty document table; an update may carry
	            # no documents at all.
	            if docs_df is not None and not docs_df.empty:
	                accumulated_df = docs_df

	            current_phase = _detect_phase(report_md)
	            sections_map = _parse_sections_from_report(accumulated_report)
	            last_key = list(sections_map.keys())[-1] if sections_map else ""
	            extra = f"{len(accumulated_df)} docs" if len(accumulated_df) else ""
	            if current_phase == 5 and last_key:
	                extra = f"Redactando: {last_key}"

	            progress_html = _build_progress_html(current_phase, extra)
	            # References are built from the accumulated table, like the
	            # stats and the interactive citations.
	            ref_md = _build_references_html(accumulated_df, accumulated_report)
	            stats_html = _build_stats_html(accumulated_report, accumulated_df)
	            sections_content = _build_section_cards_html(sections_map)

	            paused_label = " ⏸️" if pipeline.is_paused else ""
	            yield (
	                _build_status_html("running", f"Fase {current_phase}{paused_label}"),
	                progress_html, _make_citations_interactive(accumulated_report, accumulated_df), accumulated_df,
	                sections_content, ref_md, stats_html, accumulated_report,
	            )

	        sections_map = _parse_sections_from_report(accumulated_report)
	        sections_content = _build_section_cards_html(sections_map, is_done=True)
	        ref_md = _build_references_html(accumulated_df, accumulated_report)
	        stats_html = _build_stats_html(accumulated_report, accumulated_df)

	        yield (
	            _build_status_html("done", f"{len(accumulated_df)} docs | {len(sections_map)} secciones"),
	            _build_progress_html(7), _make_citations_interactive(accumulated_report, accumulated_df), accumulated_df,
	            sections_content, ref_md, stats_html, accumulated_report,
	        )

	        _finish_job("completed", accumulated_report)

	    except (StopAsyncIteration, asyncio.CancelledError):
	        # Pipeline was stopped by the user: show whatever was produced so far.
	        # NOTE(review): the job row keeps status "running" on this path —
	        # confirm whether a dedicated status should be stored.
	        sections_map = _parse_sections_from_report(accumulated_report)
	        sections_content = _build_section_cards_html(sections_map, is_done=True)
	        ref_md = _build_references_html(accumulated_df, accumulated_report)
	        stats_html = _build_stats_html(accumulated_report, accumulated_df)
	        yield (
	            _build_status_html("error", "⛔ Detenido por el usuario"),
	            _build_progress_html(current_phase, "Detenido"),
	            _make_citations_interactive(accumulated_report + "\n\n---\n⛔ Pipeline detenido por el usuario", accumulated_df),
	            accumulated_df, sections_content, ref_md, stats_html,
	            accumulated_report,
	        )
	    except Exception as e:
	        _finish_job("error")
	        yield (
	            _build_status_html("error", str(e)[:60]),
	            _build_progress_html(current_phase),
	            _make_citations_interactive(f"Error: {str(e)}", accumulated_df), accumulated_df, "", ref_md, stats_html,
	            accumulated_report,
	        )
	    finally:
	        # Only clear the shared reference if it still points at this run, so
	        # a run started from another tab keeps its stop/pause controls.
	        if _active_pipeline is pipeline:
	            _active_pipeline = None
	        await pipeline.close()



	# ══════════════════════════════════════════════════════════════
	# SYNTHESIS HANDLER
	# ══════════════════════════════════════════════════════════════

	async def synthesis_handler(
	    query, docs_text, provider, search_model, synthesis_model,
	    translation_model, profile, include_validation,
	    enable_dme=True, synthesis_strategy="auto",
	    grade_mode="original", geo_context="Automático",
	):
	    """Synthesize a report from user-supplied documents and stream UI updates.

	    This is an async generator. Every yielded item is an 8-tuple in the
	    order: status HTML, progress HTML, report, documents DataFrame (always
	    the empty table here), section cards HTML, references HTML, stats HTML,
	    raw report markdown.

	    A blank query, blank ``docs_text`` or a missing provider API key yields
	    a single error tuple and returns. Otherwise the pipeline is run with no
	    search sources and zero iterations, passing ``docs_text`` through, and
	    one tuple is yielded per pipeline update followed by a final "done" or
	    "error" tuple.
	    """
	    import pandas as pd

	    empty_df = pd.DataFrame(columns=["Título", "Autores", "Año", "DOI", "Fuente", "GRADE", "PDF URL"])
	    ref_md = "_Sin referencias disponibles aún..._"
	    stats_html = _build_stats_html("", empty_df)

	    if not query or not query.strip():
	        gr.Warning("Ingrese un tema/título")
	        yield (
	            _build_status_html("error", "Sin consulta"), _build_progress_html(-1),
	            "Error: Ingrese un tema o título para la síntesis.", empty_df,
	            "", ref_md, stats_html, "",
	        )
	        return

	    if not docs_text or not docs_text.strip():
	        gr.Warning("Ingrese al menos 5 documentos")
	        yield (
	            _build_status_html("error", "Sin documentos"), _build_progress_html(-1),
	            "Error: Pegue la lista de documentos en el campo de texto.", empty_df,
	            "", ref_md, stats_html, "",
	        )
	        return

	    api_key = os.getenv(PROVIDERS.get(provider, {}).get("env_key", ""), "")
	    if not api_key:
	        env_key = PROVIDERS.get(provider, {}).get("env_key", "?")
	        gr.Warning(f"No hay API key para {provider}. Configure {env_key} en .env")
	        yield (
	            _build_status_html("error", "API key faltante"), _build_progress_html(-1),
	            f"Error: No hay API key para {provider}. Configure `{env_key}` en .env",
	            empty_df, "", ref_md, stats_html, "",
	        )
	        return

	    pipeline = ResearchPipeline(
	        provider=provider, search_model=search_model,
	        synthesis_model=synthesis_model, translation_model=translation_model,
	        api_key=api_key,
	    )

	    accumulated_report = ""
	    current_phase = 0
	    # Bound up front so the post-loop code works even when the pipeline
	    # finishes without yielding anything.
	    docs_df = empty_df

	    try:
	        async for report_md, docs_df in pipeline.run(
	            query=query.strip(), sources=[], profile=profile,
	            iterations=0, include_validation=include_validation,
	            docs_text=docs_text, enable_dme=enable_dme,
	            synthesis_strategy=synthesis_strategy,
	            grade_mode=grade_mode, geo_context=geo_context,
	        ):
	            accumulated_report = report_md
	            current_phase = _detect_phase(report_md)

	            sections_map = _parse_sections_from_report(accumulated_report)
	            sections_content = _build_section_cards_html(sections_map)
	            ref_md = _build_references_html(docs_df, accumulated_report)
	            stats_html = _build_stats_html(accumulated_report, empty_df)

	            # The report output is an HTML component, so the markdown is
	            # rendered the same way the research handlers render it.
	            yield (
	                _build_status_html("running", "Sintetizando"),
	                _build_progress_html(current_phase),
	                _make_citations_interactive(accumulated_report, empty_df), empty_df,
	                sections_content, ref_md, stats_html, accumulated_report,
	            )

	        sections_map = _parse_sections_from_report(accumulated_report)
	        sections_content = _build_section_cards_html(sections_map, is_done=True)
	        ref_md = _build_references_html(docs_df, accumulated_report)
	        stats_html = _build_stats_html(accumulated_report, empty_df)

	        yield (
	            _build_status_html("done", "Síntesis completada"),
	            _build_progress_html(7),
	            _make_citations_interactive(accumulated_report, empty_df), empty_df,
	            sections_content, ref_md, stats_html, accumulated_report,
	        )

	    except Exception as e:
	        # Eight values, like every other yield: the event is wired to eight
	        # output components and the raw report state must be included.
	        yield (
	            _build_status_html("error", str(e)[:60]),
	            _build_progress_html(current_phase),
	            _make_citations_interactive(f"Error: {str(e)}", empty_df), empty_df, "", ref_md, stats_html,
	            accumulated_report,
	        )
	    finally:
	        await pipeline.close()


	# ══════════════════════════════════════════════════════════════
	# HELPER: Build a premium tab section (shared layout)
	# ══════════════════════════════════════════════════════════════

	def _build_research_panel(prefix, title, subtitle, btn_label, handler_fn, is_super=False):
	    """Build the shared panel used by the Research and Super Research tabs.

	    Must be called inside an active Gradio layout context. Creates the
	    header banner, the left column of controls, the right column of result
	    tabs (report, references, sections, stats, documents, graph, export,
	    plus whatever ``create_chat_tabs`` adds) and wires the export, graph
	    and stop/pause/resume buttons.

	    Args:
	        prefix: Accepted for interface compatibility; not used here.
	        title: Heading shown in the banner.
	        subtitle: Secondary line shown under the heading.
	        btn_label: Label of the main run button.
	        handler_fn: Accepted for interface compatibility; not used here —
	            the caller binds the run button itself.
	        is_super: Selects the rocket icon and the "Rondas" slider (2-5)
	            instead of the "Iteraciones" slider (1-5).

	    Returns:
	        A 28-tuple: index 0 is the run button, indices 1-19 are the input
	        components (query, provider, three model dropdowns, profile, depth,
	        iterations, validation, sources, DME, strategy, year start, year
	        end, university, infinite output, max continuations, GRADE mode,
	        geo context) and indices 20-27 are the output components (status,
	        progress, report, documents, sections, references, stats, raw
	        report state).
	    """
	    # NOTE(review): the nesting of rows/accordions below was reconstructed
	    # from a source whose indentation was lost — confirm against the
	    # intended layout.

	    # ─── Header banner ───
	    gr.HTML(f'''
	<div style="
	display:flex; justify-content:space-between; align-items:center;
	padding:14px 20px; margin-bottom:12px;
	background:linear-gradient(135deg, rgba(139,92,246,0.08), rgba(99,102,241,0.04));
	border:1px solid rgba(139,92,246,0.2); border-radius:14px;
	">
	<div style="display:flex; align-items:center; gap:12px;">
	<div style="
	width:40px; height:40px; border-radius:12px;
	background:linear-gradient(135deg, #8b5cf6, #6366f1);
	display:flex; align-items:center; justify-content:center;
	font-size:20px; box-shadow:0 4px 15px rgba(139,92,246,0.3);
	">{"🚀" if is_super else "🔬"}</div>
	<div>
	<div style="font-size:16px; font-weight:700; color:var(--text, #fff);">
	{title}
	</div>
	<div style="font-size:11px; color:var(--text-muted, #9ca3af);">
	{subtitle}
	</div>
	</div>
	</div>
	<div style="display:flex; gap:8px;">
	<span style="
	display:inline-flex; align-items:center; gap:5px;
	padding:4px 12px; border-radius:20px; font-size:11px; font-weight:600;
	background:rgba(139,92,246,0.1); border:1px solid rgba(139,92,246,0.3); color:#8b5cf6;
	">Pipeline v2.0</span>
	</div>
	</div>
	''')

	    with gr.Row():
	        # ─── LEFT: Controls ───
	        with gr.Column(scale=2):
	            status = gr.HTML(_build_status_html("idle"))
	            progress = gr.HTML(_build_progress_html(-1, "Esperando consulta..."))

	            gr.HTML('''<div class="section-header">💬 Consulta de investigación</div>''')
	            query = gr.Textbox(
	                label="",
	                placeholder="Ej: Impacto de la IA en la educación superior en Perú",
	                lines=3, show_label=False,
	                elem_classes=["glass-input-wrapper"]
	            )

	            with gr.Row():
	                prov = gr.Dropdown(
	                    choices=list(PROVIDERS.keys()), value="mistral",
	                    label="⚡ Proveedor IA", scale=1,
	                )

	            with gr.Accordion("🤖 Modelos por Rol", open=False):
	                search_m = gr.Dropdown(
	                    choices=PROVIDERS["mistral"]["models"],
	                    value=DEFAULT_MODEL, label="🔍 Búsqueda",
	                )
	                synth_m = gr.Dropdown(
	                    choices=PROVIDERS["mistral"]["models"],
	                    value=DEFAULT_MODEL, label="📝 Síntesis",
	                )
	                trans_m = gr.Dropdown(
	                    choices=PROVIDERS["mistral"]["models"],
	                    value=DEFAULT_MODEL, label="🌐 Traducción",
	                )
	            # Changing the provider refreshes the three model dropdowns.
	            prov.change(
	                fn=update_models, inputs=[prov],
	                outputs=[search_m, synth_m, trans_m],
	            )

	            with gr.Accordion("📚 Parámetros de Búsqueda", open=False):
	                src = gr.CheckboxGroup(
	                    choices=ALL_SOURCES, value=ALL_SOURCES, label="Fuentes", show_label=False,
	                )
	                gr.HTML('''
	<div style="display:flex; gap:6px; flex-wrap:wrap; margin:6px 0;">
	<span style="font-size:10px; padding:2px 8px; border-radius:6px; background:rgba(59,130,246,0.08); border:1px solid rgba(59,130,246,0.2); color:#3b82f6;">all = todas</span>
	<span style="font-size:10px; padding:2px 8px; border-radius:6px; background:rgba(34,197,94,0.08); border:1px solid rgba(34,197,94,0.2); color:#22c55e;">latam = Latinoamérica</span>
	<span style="font-size:10px; padding:2px 8px; border-radius:6px; background:rgba(168,85,247,0.08); border:1px solid rgba(168,85,247,0.2); color:#a855f7;">global = PubMed+ArXiv+OpenAlex</span>
	</div>
	''')
	                with gr.Row():
	                    prof = gr.Dropdown(
	                        choices=list(AGENT_PROFILES.keys()),
	                        value="auto", label="🎭 Perfil",
	                    )
	                    dep = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="📏 Profundidad")

	                if is_super:
	                    iters = gr.Slider(minimum=2, maximum=5, value=3, step=1, label="🔄 Rondas")
	                else:
	                    iters = gr.Slider(minimum=1, maximum=5, value=1, step=1, label="🔄 Iteraciones")

	            with gr.Accordion("🔧 Opciones Avanzadas", open=False):
	                geo = gr.Textbox(value="Automático", label="📍 Contexto Geográfico (País/Universidad)", placeholder="Ej: Perú, Universidad Nacional del Santa")
	                val = gr.Checkbox(value=True, label="🔬 Validación de citas (ARA+)")
	                dme = gr.Checkbox(value=True, label="🔧 DME: Reparación + Enriquecimiento")
	                strat = gr.Radio(
	                    choices=["lineal", "jerárquica", "auto"],
	                    value="jerárquica", label="📐 Estrategia de Síntesis",
	                )
	                # Plain "|" separators: "\|" is an invalid escape sequence in
	                # a regular string and would show a stray backslash.
	                grade_mode = gr.Radio(
	                    choices=["original", "keywords", "llm", "oxford", "hybrid"],
	                    value="original", label="📊 Algoritmo GRADE",
	                    info="original: Beta SX | keywords: Rápido | llm: IA Preciso | oxford: CEBM | hybrid: Mixto",
	                )
	                with gr.Row():
	                    yr_s = gr.Textbox(label="📅 Año inicio", placeholder="2020")
	                    yr_e = gr.Textbox(label="📅 Año fin", placeholder="2025")
	                uni = gr.Textbox(label="🏛️ Universidad", placeholder="Ej: UNMSM")
	                inf_out = gr.Checkbox(value=True, label="♾️ Output Infinito")
	                max_cont = gr.Slider(minimum=1, maximum=10, value=5, step=1, label="🔁 Max Continuaciones")

	            btn = gr.Button(
	                btn_label, variant="primary", size="lg",
	                elem_classes=["ejecutar-btn"]
	            )

	            # ─── Control Buttons (Stop/Pause/Resume) ───
	            with gr.Row():
	                pause_btn = gr.Button(
	                    "⏸️ Pausar", size="sm", variant="secondary",
	                    elem_classes=["control-btn-pause"]
	                )
	                resume_btn = gr.Button(
	                    "▶️ Reanudar", size="sm", variant="secondary",
	                    elem_classes=["control-btn-resume"]
	                )
	                stop_btn = gr.Button(
	                    "⛔ Detener", size="sm", variant="stop",
	                    elem_classes=["control-btn-stop"]
	                )

	        # ─── RIGHT: Results ───
	        with gr.Column(scale=3):
	            with gr.Tabs():
	                with gr.TabItem("📄 Informe"):
	                    report = gr.HTML(_make_citations_interactive("", None))
	                with gr.TabItem("📚 Referencias"):
	                    refs = gr.HTML("_Las referencias aparecerán durante la ejecución..._")
	                with gr.TabItem("📑 Secciones"):
	                    sections = gr.HTML(_build_section_cards_html({}))
	                with gr.TabItem("📊 Estadísticas"):
	                    stats = gr.HTML(_build_stats_html("", None))
	                with gr.TabItem("📋 Documentos"):
	                    docs = gr.Dataframe(
	                        headers=["Título", "Autores", "Año", "DOI", "Fuente", "GRADE", "PDF URL"],
	                        label="Documentos Encontrados", wrap=True,
	                    )
	                with gr.TabItem("🌐 Grafo"):
	                    graph_btn = gr.Button("🌐 Generar Grafo de Relaciones", size="sm", elem_classes=["ejecutar-btn"])
	                    graph_html = gr.HTML('''<div style="text-align:center; padding:30px; color:#6b7280;">
	<div style="font-size:36px; margin-bottom:8px;">🌐</div>
	<div style="font-size:13px;">Haz clic en el botón para generar el grafo.</div>
	</div>''')
	                    graph_btn.click(fn=_generate_graph_from_df, inputs=[docs], outputs=[graph_html])

	                # Raw markdown of the report; read by the export buttons and
	                # passed to the chat tabs.
	                report_md_state = gr.State("")

	                with gr.TabItem("📥 Exportar"):
	                    gr.HTML('''<div style="padding:12px; background:rgba(99,102,241,0.06); border:1px solid rgba(99,102,241,0.2); border-radius:12px; margin-bottom:12px;">
	<div style="font-size:14px; font-weight:600; color:#818cf8; margin-bottom:4px;">📥 Exportar Resultados</div>
	<div style="font-size:11px; color:#9ca3af;">Descarga el informe y los documentos en distintos formatos.</div>
	</div>''')
	                    with gr.Row():
	                        export_md_btn = gr.Button("📄 Markdown (.md)", size="sm", variant="secondary")
	                        export_bib_btn = gr.Button("📚 BibTeX (.bib)", size="sm", variant="secondary")
	                    with gr.Row():
	                        export_docx_btn = gr.Button("📝 Word (.docx)", size="sm", variant="secondary")
	                        export_zip_btn = gr.Button("📦 ZIP (Workspace)", size="sm", variant="primary")
	                    export_file = gr.File(label="Archivo generado", visible=True)

	                    from backend.tools.export_utils import export_markdown, export_bibtex, export_zip, export_docx

	                    # Each exporter clears the file widget when there is
	                    # nothing to export.
	                    def _do_export_md(report_state, q):
	                        if not report_state:
	                            return gr.update(value=None)
	                        return export_markdown(report_state, q or "research")

	                    def _do_export_bib(docs_df, q):
	                        if docs_df is None or docs_df.empty:
	                            return gr.update(value=None)
	                        return export_bibtex(docs_df, q or "references")

	                    def _do_export_docx(report_state, q):
	                        if not report_state:
	                            return gr.update(value=None)
	                        path = export_docx(report_state, q or "research")
	                        return path if path else gr.update(value=None)

	                    def _do_export_zip(report_state, docs_df, q):
	                        if not report_state:
	                            return gr.update(value=None)
	                        import pandas as pd
	                        if docs_df is None:
	                            docs_df = pd.DataFrame()
	                        return export_zip(report_state, docs_df, q or "research")

	                    export_md_btn.click(fn=_do_export_md, inputs=[report_md_state, query], outputs=[export_file])
	                    export_bib_btn.click(fn=_do_export_bib, inputs=[docs, query], outputs=[export_file])
	                    export_docx_btn.click(fn=_do_export_docx, inputs=[report_md_state, query], outputs=[export_file])
	                    export_zip_btn.click(fn=_do_export_zip, inputs=[report_md_state, docs, query], outputs=[export_file])

	                # Create chat tabs
	                from modules.chat_tab import create_chat_tabs
	                create_chat_tabs(report_md_state, docs, prov, synth_m)

	    # Wire control buttons
	    stop_btn.click(fn=_control_stop, outputs=[status])
	    pause_btn.click(fn=_control_pause, outputs=[status])
	    resume_btn.click(fn=_control_resume, outputs=[status])

	    # Return all components needed for event binding
	    return (btn, query, prov, search_m, synth_m, trans_m, prof, dep, iters,
	            val, src, dme, strat, yr_s, yr_e, uni, inf_out, max_cont, grade_mode, geo,
	            status, progress, report, docs, sections, refs, stats, report_md_state)



	# ══════════════════════════════════════════════════════════════
	# UI TAB
	# ══════════════════════════════════════════════════════════════

	def create_research_tab():
	    """Build the top-level "Research" tab and its three sub-tabs.

	    The Research and Super Research sub-tabs are produced by
	    ``_build_research_panel`` and bound to ``research_handler`` and
	    ``super_research_handler``. The Síntesis sub-tab is laid out inline and
	    bound to ``synthesis_handler``.
	    """
	    # NOTE(review): the nesting of rows/columns/accordions below was
	    # reconstructed from a source whose indentation was lost — confirm
	    # against the intended layout.
	    with gr.Tab("🔬 Research", id="research"):
	        # Keyframe animations injected once for the whole tab.
	        gr.HTML('''<style>
	@keyframes pulse { 0%,100%{opacity:1} 50%{opacity:.6} }
	@keyframes slideIn { from{opacity:0;transform:translateY(8px)} to{opacity:1;transform:translateY(0)} }
	@keyframes fadeIn { from{opacity:0} to{opacity:1} }
	</style>''')

	        with gr.Tabs():
	            # ─── RESEARCH ───
	            with gr.TabItem("🔬 Research"):
	                r = _build_research_panel(
	                    "r", "Research Pipeline",
	                    "Búsqueda iterativa + síntesis con IA en tiempo real",
	                    "🚀 Ejecutar Research", research_handler, is_super=False
	                )
	                # r[0] is the run button; r[1:20] are the 19 input
	                # components in the handler's parameter order and r[20:28]
	                # are the 8 output components.
	                r[0].click(
	                    fn=research_handler,
	                    inputs=list(r[1:20]),
	                    outputs=list(r[20:28]),
	                )

	            # ─── SUPER RESEARCH ───
	            with gr.TabItem("🚀 Super Research"):
	                s = _build_research_panel(
	                    "s", "Super Research Pipeline",
	                    "Investigación profunda multi-ronda con validación cruzada",
	                    "⚡ Ejecutar Super Research", super_research_handler, is_super=True
	                )
	                # Same slicing convention as the Research tab above.
	                s[0].click(
	                    fn=super_research_handler,
	                    inputs=list(s[1:20]),
	                    outputs=list(s[20:28]),
	                )

	            # ─── SYNTHESIS ───
	            with gr.TabItem("📝 Síntesis"):
	                gr.HTML('''
	<div style="
	display:flex; justify-content:space-between; align-items:center;
	padding:14px 20px; margin-bottom:12px;
	background:linear-gradient(135deg, rgba(16,185,129,0.08), rgba(6,182,212,0.04));
	border:1px solid rgba(16,185,129,0.2); border-radius:14px;
	">
	<div style="display:flex; align-items:center; gap:12px;">
	<div style="
	width:40px; height:40px; border-radius:12px;
	background:linear-gradient(135deg, #10b981, #06b6d4);
	display:flex; align-items:center; justify-content:center;
	font-size:20px; box-shadow:0 4px 15px rgba(16,185,129,0.3);
	">📝</div>
	<div>
	<div style="font-size:16px; font-weight:700; color:var(--text, #fff);">
	Síntesis de Documentos
	</div>
	<div style="font-size:11px; color:var(--text-muted, #9ca3af);">
	Generar informe a partir de documentos proporcionados
	</div>
	</div>
	</div>
	</div>
	''')

	                with gr.Row():
	                    # Left column: inputs and options.
	                    with gr.Column(scale=2):
	                        y_status = gr.HTML(_build_status_html("idle"))
	                        y_progress = gr.HTML(_build_progress_html(-1, "Esperando consulta..."))

	                        gr.HTML('''<div class="section-header">💬 Tema / Título</div>''')
	                        y_query = gr.Textbox(
	                            label="", show_label=False,
	                            placeholder="Ej: Marco teórico sobre gestión del conocimiento",
	                            lines=2, elem_classes=["glass-input-wrapper"]
	                        )

	                        gr.HTML('''<div class="section-header" style="margin-top:8px;">📄 Documentos</div>''')
	                        y_docs = gr.Textbox(
	                            label="", show_label=False,
	                            placeholder="[1] García (2023) - Gestión del conocimiento en Perú\n[2] Smith (2022) - Knowledge management systems\n[3] López (2024) - Bases de datos académicas",
	                            lines=8, elem_classes=["glass-input-wrapper"]
	                        )

	                        y_provider = gr.Dropdown(
	                            choices=list(PROVIDERS.keys()), value="mistral",
	                            label="⚡ Proveedor IA",
	                        )

	                        with gr.Accordion("🤖 Modelos por Rol", open=False):
	                            y_search_model = gr.Dropdown(
	                                choices=PROVIDERS["mistral"]["models"],
	                                value=DEFAULT_MODEL, label="🔍 Búsqueda",
	                            )
	                            y_synthesis_model = gr.Dropdown(
	                                choices=PROVIDERS["mistral"]["models"],
	                                value=DEFAULT_MODEL, label="📝 Síntesis",
	                            )
	                            y_translation_model = gr.Dropdown(
	                                choices=PROVIDERS["mistral"]["models"],
	                                value=DEFAULT_MODEL, label="🌐 Traducción",
	                            )
	                        # Changing the provider refreshes the three model dropdowns.
	                        y_provider.change(
	                            fn=update_models, inputs=[y_provider],
	                            outputs=[y_search_model, y_synthesis_model, y_translation_model],
	                        )

	                        with gr.Accordion("🔧 Opciones Avanzadas", open=False):
	                            with gr.Row():
	                                y_profile = gr.Dropdown(
	                                    choices=list(AGENT_PROFILES.keys()),
	                                    value="auto", label="🎭 Perfil",
	                                )
	                                y_validation = gr.Checkbox(value=True, label="🔬 Validación ARA+")
	                            y_geo = gr.Textbox(value="Automático", label="📍 Contexto Geográfico (País/Universidad)", placeholder="Ej: Perú, Universidad Nacional del Santa")
	                            y_enable_dme = gr.Checkbox(value=True, label="🔧 DME")
	                            y_synthesis_strategy = gr.Radio(
	                                choices=["lineal", "jerárquica", "auto"],
	                                value="jerárquica", label="📐 Estrategia",
	                            )
	                            y_grade_mode = gr.Radio(
	                                choices=["original", "keywords", "llm", "oxford", "hybrid"],
	                                value="original", label="📊 Algoritmo GRADE",
	                            )

	                        y_btn = gr.Button(
	                            "📝 Ejecutar Síntesis", variant="primary", size="lg",
	                            elem_classes=["ejecutar-btn"]
	                        )

	                    # Right column: result tabs.
	                    with gr.Column(scale=3):
	                        with gr.Tabs():
	                            with gr.TabItem("📄 Informe"):
	                                y_report = gr.HTML(_make_citations_interactive("", None))
	                            with gr.TabItem("📚 Referencias"):
	                                # NOTE(review): this is a Markdown component while
	                                # the handler fills it from _build_references_html
	                                # (presumably HTML, going by the name) — verify.
	                                y_refs = gr.Markdown("_Las referencias aparecerán aquí..._")
	                            with gr.TabItem("📑 Secciones"):
	                                y_sections = gr.HTML(_build_section_cards_html({}))
	                            with gr.TabItem("📊 Estadísticas"):
	                                y_stats = gr.HTML(_build_stats_html("", None))
	                            with gr.TabItem("📋 Documentos"):
	                                y_docs_out = gr.Dataframe(
	                                    headers=["Título", "Autores", "Año", "DOI", "Fuente", "GRADE", "PDF URL"],
	                                    label="Documentos", wrap=True,
	                                )

	                # Holds the raw report markdown (last handler output).
	                y_report_md_state = gr.State("")

	                # Inputs follow synthesis_handler's positional parameter
	                # order; the handler yields one value per listed output.
	                y_btn.click(
	                    fn=synthesis_handler,
	                    inputs=[
	                        y_query, y_docs, y_provider, y_search_model,
	                        y_synthesis_model, y_translation_model, y_profile,
	                        y_validation, y_enable_dme, y_synthesis_strategy,
	                        y_grade_mode, y_geo,
	                    ],
	                    outputs=[
	                        y_status, y_progress, y_report, y_docs_out,
	                        y_sections, y_refs, y_stats, y_report_md_state,
	                    ],
	                )