HydraDeck / app.py
OpeneR
Remove Gradio runtime and use FastAPI-only backend
c27b609
from __future__ import annotations
import warnings
warnings.filterwarnings(
"ignore",
message=r"urllib3 v2 only supports OpenSSL 1\.1\.1\+.*",
)
import tempfile
import zipfile
import json
import time
from concurrent.futures import ThreadPoolExecutor
from queue import Empty, Queue
from pathlib import Path
from typing import Any
from urllib.error import HTTPError, URLError
from urllib.parse import quote, urlparse
from urllib.request import Request, urlopen
from hydradeck.clients import ChatMessage, GrokClient
from hydradeck.config import resolve_api_key, resolve_base_url, resolve_model
from hydradeck.core.types import RunConfig
from hydradeck.pipeline import run
from hydradeck.render import (
build_slide_frames_from_sections,
enforce_slide_density,
render_beamer_frames,
render_paper,
render_report_structured,
)
# Browser-style User-Agent header value sent with the API probe/completion
# requests and the latexonline.cc compile requests built in this module.
CHROME_144_UA = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/144.0.0.0 Safari/537.36"
)
def _normalized_base_url(base_url: str) -> str:
parsed = urlparse(base_url.strip())
if parsed.scheme not in {"http", "https"}:
raise ValueError("Base URL must start with http:// or https://")
if not parsed.netloc:
raise ValueError("Base URL is missing host")
return base_url.strip().rstrip("/")
def _preflight_check(base_url: str, api_key: str, request_budget: float) -> str | None:
if not api_key.strip():
return "Missing API key. Fill API Key field or set GROK_API_KEY before running."
try:
normalized = _normalized_base_url(base_url)
except ValueError as exc:
return f"Invalid Base URL: {exc}"
probe_url = f"{normalized}/v1/models"
timeout_s = max(2.0, min(float(request_budget), 6.0))
req = Request(
probe_url,
headers={
"Authorization": f"Bearer {api_key.strip()}",
"User-Agent": CHROME_144_UA,
},
)
try:
with urlopen(req, timeout=timeout_s):
return None
except HTTPError as exc:
try:
body = exc.read().decode("utf-8", errors="replace")
except Exception:
body = ""
if exc.code == 403 and "error code: 1010" in body.lower():
return (
"Gateway blocked this client (Cloudflare 1010), not an API-key issue. "
"Try another network/egress IP or ask gateway admin to allow this IP."
)
if exc.code in {401, 403}:
return "API key rejected (401/403). Please update GROK_API_KEY or paste a valid key."
return f"API endpoint returned HTTP {exc.code} during preflight."
except URLError as exc:
return f"Cannot reach API endpoint ({probe_url}): {exc.reason}"
except TimeoutError:
return (
f"API preflight timed out after {timeout_s:.0f}s. "
"Try mock mode first, then increase Request budget."
)
def _api_quick_check(base_url: str, api_key: str, model: str, request_budget: float) -> str:
    """Run the preflight probe plus a tiny chat completion and summarize the result.

    Blank ``base_url`` / ``api_key`` / ``model`` fall back to the values
    returned by ``resolve_base_url`` / ``resolve_api_key`` / ``resolve_model``.

    Returns:
        A status sentence starting with ``API check passed``,
        ``API check uncertain`` or ``API check failed``.
    """
    # Local import: before Python 3.10 ``socket.timeout`` is not an alias of
    # ``TimeoutError``, so a read timeout would escape the handler below.
    import socket

    selected_base_url = base_url.strip() or resolve_base_url("https://api.example.com")
    selected_api_key = api_key.strip() or resolve_api_key()
    preflight_error = _preflight_check(selected_base_url, selected_api_key, request_budget)
    if preflight_error is not None:
        return f"API check failed: {preflight_error}"

    normalized = _normalized_base_url(selected_base_url)
    req_model = model.strip() or resolve_model("grok-3-mini")
    payload = {
        "model": req_model,
        "messages": [{"role": "user", "content": "reply with exactly: API_OK"}],
        "temperature": 0,
        "max_tokens": 8,
    }
    req = Request(
        f"{normalized}/v1/chat/completions",
        method="POST",
        data=json.dumps(payload).encode("utf-8"),
        headers={
            "Authorization": f"Bearer {selected_api_key.strip()}",
            "User-Agent": CHROME_144_UA,
            "Content-Type": "application/json",
        },
    )
    # The completion gets a slightly larger window than the preflight probe.
    timeout_s = max(3.0, min(float(request_budget), 12.0))
    try:
        with urlopen(req, timeout=timeout_s) as resp:
            body = resp.read().decode("utf-8", errors="replace")
    except HTTPError as exc:
        # Reading the error body can itself fail; never let that mask the status.
        try:
            text = exc.read().decode("utf-8", errors="replace")
        except Exception:
            text = ""
        return f"API check failed: HTTP {exc.code} {text[:180]}"
    except URLError as exc:
        return f"API check failed: network error {exc.reason}"
    except (TimeoutError, socket.timeout):
        return f"API check failed: completion timeout after {timeout_s:.0f}s"

    if "API_OK" not in body:
        return f"API check uncertain: completion returned unexpected body: {body[:180]}"
    return "API check passed: models/completions reachable and auth works."
def _compile_latex_online(tex_source: str, output_name: str) -> str:
    """Compile *tex_source* to PDF through latexonline.cc and save it locally.

    The engines ``xelatex``, ``lualatex`` and ``pdflatex`` are tried in that
    order. For each engine the source is sent inline in the query string when
    the resulting URL is at most 6000 characters; otherwise (or when the
    inline attempt returns something that is not a PDF) the source is uploaded
    to paste.rs and compiled from that hosted URL.

    Args:
        tex_source: complete LaTeX document.
        output_name: file name for the PDF inside the temporary directory.

    Returns:
        Path of the written PDF as a string.

    Raises:
        RuntimeError: if no engine produced a response starting with ``%PDF``.
    """

    def _compile_via_hosted_url(command: str) -> bytes:
        # Upload the source, then ask latexonline to fetch and compile it.
        upload_req = Request("https://paste.rs", data=tex_source.encode("utf-8"), method="POST")
        with urlopen(upload_req, timeout=30) as upload_resp:
            hosted_url = upload_resp.read().decode("utf-8", errors="replace").strip()
        compile_from_url = (
            "https://latexonline.cc/compile?url="
            + quote(hosted_url, safe=":/?=&")
            + "&command="
            + command
            + "&force=true"
        )
        req2 = Request(compile_from_url, headers={"User-Agent": CHROME_144_UA})
        with urlopen(req2, timeout=120) as resp2:
            return resp2.read()

    # Percent-encoding the document does not depend on the engine; do it once.
    inline_prefix = "https://latexonline.cc/compile?text=" + quote(tex_source, safe="") + "&command="

    errors: list[str] = []
    blob = b""
    for command in ["xelatex", "lualatex", "pdflatex"]:
        try:
            compile_url = inline_prefix + command + "&force=true"
            if len(compile_url) <= 6000:
                req = Request(compile_url, headers={"User-Agent": CHROME_144_UA})
                with urlopen(req, timeout=90) as resp:
                    blob = resp.read()
                if blob.startswith(b"%PDF"):
                    break
            # Single hosted attempt per engine: either the URL was too long for
            # the inline route, or the inline route returned a non-PDF body.
            blob = _compile_via_hosted_url(command)
            if blob.startswith(b"%PDF"):
                break
            errors.append(f"{command}: non-pdf response")
        except HTTPError as exc:
            body = exc.read().decode("utf-8", errors="replace")
            errors.append(f"{command}: HTTP {exc.code} {body[:500]}")
        except Exception as exc:
            # Best effort: record the failure and move on to the next engine.
            errors.append(f"{command}: {exc}")

    if not blob.startswith(b"%PDF"):
        raise RuntimeError("online renderer failed: " + " | ".join(errors[:3]))

    # Platform temp directory instead of a hard-coded "/tmp".
    out_path = Path(tempfile.gettempdir()) / output_name
    _ = out_path.write_bytes(blob)
    return str(out_path)
def _extract_json_object(text: str) -> dict[str, Any]:
raw = text.strip()
if not raw:
raise RuntimeError("empty JSON response")
try:
parsed = json.loads(raw)
if isinstance(parsed, dict):
return parsed
except json.JSONDecodeError:
pass
start = raw.find("{")
end = raw.rfind("}")
if start == -1 or end == -1 or end <= start:
raise RuntimeError("no JSON object found in response")
parsed2 = json.loads(raw[start : end + 1])
if not isinstance(parsed2, dict):
raise RuntimeError("top-level JSON is not an object")
return parsed2
def _chat_json_resilient(
client: GrokClient,
messages: list[ChatMessage],
schema_hint: str,
temperature: float,
timeout_s: float,
) -> dict[str, Any]:
try:
obj = client.chat_json(
messages,
schema_hint=schema_hint,
temperature=temperature,
timeout_s=timeout_s,
)
if isinstance(obj, dict):
return obj
except Exception:
pass
try:
text = client.chat_text(messages, temperature=temperature, timeout_s=timeout_s)
return _extract_json_object(text)
except Exception:
return {}
def _build_stage_model_map(
requested_model: str,
overrides: dict[str, str] | None = None,
) -> dict[str, str]:
base = requested_model.strip() or resolve_model("grok-3-mini")
high = base
if "mini" in base:
high = base.replace("-mini", "")
if high == base and base == "grok-3-mini":
high = "grok-3"
model_map = {
"scope": base,
"structure": high,
"planner": high,
"section": base,
"paper": high,
"slides": high,
}
if overrides:
for key in model_map:
v = overrides.get(key, "").strip()
if v:
model_map[key] = v
return model_map
def _looks_like_template_text(text: str) -> bool:
low = text.lower().strip()
if not low:
return True
bad_markers = [
"this section is generated",
"no content generated",
"lorem ipsum",
"to be filled",
"placeholder",
"add key evidence-backed findings",
"补充关键事实与证据",
]
return any(m in low for m in bad_markers)
def _assert_not_template_output(module_name: str, text: str) -> None:
    """Raise ``RuntimeError`` when *text* is flagged by ``_looks_like_template_text``.

    *module_name* only appears in the error message.
    """
    if not _looks_like_template_text(text):
        return
    raise RuntimeError(f"{module_name} produced template-like content; retry required")
def _section_quality_ok(section_title: str, latex_body: str, language: str) -> bool:
    """Decide whether a drafted section body is substantial enough to keep.

    A body passes when it is not template-like, is at least 120 characters
    after stripping, and contains at least 20 characters in the
    U+4E00..U+9FFF range (``language == "zh"``) or at least 40
    space-separated words (any other language).
    """
    _ = section_title  # accepted for interface parity; not consulted
    if _looks_like_template_text(latex_body):
        return False

    trimmed = latex_body.strip()
    if len(trimmed) < 120:
        return False

    if language == "zh":
        han_count = len([ch for ch in trimmed if "\u4e00" <= ch <= "\u9fff"])
        return han_count >= 20

    tokens = [tok for tok in trimmed.replace("\n", " ").split(" ") if tok]
    return len(tokens) >= 40
def _run_agentic_pipeline(
topic: str,
model: str,
base_url: str,
api_key: str,
request_budget: float,
use_mock: bool,
progress=None,
stage_callback=None,
language: str = "en",
stage_models: dict[str, str] | None = None,
) -> tuple[str, str, str, str, str, str, str, str, str]:
if not topic.strip():
return "Topic is required.", "", "", "", "", "", "", "", ""
selected_base_url = base_url.strip() or resolve_base_url("https://api.example.com")
selected_api_key = api_key.strip() or resolve_api_key()
selected_model = model.strip() or resolve_model("grok-3-mini")
lang = language.strip().lower()
if lang not in {"en", "zh"}:
lang = "en"
model_map = _build_stage_model_map(selected_model, overrides=stage_models)
total_steps = 9
stage_logs: list[str] = []
def mark(step: int, label: str, detail: str) -> None:
pct = min(max(step / total_steps, 0.0), 1.0)
if callable(progress):
_ = progress(pct, desc=label)
stage_logs.append(f"{step}/{total_steps} {label}: {detail}")
def emit_stage(
step: int,
label: str,
detail: str,
scope_text: str = "",
section_text: str = "",
paper_text: str = "",
slides_text: str = "",
pdf_paths_text: str = "",
paper_pdf_text: str = "",
slides_pdf_text: str = "",
) -> None:
if stage_callback is None:
return
payload = {
"status": f"Running: {label}",
"progress_log": "\n".join(stage_logs),
"scope": scope_text,
"sections": section_text,
"paper": paper_text,
"slides": slides_text,
"pdf_paths": pdf_paths_text,
"paper_pdf": paper_pdf_text,
"slides_pdf": slides_pdf_text,
"progress": int(min(100, max(0, round(step / total_steps * 100)))),
"stage": label,
"detail": detail,
}
stage_callback(payload)
mark(1, "Preflight", "checking API connectivity")
emit_stage(1, "Preflight", "checking API connectivity")
if not use_mock:
preflight_error = _preflight_check(selected_base_url, selected_api_key, request_budget)
if preflight_error is not None:
return (
f"Agentic run failed: {preflight_error}",
"\n".join(stage_logs),
"",
"",
"",
"",
"",
"",
"",
)
scope_payload: dict[str, object]
section_plan: list[dict[str, str]]
section_blocks: list[dict[str, str]] = []
paper_tex = ""
slides_tex = ""
if use_mock:
mark(2, "Agent-1 ScopeScout", "using mock scope")
scope_payload = {
"project_links": [
{
"title": "RynnBrain repo",
"url": "https://github.com/alibaba-damo-academy/RynnBrain",
"reason": "Core project artifact",
},
{
"title": "arXiv references",
"url": "https://arxiv.org",
"reason": "Peer-reviewed baseline papers",
},
],
"scope": {
"in_scope": ["architecture", "training/inference workflow", "evaluation evidence"],
"out_scope": ["business roadmap", "non-technical marketing claims"],
"key_questions": [
"What problem is solved?",
"What architecture choices matter?",
"What evidence supports claims?",
],
},
}
emit_stage(
2,
"Agent-1 ScopeScout",
"scope resolved",
scope_text=json.dumps(scope_payload, ensure_ascii=False, indent=2),
)
mark(3, "Agent-StructureDesigner", "designing report structure")
structure_plan = {
"title": topic.strip(),
"sections": [
{"name": "Abstract", "goal": "State problem, method, key findings, and significance."},
{"name": "Introduction", "goal": "Context, motivation, and clear research question."},
{"name": "Methodology", "goal": "System design, assumptions, and evaluation protocol."},
{"name": "Results", "goal": "Evidence-backed findings with explicit source links."},
{"name": "Discussion", "goal": "Interpretation, limitations, and trade-offs."},
{"name": "Conclusion", "goal": "Takeaways and future work."},
],
"slide_style": {
"max_bullets": 5,
"max_words_per_bullet": 14,
"visual_density": "low",
"must_include": ["agenda", "method diagram slide", "results table slide", "limitations"],
},
}
emit_stage(
3,
"Agent-StructureDesigner",
"report structure designed",
scope_text=json.dumps(scope_payload, ensure_ascii=False, indent=2),
section_text=json.dumps(structure_plan, ensure_ascii=False, indent=2),
)
mark(4, "Agent-2 TemplatePlanner", "building section summaries from templates")
section_plan = [
{"name": "Abstract", "summary": "Concise summary of problem, method, findings, and impact."},
{"name": "Introduction", "summary": "Problem framing and motivation in research context."},
{"name": "Methodology", "summary": "System architecture and methodological decisions."},
{"name": "Results", "summary": "Empirical findings and traceable evidence."},
{"name": "Discussion", "summary": "Interpretation of findings and practical implications."},
{"name": "Conclusion", "summary": "Actionable takeaways and next steps."},
]
if lang == "zh":
section_plan = [
{"name": "摘要", "summary": "概述研究问题、方法、关键发现与价值。"},
{"name": "引言", "summary": "说明背景、动机与研究问题。"},
{"name": "方法", "summary": "阐述系统架构、方法流程与评估设置。"},
{"name": "结果", "summary": "给出可追溯证据支持的核心结论。"},
{"name": "讨论", "summary": "解释结果意义、局限与适用边界。"},
{"name": "结论", "summary": "总结与后续研究建议。"},
]
emit_stage(
4,
"Agent-2 TemplatePlanner",
"section plan prepared",
scope_text=json.dumps(scope_payload, ensure_ascii=False, indent=2),
section_text=json.dumps({"sections": section_plan}, ensure_ascii=False, indent=2),
)
mark(5, "Section Agents", "drafting per-section TeX blocks")
for sec in section_plan:
section_blocks.append(
{
"name": sec["name"],
"latex": (
f"\\subsection*{{{sec['name']}}}\n"
f"{sec['summary']}\\\n"
"Evidence should map directly to claims and include method-specific details."
),
}
)
emit_stage(
5,
"Section Agents",
"section drafts ready",
scope_text=json.dumps(scope_payload, ensure_ascii=False, indent=2),
section_text=json.dumps({"sections": section_plan}, ensure_ascii=False, indent=2),
paper_text="\n\n".join(block["latex"] for block in section_blocks),
)
mark(6, "Integrator-Paper", "merging section TeX into paper")
paper_tex = render_report_structured(topic.strip(), section_blocks, language=lang)
mark(7, "Integrator-Beamer", "building slide deck from report")
frames = build_slide_frames_from_sections(section_blocks, language=lang)
frames = enforce_slide_density(frames, language=lang)
slides_tex = render_beamer_frames(topic.strip(), frames, language=lang)
else:
timeout_s = max(12.0, min(float(request_budget), 40.0))
client_scope = GrokClient(
base_url=selected_base_url,
api_key=selected_api_key,
model=model_map["scope"],
timeout_s=timeout_s,
max_retries=2,
heartbeat=False,
)
client_structure = GrokClient(
base_url=selected_base_url,
api_key=selected_api_key,
model=model_map["structure"],
timeout_s=timeout_s,
max_retries=2,
heartbeat=False,
)
client_planner = GrokClient(
base_url=selected_base_url,
api_key=selected_api_key,
model=model_map["planner"],
timeout_s=timeout_s,
max_retries=2,
heartbeat=False,
)
client_section = GrokClient(
base_url=selected_base_url,
api_key=selected_api_key,
model=model_map["section"],
timeout_s=timeout_s,
max_retries=2,
heartbeat=False,
)
client_paper = GrokClient(
base_url=selected_base_url,
api_key=selected_api_key,
model=model_map["paper"],
timeout_s=timeout_s,
max_retries=2,
heartbeat=False,
)
client_slides = GrokClient(
base_url=selected_base_url,
api_key=selected_api_key,
model=model_map["slides"],
timeout_s=timeout_s,
max_retries=2,
heartbeat=False,
)
quick_scope = {
"project_links": [
{
"title": f"{topic.strip()} official repository",
"url": "https://github.com",
"reason": "Seed placeholder before remote scope enrichment.",
}
],
"scope": {
"in_scope": ["architecture", "method", "evidence"],
"out_scope": ["marketing narrative", "non-technical roadmap"],
"key_questions": [
"What core problem is solved?",
"What design decisions matter most?",
"What evidence is verifiable?",
],
},
}
emit_stage(
2,
"Agent-1 ScopeScout",
"quick skeleton ready; enriching with remote call",
scope_text=json.dumps(quick_scope, ensure_ascii=False, indent=2),
)
mark(2, "Agent-1 ScopeScout", "asking Grok for project links + scope")
try:
scope_payload = _chat_json_resilient(
client_scope,
[
ChatMessage(
role="system",
content=(
"You are ScopeScout. Find key project links and define an initial technical research scope."
),
),
ChatMessage(
role="user",
content=(
"Topic: "
+ topic.strip()
+ "\nReturn JSON with keys: project_links (list of {title,url,reason}),"
+ " scope ({in_scope:[...], out_scope:[...], key_questions:[...]})"
),
),
],
schema_hint=(
'{"project_links":[{"title":"...","url":"https://...","reason":"..."}],'
'"scope":{"in_scope":["..."],"out_scope":["..."],"key_questions":["..."]}}'
),
temperature=0.1,
timeout_s=min(timeout_s, 18.0),
)
except Exception:
scope_payload = quick_scope
emit_stage(
2,
"Agent-1 ScopeScout",
"scope resolved",
scope_text=json.dumps(scope_payload, ensure_ascii=False, indent=2),
)
mark(3, "Agent-StructureDesigner", "designing report architecture and slide style")
structure_obj = _chat_json_resilient(
client_structure,
[
ChatMessage(
role="system",
content=(
"You are StructureDesigner. Build a publication-grade report architecture and a presentation"
" style guide before drafting any sections."
+ (" Respond in Chinese." if lang == "zh" else " Respond in English.")
),
),
ChatMessage(
role="user",
content=(
"Topic: "
+ topic.strip()
+ "\nScope JSON: "
+ json.dumps(scope_payload, ensure_ascii=False)
+ "\nReturn JSON {report_blueprint:{section_order:[...],section_goals:[...]},"
+ " slide_style:{theme,max_bullets,max_words_per_bullet,visual_rules:[...]}}"
+ " Ensure this is a RESEARCH REPORT structure (not academic paper IMRaD rigidity)."
),
),
],
schema_hint='{"report_blueprint":{"section_order":["..."],"section_goals":["..."]},"slide_style":{"theme":"..."}}',
temperature=0.15,
timeout_s=timeout_s,
)
if not isinstance(structure_obj, dict) or not structure_obj:
structure_obj = {
"report_blueprint": {
"section_order": [
"Abstract",
"Introduction",
"Methodology",
"Results",
"Discussion",
"Conclusion",
],
"section_goals": [
"Summarize research contribution",
"Define context and question",
"Describe method rigorously",
"Present evidence with citations",
"Discuss limits and implications",
"Conclude and future work",
],
},
"slide_style": {
"theme": "metropolis-like clean",
"max_bullets": 5,
"max_words_per_bullet": 14,
"visual_rules": [
"one idea per slide",
"results in table/figure frame",
"consistent color accents",
],
},
}
emit_stage(
3,
"Agent-StructureDesigner",
"structure blueprint ready",
scope_text=json.dumps(scope_payload, ensure_ascii=False, indent=2),
section_text=json.dumps(structure_obj, ensure_ascii=False, indent=2),
)
mark(4, "Agent-2 TemplatePlanner", "mapping scope to paper/beamer section summaries")
section_obj = _chat_json_resilient(
client_planner,
[
ChatMessage(
role="system",
content=(
"You are TemplatePlanner. Based on scope and LaTeX paper/beamer structure, define section"
" summaries that downstream section agents will write."
+ (" Respond in Chinese." if lang == "zh" else " Respond in English.")
),
),
ChatMessage(
role="user",
content=(
"Topic: "
+ topic.strip()
+ "\nScope JSON: "
+ json.dumps(scope_payload, ensure_ascii=False)
+ "\nStructure JSON: "
+ json.dumps(structure_obj, ensure_ascii=False)
+ "\nReturn JSON: {sections:[{name,summary}]} with 6-8 sections for a RESEARCH REPORT."
+ " Ensure section names are concise and audience-friendly."
),
),
],
schema_hint='{"sections":[{"name":"Introduction","summary":"..."}]}',
temperature=0.1,
timeout_s=timeout_s,
)
raw_sections = section_obj.get("sections")
section_plan = [
{"name": str(x.get("name", "Section")), "summary": str(x.get("summary", ""))}
for x in raw_sections
if isinstance(x, dict)
] if isinstance(raw_sections, list) else []
section_plan = section_plan[:6]
if not section_plan:
section_plan = [
{"name": "Abstract", "summary": "Concise summary of contribution and findings."},
{"name": "Introduction", "summary": "Problem framing and objectives."},
{"name": "Methodology", "summary": "Core architecture and methodology."},
{"name": "Results", "summary": "Findings grounded in verifiable sources."},
]
emit_stage(
4,
"Agent-2 TemplatePlanner",
"section plan prepared",
scope_text=json.dumps(scope_payload, ensure_ascii=False, indent=2),
section_text=json.dumps({"sections": section_plan}, ensure_ascii=False, indent=2),
)
mark(5, "Section Agents", "researching each section and drafting TeX fragments")
for idx, sec in enumerate(section_plan, start=1):
section_title = sec["name"]
latex_body = ""
for attempt in range(1, 4):
sec_obj = _chat_json_resilient(
client_section,
[
ChatMessage(
role="system",
content=(
"You are a SectionResearchAgent. Write a rigorous LaTeX fragment for your assigned"
" section only."
+ (" Output Chinese text." if lang == "zh" else " Output English text.")
),
),
ChatMessage(
role="user",
content=(
f"Topic: {topic.strip()}\nSection: {sec['name']}\nSummary: {sec['summary']}\n"
f"Structure JSON: {json.dumps(structure_obj, ensure_ascii=False)}\n"
"Return JSON {section_title, latex_body}. latex_body must be plain LaTeX paragraphs"
" without documentclass/begin{document}, with evidence-driven style and citation markers."
" Keep each paragraph focused and concise for report readability."
" Minimum: 2 substantive paragraphs. No placeholder text."
),
),
],
schema_hint='{"section_title":"...","latex_body":"\\subsection*{...} ..."}',
temperature=0.1,
timeout_s=timeout_s,
)
cand_title = sec_obj.get("section_title")
cand_body = sec_obj.get("latex_body")
if isinstance(cand_title, str) and cand_title.strip():
section_title = cand_title.strip()
if isinstance(cand_body, str):
latex_body = cand_body.strip()
if _section_quality_ok(section_title, latex_body, lang):
break
emit_stage(
5,
"Section Agents",
f"quality gate retry {attempt}/3 for section {idx}",
scope_text=json.dumps(scope_payload, ensure_ascii=False, indent=2),
section_text=json.dumps({"sections": section_plan}, ensure_ascii=False, indent=2),
paper_text="\n\n".join(block["latex"] for block in section_blocks),
)
if not _section_quality_ok(section_title, latex_body, lang):
raise RuntimeError(
f"Section agent failed quality gate after retries: {section_title}"
)
section_blocks.append({"name": section_title, "latex": latex_body})
mark(4, "Section Agents", f"completed {idx}/{len(section_plan)} sections")
emit_stage(
5,
"Section Agents",
f"completed {idx}/{len(section_plan)} sections",
scope_text=json.dumps(scope_payload, ensure_ascii=False, indent=2),
section_text=json.dumps({"sections": section_plan}, ensure_ascii=False, indent=2),
paper_text="\n\n".join(block["latex"] for block in section_blocks),
)
mark(6, "Integrator-Paper", "assembling full paper.tex")
paper_obj = _chat_json_resilient(
client_paper,
[
ChatMessage(
role="system",
content=(
"You are ReportIntegrator. Produce a professional LaTeX RESEARCH REPORT"
" with executive readability, clear argument flow, and section coherence."
+ (" Output Chinese text." if lang == "zh" else " Output English text.")
),
),
ChatMessage(
role="user",
content=(
"Topic: "
+ topic.strip()
+ "\nScope: "
+ json.dumps(scope_payload, ensure_ascii=False)
+ "\nStructure: "
+ json.dumps(structure_obj, ensure_ascii=False)
+ "\nSection snippets: "
+ json.dumps(section_blocks, ensure_ascii=False)
+ "\nReturn JSON {paper_tex} with a full compilable document using report sections:"
+ " Executive Summary/Abstract, Background, Approach, Results, Discussion, Risks, Conclusion, References."
+ " Each section should include concrete evidence statements and implementation-level details,"
+ " not high-level filler. Minimum 2-4 substantive paragraphs per major section."
),
),
],
schema_hint='{"paper_tex":"\\documentclass{article} ... \\end{document}"}',
temperature=0.1,
timeout_s=timeout_s,
)
_paper_candidate = paper_obj.get("paper_tex")
paper_tex = render_report_structured(topic.strip(), section_blocks, language=lang)
_assert_not_template_output("paper", paper_tex)
emit_stage(
6,
"Integrator-Paper",
"paper.tex assembled",
scope_text=json.dumps(scope_payload, ensure_ascii=False, indent=2),
section_text=json.dumps({"sections": section_plan}, ensure_ascii=False, indent=2),
paper_text=paper_tex,
)
mark(7, "Integrator-Beamer", "assembling full slides.tex")
slides_obj = _chat_json_resilient(
client_slides,
[
ChatMessage(
role="system",
content=(
"You are BeamerIntegrator. Produce a visually polished, conference-style Beamer deck"
" with concise bullets, visual hierarchy, and readable spacing."
+ (" Output Chinese text." if lang == "zh" else " Output English text.")
),
),
ChatMessage(
role="user",
content=(
"Topic: "
+ topic.strip()
+ "\nScope: "
+ json.dumps(scope_payload, ensure_ascii=False)
+ "\nSection plan: "
+ json.dumps(section_plan, ensure_ascii=False)
+ "\nSlide style: "
+ json.dumps(structure_obj.get("slide_style", {}), ensure_ascii=False)
+ "\nReturn JSON {slides_tex} with a full compilable beamer document."
+ " Use modern readable typography, max 5 bullets/frame, max 14 words/bullet,"
+ " and ensure each frame content fully fits without overflow."
+ " Include complete coverage: agenda, background, method, results, discussion, conclusion."
+ " Return STRICTLY compilable LaTeX without custom undefined macros."
),
),
],
schema_hint='{"slides_tex":"\\documentclass{beamer} ... \\end{document}"}',
temperature=0.1,
timeout_s=timeout_s,
)
_slides_candidate = slides_obj.get("slides_tex")
frames = build_slide_frames_from_sections(section_blocks, language=lang)
frames = enforce_slide_density(frames, language=lang)
slides_tex = render_beamer_frames(topic.strip(), frames, language=lang)
_assert_not_template_output("slides", slides_tex)
emit_stage(
7,
"Integrator-Beamer",
"slides.tex assembled",
scope_text=json.dumps(scope_payload, ensure_ascii=False, indent=2),
section_text=json.dumps({"sections": section_plan}, ensure_ascii=False, indent=2),
paper_text=paper_tex,
slides_text=slides_tex,
)
mark(8, "Online Render", "compiling paper/slides to PDF via latexonline.cc")
emit_stage(
8,
"Online Render",
"rendering started",
scope_text=json.dumps(scope_payload, ensure_ascii=False, indent=2),
section_text=json.dumps({"sections": section_plan}, ensure_ascii=False, indent=2),
paper_text=paper_tex,
slides_text=slides_tex,
)
try:
paper_pdf = _compile_latex_online(paper_tex, "hydradeck_agentic_paper.pdf")
slides_pdf = _compile_latex_online(slides_tex, "hydradeck_agentic_slides.pdf")
emit_stage(
8,
"Online Render",
"pdf rendered",
scope_text=json.dumps(scope_payload, ensure_ascii=False, indent=2),
section_text=json.dumps({"sections": section_plan}, ensure_ascii=False, indent=2),
paper_text=paper_tex,
slides_text=slides_tex,
pdf_paths_text=paper_pdf + "\n" + slides_pdf,
paper_pdf_text=paper_pdf,
slides_pdf_text=slides_pdf,
)
except Exception as exc:
return (
f"Agentic run partial success: TeX generated but online PDF render failed: {exc}",
"\n".join(stage_logs),
json.dumps(scope_payload, ensure_ascii=False, indent=2),
json.dumps({"sections": section_plan}, ensure_ascii=False, indent=2),
paper_tex,
slides_tex,
"",
"",
"",
)
mark(9, "Done", "paper/slides PDFs rendered and ready")
return (
"Agentic pipeline done: scoped, drafted, integrated, rendered to PDF.",
"\n".join(stage_logs),
json.dumps(scope_payload, ensure_ascii=False, indent=2),
json.dumps({"sections": section_plan}, ensure_ascii=False, indent=2),
paper_tex,
slides_tex,
paper_pdf + "\n" + slides_pdf,
paper_pdf,
slides_pdf,
)
def _run_agentic_pipeline_stream(
topic: str,
model: str,
base_url: str,
api_key: str,
request_budget: float,
use_mock: bool,
):
status = "Agentic pipeline running..."
progress_log = "1/3 Starting workflow"
empty_json = ""
empty_tex = ""
empty_paths = ""
yield (
status,
progress_log,
empty_json,
empty_json,
empty_tex,
empty_tex,
empty_paths,
"",
"",
5,
)
progress_log = "1/3 API scope and section planning"
yield (
status,
progress_log,
empty_json,
empty_json,
empty_tex,
empty_tex,
empty_paths,
"",
"",
30,
)
events: Queue[dict[str, object]] = Queue()
def on_stage(payload: dict[str, object]) -> None:
events.put(payload)
with ThreadPoolExecutor(max_workers=1) as pool:
fut = pool.submit(
_run_agentic_pipeline,
topic,
model,
base_url,
api_key,
request_budget,
use_mock,
None,
on_stage,
)
wait_tick = 0
while not fut.done() or not events.empty():
try:
ev = events.get(timeout=1.0)
yield (
str(ev.get("status", "Agentic pipeline running...")),
str(ev.get("progress_log", "")),
str(ev.get("scope", "")),
str(ev.get("sections", "")),
str(ev.get("paper", "")),
str(ev.get("slides", "")),
str(ev.get("pdf_paths", "")),
str(ev.get("paper_pdf", "")),
str(ev.get("slides_pdf", "")),
int(str(ev.get("progress", "0"))),
)
continue
except Empty:
pass
wait_tick += 1
elapsed_s = wait_tick
heartbeat_pct = min(95, 30 + wait_tick)
yield (
"Agentic pipeline running...",
f"2/3 Running agent workflow ({elapsed_s}s elapsed)",
empty_json,
empty_json,
empty_tex,
empty_tex,
empty_paths,
"",
"",
heartbeat_pct,
)
time.sleep(1)
(
status2,
progress2,
scope2,
sections2,
paper2,
slides2,
paths2,
paper_pdf2,
slides_pdf2,
) = fut.result()
done_log = "3/3 Completed"
if progress2.strip():
done_log = progress2 + "\n" + done_log
yield (
status2,
done_log,
scope2,
sections2,
paper2,
slides2,
paths2,
paper_pdf2,
slides_pdf2,
100,
)
def _run_pipeline(
topic: str,
model: str,
base_url: str,
api_key: str,
max_sources: int,
iterations: int,
llm_timeout: float,
request_budget: float,
seed_urls_text: str,
use_mock: bool,
) -> tuple[str, str, str, str]:
if not topic.strip():
return "Topic is required.", "", "", ""
selected_base_url = base_url.strip() or resolve_base_url("https://api.example.com")
selected_api_key = api_key.strip() or resolve_api_key()
if not use_mock:
preflight_error = _preflight_check(selected_base_url, selected_api_key, request_budget)
if preflight_error is not None:
return f"Preflight failed: {preflight_error}", "", "", ""
with tempfile.TemporaryDirectory() as td:
out_zip = Path(td) / "hydradeck_out.zip"
seeds = [x.strip() for x in seed_urls_text.splitlines() if x.strip()]
cfg = RunConfig(
topic=topic.strip(),
out=out_zip,
base_url=selected_base_url,
api_key=selected_api_key,
model=model.strip() or resolve_model("grok-4"),
iterations=max(1, int(iterations)),
max_sources=max(1, int(max_sources)),
llm_timeout_s=float(llm_timeout),
request_budget_s=float(request_budget),
use_mock=bool(use_mock),
seed_urls=seeds or None,
progress=False,
quality_gate=False,
archive_snapshots=False,
)
retry_cfg = RunConfig(
topic=cfg.topic,
out=cfg.out,
base_url=cfg.base_url,
api_key=cfg.api_key,
model=cfg.model,
iterations=cfg.iterations,
max_sources=cfg.max_sources,
module_sources=cfg.module_sources,
min_total_words=cfg.min_total_words,
use_mock=cfg.use_mock,
verbose=cfg.verbose,
llm_timeout_s=max(cfg.llm_timeout_s, 90.0),
facts_max_pages=cfg.facts_max_pages,
facts_max_chars_per_page=cfg.facts_max_chars_per_page,
facts_target=cfg.facts_target,
judge_max_chars=cfg.judge_max_chars,
pre_tex_quality_gate=cfg.pre_tex_quality_gate,
pre_tex_min_score=cfg.pre_tex_min_score,
pre_tex_attempts=cfg.pre_tex_attempts,
keep_stage=cfg.keep_stage,
verbatim=cfg.verbatim,
archive_prompts=cfg.archive_prompts,
archive_snapshots=cfg.archive_snapshots,
snapshot_timeout_s=cfg.snapshot_timeout_s,
snapshot_total_timeout_s=cfg.snapshot_total_timeout_s,
auto=cfg.auto,
auto_queries=cfg.auto_queries,
auto_models=cfg.auto_models,
quality_gate=cfg.quality_gate,
min_quality_score=cfg.min_quality_score,
max_quality_attempts=cfg.max_quality_attempts,
query_count=cfg.query_count,
max_query_modules=cfg.max_query_modules,
sources_attempts=cfg.sources_attempts,
max_total_runtime_s=max(cfg.max_total_runtime_s, 420.0),
progress=cfg.progress,
request_budget_s=max(cfg.request_budget_s, 35.0),
pdf_compiler=cfg.pdf_compiler,
template=cfg.template,
seed_urls=cfg.seed_urls,
)
try:
_ = run(cfg)
except Exception as exc:
err_text = str(exc)
retryable = ("Read timed out" in err_text) or ("timed out" in err_text.lower())
if (not use_mock) and retryable:
try:
_ = run(retry_cfg)
except Exception as retry_exc:
return (
"Run failed after retry: "
f"{retry_exc}. Try request_budget >= 35 and llm_timeout >= 90.",
"",
"",
"",
)
else:
return (
"Run failed: "
f"{exc}. If queue waits too long, try Use mock (offline) or increase Request budget.",
"",
"",
"",
)
with zipfile.ZipFile(out_zip, "r") as z:
report_md = z.read("report.md").decode("utf-8", errors="replace")
paper_tex = z.read("paper.tex").decode("utf-8", errors="replace")
slides_tex = z.read("slides.tex").decode("utf-8", errors="replace")
copy_zip = Path("/tmp") / "hydradeck_space_output.zip"
copy_zip.write_bytes(out_zip.read_bytes())
status = f"Done. Output zip: {copy_zip}"
return status, report_md, paper_tex, slides_tex