ex-sheet / app.py
Salem Lahlou
cover page: share header bar, drop redundant title, dotfill barèmes
421df9a
"""
photos -> latex
Gradio app: upload photos, extract structured exam data via the Anthropic
API (Claude Sonnet, vision + tool use), render it into LaTeX from a fixed
template, compile to PDF. Supports iterative self-verification and
natural-language modifications.
"""
import base64
import io
import json
import os
import shutil
import subprocess
import tempfile
import time
from pathlib import Path
import gradio as gr
from anthropic import Anthropic
from PIL import Image
try:
from pillow_heif import register_heif_opener
register_heif_opener()
except ImportError:
pass
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Model identifier passed as `model=` to every `messages.create` call below.
MODEL = "claude-sonnet-4-6"
# `max_tokens` passed to every `messages.create` call below.
MAX_TOKENS = 16000
# Upper bound on the number of uploaded files accepted by `handle_upload`.
MAX_IMAGES = 5
# Wall-clock budget (seconds, measured from the start of `handle_upload`)
# after which the verify loop stops starting new iterations.
VERIFY_BUDGET_S = 120
# Maximum number of FIX rounds applied by the verify loop.
VERIFY_MAX_ITERATIONS = 4
# Maximum number of verify rounds that are allowed to change figure bboxes.
VERIFY_MAX_FIGURE_REFINEMENTS = 1
# Longest side (pixels) of images encoded by `_image_block`.
MAX_IMAGE_DIM = 1568
# Number of compilation work directories kept on disk before the oldest
# ones are removed by `_prune_workdirs`.
MAX_KEPT_WORKDIRS = 20 # higher for version history
# The system prompt is read from `skill.md` next to this file at import time.
SKILL_PATH = Path(__file__).parent / "skill.md"
SYSTEM_PROMPT = SKILL_PATH.read_text(encoding="utf-8")
_client: Anthropic | None = None


def get_client() -> Anthropic:
    """Return the process-wide Anthropic client, creating it on first use.

    Raises:
        RuntimeError: if no client exists yet and the ANTHROPIC_API_KEY
            environment variable is missing or empty.
    """
    global _client
    # Fast path: the client was already built by an earlier call.
    if _client is not None:
        return _client
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        raise RuntimeError(
            "ANTHROPIC_API_KEY n'est pas défini. "
            "Définis ce secret dans les paramètres du HF Space."
        )
    _client = Anthropic()
    return _client
# ---------------------------------------------------------------------------
# Tool schemas
# ---------------------------------------------------------------------------
# JSON-schema fragment for one entry of the `figures` array in the tool
# schemas below: a named region of one uploaded photo, given as a
# fractional bounding box. `compile_pdf` turns each entry into a blank
# placeholder image named "<id>.png".
_FIGURE_ITEM = {
    "type": "object",
    "properties": {
        "id": {"type": "string", "description": "Short unique name, e.g. 'fig1'"},
        "image_index": {"type": "integer", "description": "0-based index into uploaded photos"},
        "bbox": {
            "type": "array", "items": {"type": "number"},
            "minItems": 4, "maxItems": 4,
            "description": "[x1,y1,x2,y2] fractional; (0,0)=top-left, (1,1)=bottom-right",
        },
    },
    "required": ["id", "image_index", "bbox"],
}
# JSON-schema fragment for one table row inside a section. `render_tex`
# renders a row either as text (`content`) or, when `figure_id` is set,
# as an \includegraphics of "<figure_id>.png" at `figure_width_cm`.
_ROW_ITEM = {
    "type": "object",
    "properties": {
        "bareme": {"type": "string", "description": "Per-question barème (e.g. '0,5'). Empty '' if none."},
        "content": {"type": "string", "description": "Question text with inline $math$ and numbering."},
        "figure_id": {"type": "string", "description": "If set, this row is a blank-box figure."},
        "figure_width_cm": {"type": "number", "description": "Width of the blank box in cm."},
    },
    "required": ["content"],
}
# JSON-schema fragment for one section (exercise / part): a title, an
# optional section-level barème and intro text, and its ordered rows.
_SECTION_ITEM = {
    "type": "object",
    "properties": {
        "title": {"type": "string", "description": "e.g. 'Exercice1', 'Partie II :', 'Problème'"},
        "bareme": {"type": "string", "description": "Per-exercise barème (e.g. '3points'). '' if per-question."},
        "intro": {"type": "string", "description": "Optional setup text before questions. '' if none."},
        "rows": {"type": "array", "items": _ROW_ITEM, "description": "Questions/sub-questions in order"},
    },
    "required": ["title", "rows"],
}
# JSON-schema fragment for the optional cover page. It is shared by
# EXAM_TOOL and VERDICT_TOOL. `_render_intro_page` reads `subtitle`,
# `info_rows`, `bullets` and `footer`; it does not read `title`.
_INTRO_PAGE = {
    "type": "object",
    "description": (
        "Optional cover/intro page content. Set ONLY when the first photo "
        "is a standalone cover (no questions) — otherwise omit this field."
    ),
    "properties": {
        "title": {"type": "string"},
        "subtitle": {"type": "string"},
        "info_rows": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "label": {"type": "string"},
                    "value": {"type": "string"},
                },
                "required": ["label", "value"],
            },
        },
        "bullets": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "description": {"type": "string"},
                    "bareme": {"type": "string"},
                },
                "required": ["description"],
            },
        },
        "footer": {"type": "string"},
    },
}
# Tool definition used for the initial extraction and for modifications
# (both calls force `tool_choice` to this tool). Its input is the
# structured exam data that `render_tex` turns into LaTeX.
EXAM_TOOL = {
    "name": "submit_exam",
    "description": (
        "Submit extracted exam data. The pipeline renders it into LaTeX "
        "from a fixed template — you never write LaTeX structure yourself."
    ),
    "input_schema": {
        "type": "object",
        "properties": {
            "header": {
                "type": "object",
                "properties": {
                    "top_left": {"type": "string"},
                    "school": {"type": "string"},
                    "exam_info": {"type": "string"},
                    "right": {"type": "string"},
                },
                "required": ["top_left", "school", "exam_info", "right"],
            },
            "sections": {"type": "array", "items": _SECTION_ITEM},
            "figures": {"type": "array", "items": _FIGURE_ITEM},
            "intro_page": _INTRO_PAGE,
            # Popped by handle_modification and used for the version label.
            "changelog": {
                "type": "string",
                "description": (
                    "Brief description of what was changed (for modifications). "
                    "Leave empty for initial extraction."
                ),
            },
        },
        "required": ["header", "sections", "figures"],
    },
}
# Tool definition used by the verify step. `exam_data` reuses the
# header/sections/figures sub-schemas of EXAM_TOOL; note that none of its
# properties is marked required, so `call_claude_verify` checks the
# payload itself.
VERDICT_TOOL = {
    "name": "submit_verdict",
    "description": (
        "Verdict on whether the rendered PDF matches the original photos. "
        "OK = faithful. FIX = provide corrected exam data."
    ),
    "input_schema": {
        "type": "object",
        "properties": {
            "verdict": {"type": "string", "enum": ["OK", "FIX"]},
            "exam_data": {
                "type": "object",
                "description": "Required if verdict=FIX. Full corrected exam data (same schema as submit_exam).",
                "properties": {
                    "header": EXAM_TOOL["input_schema"]["properties"]["header"],
                    "sections": EXAM_TOOL["input_schema"]["properties"]["sections"],
                    "figures": EXAM_TOOL["input_schema"]["properties"]["figures"],
                    "intro_page": _INTRO_PAGE,
                },
            },
        },
        "required": ["verdict"],
    },
}
# ---------------------------------------------------------------------------
# LaTeX renderer — the fixed template
# ---------------------------------------------------------------------------
# Fixed document preamble, emitted first by render_tex. It defines the
# barème column width (\baremeW), the `exampage` counter, and \bumppage,
# which increments that counter and prints "<counter>/<LastPage>".
# It opens the document but NOT the longtable: render_tex emits
# \begin{longtable} itself and _POSTAMBLE closes it.
_PREAMBLE = r"""\documentclass[12pt,a4paper]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage[french]{babel}
\usepackage{amsmath,amssymb,amsfonts}
\usepackage{geometry}
\usepackage{enumitem}
\usepackage{array}
\usepackage{tabularx}
\usepackage{longtable}
\usepackage{graphicx}
\usepackage{lastpage}
\geometry{left=1.5cm, right=1.5cm, top=1cm, bottom=1cm,
headheight=0pt, headsep=0pt, footskip=0pt}
\pagestyle{empty}
\setlength{\parindent}{0pt}
\setlength{\tabcolsep}{4pt}
\newlength{\baremeW}
\setlength{\baremeW}{1.6cm}
\newcounter{exampage}
\setcounter{exampage}{0}
\newcommand{\bumppage}{\stepcounter{exampage}\theexampage/\pageref{LastPage}}
\begin{document}
\setlength{\LTleft}{0pt}
\setlength{\LTright}{0pt}
\setlength{\LTpre}{0pt}
\setlength{\LTpost}{0pt}
\setcounter{LTchunksize}{100}
"""
# Closing lines appended last by render_tex: ends the longtable that
# render_tex opened, then ends the document.
_POSTAMBLE = r"""
\end{longtable}
\end{document}
"""
# Map of Unicode characters that pdflatex+T1 can't handle to their LaTeX equivalents.
_UNICODE_TO_LATEX = {
"Ω": r"$\Omega$", "ω": r"$\omega$", "α": r"$\alpha$", "β": r"$\beta$",
"γ": r"$\gamma$", "δ": r"$\delta$", "π": r"$\pi$", "φ": r"$\varphi$",
"ε": r"$\varepsilon$", "θ": r"$\theta$", "λ": r"$\lambda$", "μ": r"$\mu$",
"σ": r"$\sigma$", "τ": r"$\tau$", "Δ": r"$\Delta$", "Σ": r"$\Sigma$",
"∞": r"$\infty$", "√": r"$\sqrt{}$", "×": r"$\times$", "÷": r"$\div$",
"≤": r"$\leq$", "≥": r"$\geq$", "≠": r"$\neq$", "≈": r"$\approx$",
"∈": r"$\in$", "∉": r"$\notin$", "⊂": r"$\subset$", "∪": r"$\cup$",
"∩": r"$\cap$", "∅": r"$\emptyset$", "∀": r"$\forall$", "∃": r"$\exists$",
"→": r"$\to$", "←": r"$\leftarrow$", "⇒": r"$\Rightarrow$",
"–": "--", "—": "---", "…": r"\dots ", "°": r"\textdegree{}",
"\u200b": "", "\u00a0": "~", # zero-width space, non-breaking space
}
def _sanitize_latex(s: str) -> str:
    """Substitute every character listed in _UNICODE_TO_LATEX by its LaTeX form.

    The substitutions are applied one after another, in the table's order.
    """
    result = s
    for unicode_char in _UNICODE_TO_LATEX:
        result = result.replace(unicode_char, _UNICODE_TO_LATEX[unicode_char])
    return result
def _sanitize_text(s: str) -> str:
    """
    One-stop sanitizer for any text going into the LaTeX template.

    Handles: Unicode → LaTeX (via _sanitize_latex), real newlines → \\newline.
    Must be applied to EVERY text field (header, title, intro, content, barème).

    Args:
        s: The raw field value. ``None`` is treated as an empty string and
            any other non-string value is converted with ``str()``, so a
            missing or mistyped field cannot crash rendering.

    Returns:
        The sanitized text.
    """
    if s is None:
        return ""
    if not isinstance(s, str):
        s = str(s)
    # 1. Unicode symbols → LaTeX equivalents (shared with _sanitize_latex).
    s = _sanitize_latex(s)
    # 2. Real newline chars → LaTeX \newline. This must run exactly once:
    #    the replacement text contains no newline, so it is never re-expanded.
    return s.replace("\n", r" \newline ")
def _header_tabularx(h: dict) -> str:
    """Build the four-cell tabularx header bar.

    Used both inside the longtable (wrapped in \\multicolumn by
    _render_header_bar) and standalone on the cover page. The first cell
    is always \\bumppage; the other three come from the ``school``,
    ``exam_info`` and ``right`` keys of ``h``.
    """
    school, exam_info, right = (
        _sanitize_text(h.get(key, "")) for key in ("school", "exam_info", "right")
    )
    # One fixed-width barème column followed by three centred X columns.
    column_spec = (
        r"{|>{\centering\arraybackslash}m{\baremeW}"
        + r"|>{\centering\arraybackslash}X" * 3
        + "|}"
    )
    # {\itshape ...} is used instead of \textit{...}: \textit parses its
    # argument via \check@nocorr@, which chokes on \begin{...} environments
    # (e.g. \begin{cases}) and has been seen to break the whole longtable.
    cells = (r"\bumppage", school, exam_info, right)
    row = " & ".join(r"{\itshape " + cell + "}" for cell in cells)
    pieces = [
        r"\noindent\begin{tabularx}{\textwidth}" + column_spec,
        r"\hline",
        row + r" \tabularnewline",
        r"\hline",
        r"\end{tabularx}",
    ]
    return "\n".join(pieces)
def _render_header_bar(h: dict) -> str:
    """Wrap the header tabularx in a two-column \\multicolumn longtable row."""
    opening = r"\multicolumn{2}{@{}l@{}}{%"
    closing = r"}\\*[-\arrayrulewidth]"
    rule = r"\hline"
    return f"{opening}\n{_header_tabularx(h)}\n{closing}\n{rule}"
def _render_intro_page(intro: dict, header: dict) -> str:
    """Render the optional cover page as LaTeX.

    The page starts with the same header bar as the longtable pages
    (\\bumppage auto-increments, so the cover shows 1/N and the longtable
    resumes at 2/N), followed by the optional subtitle, info table,
    bullet list and footer. The output ends with \\newpage so the
    longtable starts on a fresh page. The ``title`` key is not rendered.
    """
    subtitle = _sanitize_text(intro.get("subtitle", ""))
    info_rows = intro.get("info_rows", []) or []
    bullets = intro.get("bullets", []) or []
    footer = _sanitize_text(intro.get("footer", ""))

    parts: list[str] = [
        r"\thispagestyle{empty}",
        _header_tabularx(header),
        r"\vspace*{1.5cm}",
        r"\begingroup",
        r"\linespread{1.5}\selectfont",
    ]

    if subtitle:
        parts += [
            rf"\begin{{center}}{{\LARGE {subtitle}}}\end{{center}}",
            r"\vspace{1cm}",
        ]

    if info_rows:
        parts += [r"\begin{center}", r"\large", r"\begin{tabular}{|l|l|}", r"\hline"]
        for entry in info_rows:
            label = _sanitize_text(entry.get("label", ""))
            value = _sanitize_text(entry.get("value", ""))
            parts += [r"\textbf{" + label + "} & " + value + r" \\", r"\hline"]
        parts += [r"\end{tabular}", r"\end{center}", r"\vspace{1.5cm}"]

    if bullets:
        parts += [r"\large", r"\begin{itemize}[leftmargin=2cm,itemsep=0.6em]"]
        for item in bullets:
            # Bullets are normally dicts; anything else is shown as plain text.
            if isinstance(item, dict):
                raw_desc, raw_points = item.get("description", ""), item.get("bareme", "")
            else:
                raw_desc, raw_points = str(item), ""
            desc = _sanitize_text(raw_desc)
            points = _sanitize_text(raw_points)
            bullet_line = rf" \item {desc}"
            if points:
                # \dotfill stretches a dotted leader between text and barème.
                bullet_line += rf"\dotfill ({points})"
            parts.append(bullet_line)
        parts += [r"\end{itemize}", r"\vspace{1.5cm}"]

    if footer:
        parts.append(rf"\begin{{center}}{{\Large\underline{{{footer}}}}}\end{{center}}")

    parts += [r"\endgroup", r"\newpage", ""]
    return "\n".join(parts)
def render_tex(data: dict, include_intro: bool = False) -> str:
    """
    Deterministically render structured exam data into a complete .tex file.
    Claude never writes LaTeX — this function owns all formatting.

    Args:
        data: Exam data with the keys ``header``, ``sections`` and,
            optionally, ``intro_page``. Missing or ``None`` values are
            treated as empty.
        include_intro: When true and ``data["intro_page"]`` is non-empty,
            a cover page is emitted before the longtable.

    Returns:
        The full LaTeX source (preamble, optional cover page, longtable
        with one title row plus content rows per section, postamble).
    """
    default_figure_width_cm = 7

    # `or` (not a .get default) so that explicit None values are tolerated.
    header = data.get("header") or {}
    sections = data.get("sections") or []
    intro_page = data.get("intro_page")

    lines: list[str] = [_PREAMBLE]
    if include_intro and intro_page:
        lines.append(_render_intro_page(intro_page, header))

    # Longtable column spec: fixed barème column + text column filling the rest.
    lines.append(
        r"\begin{longtable}{|>{\centering\arraybackslash}m{\baremeW}"
        r"|p{\dimexpr\textwidth-\baremeW-4\tabcolsep-3\arrayrulewidth\relax}|}"
    )
    lines.append("")

    # The same header bar is declared for the first and the following pages.
    header_bar = _render_header_bar(header)
    lines.append("% ----- Header (first page) -----")
    lines.append(header_bar)
    lines.append(r"\endfirsthead")
    lines.append("")
    lines.append("% ----- Header (subsequent pages) -----")
    lines.append(header_bar)
    lines.append(r"\endhead")
    lines.append("")

    for section in sections:
        title = _sanitize_text(section.get("title", ""))
        bareme_global = _sanitize_text(section.get("bareme", ""))
        intro = _sanitize_text(section.get("intro", ""))
        rows = section.get("rows") or []

        lines.append(f"% =========== {title} ===========")
        # Title row: section-level barème (if any) in the left cell.
        lines.append(rf"{{\itshape {bareme_global}}}" if bareme_global else "")
        lines.append("&")
        lines.append(rf"\underline{{\textbf{{{title}}}}}")
        # The intro paragraph lives in the same cell as the title.
        if intro:
            lines.append("")
            lines.append(rf"{{\itshape {intro}}}")
        lines.append(r"\\")  # row break without a rule after the title

        for row in rows:
            row_bareme = _sanitize_text(row.get("bareme", ""))
            fig_id = row.get("figure_id", "")

            lines.append(rf"{{\itshape {row_bareme}}}" if row_bareme else "")
            lines.append("&")

            if fig_id:
                width = row.get("figure_width_cm", default_figure_width_cm)
                # A missing, non-numeric, non-positive or non-finite width
                # would produce an invalid \includegraphics option.
                if (
                    isinstance(width, bool)
                    or not isinstance(width, (int, float))
                    or not 0 < width < float("inf")
                ):
                    width = default_figure_width_cm
                lines.append(
                    rf"\begin{{center}}"
                    rf"\includegraphics[width={width}cm]{{{fig_id}.png}}"
                    rf"\end{{center}}"
                )
            else:
                content = _sanitize_text(row.get("content", ""))
                lines.append(rf"{{\itshape {content}}}")
            lines.append(r"\\")  # row break without a rule

        # Horizontal rule between sections.
        lines.append(r"\hline")
        lines.append("")

    lines.append(_POSTAMBLE)
    return "\n".join(lines)
# ---------------------------------------------------------------------------
# Image helpers
# ---------------------------------------------------------------------------
# Max longest-side for the on-disk copy used by every downstream step.
# Keeps annotate_bboxes output under Anthropic's 5 MB/image limit in the
# verify step while still being sharp enough to see bbox overlays.
UPLOAD_MAX_DIM = 2200
def _downscale_to_tempfile(path: str, max_dim: int = UPLOAD_MAX_DIM) -> str:
    """EXIF-transpose + RGB + cap longest side, save JPEG to a tempdir.

    Args:
        path: Path of the source image; the file is left untouched.
        max_dim: Maximum length in pixels of the longest side of the copy.

    Returns:
        Path of the JPEG copy, written into a freshly created temporary
        directory (one directory per call; it is not removed here).
    """
    from PIL import ImageOps

    # The context manager closes the source file even if decoding fails;
    # all work on the pixels happens while the file is still open.
    with Image.open(path) as source:
        try:
            img = ImageOps.exif_transpose(source)
        except Exception:
            # Best effort: broken EXIF data must not block the upload.
            img = source
        if img.mode not in ("RGB", "L"):
            img = img.convert("RGB")
        w, h = img.size
        longest = max(w, h)
        if longest > max_dim:
            ratio = max_dim / longest
            img = img.resize(
                (max(1, int(w * ratio)), max(1, int(h * ratio))),
                Image.LANCZOS,
            )
        tmpdir = tempfile.mkdtemp(prefix="img_")
        out = Path(tmpdir) / (Path(path).stem + ".jpg")
        img.save(out, format="JPEG", quality=92, optimize=True)
    return str(out)
def _prepare_images(files) -> list[str]:
    """
    Convert uploaded files to a flat list of image paths.

    PDFs are split into per-page PNGs via pdftoppm; images pass through.
    Every resulting image is downscaled to UPLOAD_MAX_DIM so downstream
    calls (annotate_bboxes, the verify step) stay within API limits.
    If downscaling an image fails, its original path is kept instead.

    Args:
        files: Uploaded files, each either a path (str / PathLike) or an
            object exposing the path as ``.name``.

    Returns:
        Image paths in upload order (PDF pages in page order). A PDF that
        pdftoppm cannot convert contributes no pages.
    """
    image_paths: list[str] = []
    for f in files or []:
        # Accept both plain paths and file wrapper objects with `.name`.
        path = os.fspath(f) if isinstance(f, (str, os.PathLike)) else f.name
        if path.lower().endswith(".pdf"):
            tmpdir = tempfile.mkdtemp(prefix="pdf2img_")
            result = subprocess.run(
                ["pdftoppm", "-png", "-r", "200", path,
                 str(Path(tmpdir) / "page")],
                capture_output=True,
            )
            if result.returncode != 0:
                # Leave a trace instead of silently yielding zero pages.
                stderr = result.stderr.decode("utf-8", errors="replace").strip()
                print(f"pdftoppm failed for {path}: {stderr}")
            sources = [str(p) for p in sorted(Path(tmpdir).glob("page-*.png"))]
        else:
            sources = [path]
        for src in sources:
            try:
                image_paths.append(_downscale_to_tempfile(src))
            except Exception as exc:
                # Best effort: fall back to the un-downscaled file.
                print(f"downscale failed for {src}: {exc}")
                image_paths.append(src)
    return image_paths
def _image_block(path: str) -> dict:
    """Encode an image file as a base64 JPEG image content block.

    The image is EXIF-transposed (best effort), converted to RGB unless it
    is already RGB or greyscale, and downscaled so that its longest side
    is at most MAX_IMAGE_DIM pixels.

    Args:
        path: Path of the image file to encode.

    Returns:
        A dict of the form ``{"type": "image", "source": {...}}`` carrying
        the base64-encoded JPEG data.
    """
    # The context manager closes the file handle even if encoding fails.
    with Image.open(path) as source:
        try:
            from PIL import ImageOps
            img = ImageOps.exif_transpose(source)
        except Exception:
            # Best effort: broken EXIF data must not block the request.
            img = source
        if img.mode not in ("RGB", "L"):
            img = img.convert("RGB")
        w, h = img.size
        longest = max(w, h)
        if longest > MAX_IMAGE_DIM:
            ratio = MAX_IMAGE_DIM / longest
            img = img.resize(
                (max(1, int(w * ratio)), max(1, int(h * ratio))),
                Image.LANCZOS,
            )
        buf = io.BytesIO()
        img.save(buf, format="JPEG", quality=90, optimize=True)
    data = base64.standard_b64encode(buf.getvalue()).decode("ascii")
    return {
        "type": "image",
        "source": {"type": "base64", "media_type": "image/jpeg", "data": data},
    }
def annotate_bboxes(image_path: str, figures: list[dict], image_index: int) -> Image.Image:
    """Return a copy of an image with the figure bounding boxes drawn in red.

    Args:
        image_path: Path of the source photo.
        figures: Figure entries (``id``, ``image_index``, fractional
            ``bbox``). Only those whose ``image_index`` equals
            ``image_index`` are drawn; malformed entries are skipped.
        image_index: Index of this photo among the uploaded photos.

    Returns:
        A new RGB image. RGB is enforced because the caller encodes the
        result as JPEG, which cannot store an alpha channel.
    """
    from PIL import ImageDraw, ImageFont, ImageOps

    with Image.open(image_path) as source:
        try:
            img = ImageOps.exif_transpose(source)
        except Exception:
            img = source
        # convert() always returns a new, fully loaded image, so the result
        # stays usable after the source file is closed.
        img = img.convert("RGB")

    draw = ImageDraw.Draw(img)
    w, h = img.size
    line_w = max(4, int(min(w, h) * 0.005))
    try:
        font = ImageFont.load_default(size=max(20, int(min(w, h) * 0.025)))
    except Exception:
        # Pillow versions without the `size` argument.
        font = ImageFont.load_default()

    for fig in figures or []:
        try:
            if int(fig.get("image_index", 0)) != image_index:
                continue
            bbox = fig.get("bbox") or [0, 0, 1, 1]
            fx1, fy1, fx2, fy2 = (max(0.0, min(1.0, float(v))) for v in bbox[:4])
        except (AttributeError, TypeError, ValueError):
            # Not a dict, non-numeric values, or fewer than four coordinates.
            continue
        # Pillow rejects rectangles whose corners are given in reverse order.
        x1, x2 = sorted((int(fx1 * w), int(fx2 * w)))
        y1, y2 = sorted((int(fy1 * h), int(fy2 * h)))
        draw.rectangle([x1, y1, x2, y2], outline="red", width=line_w)

        label = str(fig.get("id", ""))
        if not label:
            continue
        tx, ty = x1 + line_w + 2, y1 + line_w + 2
        try:
            # Size the label background from the actual rendered text, since
            # the font size scales with the image.
            left, top, right, bottom = draw.textbbox((tx, ty), label, font=font)
            background = [left - 2, top - 2, right + 2, bottom + 2]
        except Exception:
            # Fallback estimate when the font cannot be measured.
            background = [tx - 2, ty - 2, tx + len(label) * 14, ty + 22]
        draw.rectangle(background, fill="red")
        draw.text((tx, ty), label, fill="white", font=font)
    return img
def make_blank_box(image_path: str, bbox: list[float]) -> Image.Image:
    """Create a white, black-bordered placeholder with the bbox's aspect ratio.

    Only the dimensions of the source photo are used; no pixels are copied.

    Args:
        image_path: Path of the photo the bbox refers to.
        bbox: ``[x1, y1, x2, y2]`` as fractions of the photo size. Values
            are clamped to [0, 1]; corners may be given in either order.

    Returns:
        A new RGB image whose longest side is at most 800 pixels.

    Raises:
        ValueError: if ``bbox`` has fewer than four coordinates.
    """
    from PIL import ImageDraw

    if bbox is None or len(bbox) < 4:
        raise ValueError(f"bbox must have 4 coordinates, got {bbox!r}")

    # The context manager closes the file even if reading the size fails.
    with Image.open(image_path) as img:
        try:
            from PIL import ImageOps
            src_w, src_h = ImageOps.exif_transpose(img).size
        except Exception:
            src_w, src_h = img.size

    x1f, y1f, x2f, y2f = (max(0.0, min(1.0, float(v))) for v in bbox[:4])
    # Tolerate reversed corners instead of collapsing to a 1-pixel box.
    px_w = max(1, int(abs(x2f - x1f) * src_w))
    px_h = max(1, int(abs(y2f - y1f) * src_h))

    target_long = 800
    longest = max(px_w, px_h)
    if longest > target_long:
        scale = target_long / longest
        px_w = max(1, int(px_w * scale))
        px_h = max(1, int(px_h * scale))

    box = Image.new("RGB", (px_w, px_h), color="white")
    draw = ImageDraw.Draw(box)
    border = max(2, int(min(px_w, px_h) * 0.005))
    draw.rectangle([0, 0, px_w - 1, px_h - 1], outline="black", width=border)
    return box
def _extract_tool_call(response, tool_name: str) -> dict | None:
for block in response.content:
if getattr(block, "type", None) == "tool_use" and getattr(block, "name", None) == tool_name:
return dict(block.input)
return None
def _extract_text(response) -> str:
parts = []
for block in response.content:
text = getattr(block, "text", None)
if text:
parts.append(text)
return "".join(parts)
# ---------------------------------------------------------------------------
# Anthropic calls
# ---------------------------------------------------------------------------
# User-turn instruction (French) appended after the images in
# call_claude_initial: asks for a structured extraction via `submit_exam`.
_INITIAL_PROMPT = (
    "Voici des photos d'une page à convertir. "
    "Extrais le contenu structuré et appelle l'outil `submit_exam`. "
    "Suis rigoureusement les règles du prompt système."
)
def call_claude_initial(image_paths: list[str]) -> dict:
    """Run the initial extraction: images in, structured exam data out.

    The request forces the ``submit_exam`` tool.

    Raises:
        RuntimeError: if the response contains no ``submit_exam`` call.
    """
    user_content = [
        *(_image_block(path) for path in image_paths),
        {"type": "text", "text": _INITIAL_PROMPT},
    ]
    request = {
        "model": MODEL,
        "max_tokens": MAX_TOKENS,
        "tools": [EXAM_TOOL],
        "tool_choice": {"type": "tool", "name": "submit_exam"},
        "system": SYSTEM_PROMPT,
        "messages": [{"role": "user", "content": user_content}],
    }
    response = get_client().messages.create(**request)
    exam = _extract_tool_call(response, "submit_exam")
    if exam is not None:
        return exam
    raise RuntimeError("Claude n'a pas appelé submit_exam.")
def call_claude_verify(
    image_paths: list[str], pdf_pages: list[str], figure_paths: list[str],
    current_figures: list[dict], current_data: dict,
    allow_figure_refinement: bool,
) -> tuple[str, dict | str | None]:
    """Ask the model whether the rendered PDF matches the original photos.

    Args:
        image_paths: Original photos; each is sent with the current figure
            bboxes drawn on it.
        pdf_pages: Rendered PDF page images.
        figure_paths: Materialized figure placeholder images (may be empty).
        current_figures: Figure entries used for the bbox overlays.
        current_data: Exam data that produced the PDF.
        allow_figure_refinement: When false, any figures in a FIX payload
            are replaced by ``current_data["figures"]``.

    Returns:
        ``("OK", None)``, ``("FIX", new_data)`` or ``("FAIL", error_str)``.
    """
    content: list[dict] = [
        {"type": "text", "text": "VÉRIFICATION. Photos ORIGINALES avec bboxes annotées en rouge :"}
    ]
    for idx, src in enumerate(image_paths):
        annotated = annotate_bboxes(src, current_figures, idx)
        # JPEG cannot store alpha or palette modes; normalise before encoding.
        if annotated.mode not in ("RGB", "L"):
            annotated = annotated.convert("RGB")
        buf = io.BytesIO()
        annotated.save(buf, format="JPEG", quality=85, optimize=True)
        b64 = base64.standard_b64encode(buf.getvalue()).decode("ascii")
        content.append({"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": b64}})
    if figure_paths:
        content.append({"type": "text", "text": "Figures extraites (blank boxes) :"})
        for p in figure_paths:
            content.append(_image_block(p))
    content.append({"type": "text", "text": "PDF rendu :"})
    for p in pdf_pages:
        content.append(_image_block(p))
    content.append({
        "type": "text",
        "text": (
            "\n\nDonnées actuelles :\n```json\n"
            # NOTE: the dump is cut at 8000 characters, so for large
            # documents the model sees truncated (invalid) JSON here.
            + json.dumps(current_data, ensure_ascii=False, indent=2)[:8000]
            + "\n```\n\n"
            "Compare le PDF rendu aux photos originales. "
            "Vérifie texte, formules, barème, figures. "
            "Appelle `submit_verdict` : verdict=OK si correct, "
            "verdict=FIX + exam_data corrigé sinon."
        ),
    })
    response = get_client().messages.create(
        model=MODEL, max_tokens=MAX_TOKENS,
        tools=[VERDICT_TOOL],
        tool_choice={"type": "tool", "name": "submit_verdict"},
        system=SYSTEM_PROMPT,
        messages=[{"role": "user", "content": content}],
    )
    payload = _extract_tool_call(response, "submit_verdict")
    if payload is None:
        return ("FAIL", "Claude n'a pas appelé submit_verdict.")
    verdict = (payload.get("verdict") or "").strip().upper()
    if verdict == "OK":
        return ("OK", None)
    if verdict == "FIX":
        new_data = payload.get("exam_data")
        if not isinstance(new_data, dict) or "sections" not in new_data:
            return ("FAIL", "Verdict=FIX sans exam_data valide.")
        # Keep the current figures when refinement is frozen, and also when
        # the payload omits `figures` altogether: the schema does not mark
        # it required, and treating an omission as "no figures" would drop
        # every figure while rows still reference them.
        if not allow_figure_refinement or "figures" not in new_data:
            new_data["figures"] = current_data.get("figures", [])
        # Same carry-over for the cover page as handle_modification does:
        # an omitted intro_page is kept rather than silently lost.
        if "intro_page" not in new_data and current_data.get("intro_page"):
            new_data["intro_page"] = current_data["intro_page"]
        return ("FIX", new_data)
    return ("FAIL", f"Verdict inconnu : {verdict!r}.")
def call_claude_modify(current_data: dict, instruction: str) -> dict:
    """Apply a natural-language modification request to the exam data.

    The current data (as JSON) and the instruction are sent in one user
    message; the request forces the ``submit_exam`` tool.

    Returns:
        The tool input, i.e. the updated exam data (the prompt asks the
        model to also fill in ``changelog``).

    Raises:
        RuntimeError: if the response contains no ``submit_exam`` call.
    """
    data_dump = json.dumps(current_data, ensure_ascii=False, indent=2)
    prompt_parts = [
        "Voici les données structurées actuelles du document :\n\n",
        "```json\n",
        data_dump,
        "\n```\n\n",
        f"Demande de modification : {instruction}\n\n",
        "Modifie les données en conséquence et appelle `submit_exam` ",
        "avec les données complètes mises à jour. ",
        "N'apporte QUE les changements demandés.\n\n",
        "IMPORTANT : remplis le champ `changelog` avec une description ",
        "courte (1-2 phrases en français) de ce que tu as changé. ",
        "Exemple : 'Remplacé la date par Mai 2026 dans l'en-tête.'",
    ]
    request = {
        "model": MODEL,
        "max_tokens": MAX_TOKENS,
        "tools": [EXAM_TOOL],
        "tool_choice": {"type": "tool", "name": "submit_exam"},
        "system": SYSTEM_PROMPT,
        "messages": [{"role": "user", "content": "".join(prompt_parts)}],
    }
    response = get_client().messages.create(**request)
    updated = _extract_tool_call(response, "submit_exam")
    if updated is not None:
        return updated
    raise RuntimeError("Claude n'a pas appelé submit_exam pour la modification.")
# ---------------------------------------------------------------------------
# Compilation
# ---------------------------------------------------------------------------
# Compilation work directories in creation order (oldest first).
_workdirs: list[str] = []


def _prune_workdirs():
    """Delete the oldest work directories beyond MAX_KEPT_WORKDIRS."""
    surplus = len(_workdirs) - MAX_KEPT_WORKDIRS
    for _ in range(max(0, surplus)):
        oldest = _workdirs.pop(0)
        shutil.rmtree(oldest, ignore_errors=True)
def compile_pdf(
    data: dict,
    image_paths: list[str] | None = None,
    prev_workdir: str | None = None,
    include_intro: bool = False,
) -> tuple[str | None, str, str, str | None]:
    """
    Render data → .tex, materialize figures, compile with pdflatex.

    Args:
        data: Exam data (see render_tex); ``data["figures"]`` lists the
            figure placeholders to create.
        image_paths: Source photos. When given together with figures, each
            figure is materialized as a blank box named "<id>.png".
        prev_workdir: Used when figures are not re-materialized: its PNG
            files (except "preview-*" page renders) are copied over.
        include_intro: Forwarded to render_tex.

    Returns:
        ``(pdf_path, tex_path, workdir, error)``. On success ``error`` is
        None. On failure ``pdf_path`` is None and ``error`` holds the
        relevant log lines, or a message if pdflatex could not be run or
        timed out.
    """
    pdflatex_timeout_s = 120  # per pdflatex run

    workdir = tempfile.mkdtemp(prefix="latex_")
    work = Path(workdir)
    _workdirs.append(workdir)

    figures = data.get("figures") or []
    if figures and image_paths:
        # Materialize figures as blank boxes.
        for fig in figures:
            try:
                idx = int(fig.get("image_index", 0))
                if 0 <= idx < len(image_paths):
                    fig_id = str(fig["id"])
                    # The id comes from the model: refuse anything that is
                    # not a plain file name so nothing is written outside
                    # the work directory.
                    if not fig_id or fig_id in (".", "..") or Path(fig_id).name != fig_id:
                        raise ValueError(f"invalid figure id {fig_id!r}")
                    box = make_blank_box(image_paths[idx], fig["bbox"])
                    box.save(work / f"{fig_id}.png", "PNG", optimize=True)
            except Exception as e:
                fig_label = fig.get("id", "?") if isinstance(fig, dict) else "?"
                print(f"figure {fig_label}: {e}")
    elif prev_workdir and Path(prev_workdir).is_dir():
        for png in Path(prev_workdir).glob("*.png"):
            if not png.name.startswith("preview-"):
                shutil.copy(png, work / png.name)

    # Prune only after the figures were copied: pruning first could delete
    # prev_workdir before its PNG files had been carried over.
    _prune_workdirs()

    # Render and write .tex
    tex_source = render_tex(data, include_intro=include_intro)
    tex_path = work / "doc.tex"
    tex_path.write_text(tex_source, encoding="utf-8")

    # Compile twice so that \pageref{LastPage} resolves on the second run.
    log = ""
    try:
        for _ in range(2):
            result = subprocess.run(
                ["pdflatex", "-interaction=nonstopmode", "doc.tex"],
                cwd=workdir, capture_output=True, text=True,
                encoding="utf-8", errors="replace",
                timeout=pdflatex_timeout_s,
            )
            log = result.stdout
    except subprocess.TimeoutExpired:
        return (
            None, str(tex_path), workdir,
            f"pdflatex n'a pas terminé en {pdflatex_timeout_s}s.",
        )
    except OSError as exc:
        # Typically: pdflatex is not installed or not executable.
        return None, str(tex_path), workdir, f"Impossible de lancer pdflatex : {exc}"

    pdf_path = work / "doc.pdf"
    if pdf_path.exists():
        return str(pdf_path), str(tex_path), workdir, None
    error_lines = [
        line for line in log.split("\n")
        if line.startswith("!") or "Error" in line
    ]
    return None, str(tex_path), workdir, "\n".join(error_lines[:25]) or log[-2000:]
def render_pdf_pages(pdf_path: str) -> list[str]:
    """Rasterize a PDF into "preview-*.png" files next to it via pdftoppm.

    Previous preview files in the same directory are removed first.

    Returns:
        The sorted preview image paths, or an empty list when pdftoppm
        exits with a non-zero status.
    """
    out_dir = Path(pdf_path).parent
    pattern = "preview-*.png"
    for leftover in out_dir.glob(pattern):
        try:
            leftover.unlink()
        except OSError:
            pass
    command = ["pdftoppm", "-png", "-r", "120", pdf_path, str(out_dir / "preview")]
    completed = subprocess.run(
        command,
        capture_output=True, text=True,
        encoding="utf-8", errors="replace",
    )
    if completed.returncode == 0:
        return [str(png) for png in sorted(out_dir.glob(pattern))]
    return []
# ---------------------------------------------------------------------------
# Gradio handlers — all return 9 values:
# pdf_gallery, pdf_download, tex_download, data_state, status,
# workdir_state, versions_state, version_dropdown, intro_toggle
# ---------------------------------------------------------------------------
def _make_header(school, teacher, date, doc_type):
return {
"top_left": "",
"school": school,
"exam_info": doc_type,
"right": f"{teacher}\nDate : {date}" if teacher else f"Date : {date}",
}
def _intro_toggle_update(data: dict | None, include_intro: bool):
    """Build the update for the 'include intro' checkbox.

    The checkbox is visible only when ``data`` has a non-empty
    ``intro_page``; when hidden, its value is forced to False.
    """
    if not (data and data.get("intro_page")):
        return gr.update(visible=False, value=False)
    return gr.update(visible=True, value=bool(include_intro))
def _err(msg, versions_json="[]"):
    """Build the 9-value handler result for an error or empty state.

    Gallery, downloads, data and workdir are cleared; ``msg`` becomes the
    status; the version dropdown and intro toggle are left unchanged.
    """
    gallery, pdf_file, tex_file = [], None, None
    data_state, workdir_state = "", ""
    dropdown_update = gr.update()
    toggle_update = gr.update()
    return (
        gallery, pdf_file, tex_file, data_state, msg,
        workdir_state, versions_json, dropdown_update, toggle_update,
    )
def handle_upload(files, school, teacher, date, doc_type, progress=gr.Progress()):
    """Full generation pipeline for a fresh upload.

    Steps: prepare images → initial extraction → compile → iterative
    verification (bounded by VERIFY_MAX_ITERATIONS fixes and the
    VERIFY_BUDGET_S time budget) → version history entry "v1".
    The header extracted by the model is always replaced by the one built
    from the form fields.

    Returns:
        The 9 handler values: gallery pages, PDF path, .tex path, data
        JSON, status message, workdir, versions JSON, dropdown update and
        intro-toggle update.
    """
    if not files:
        return _err("Upload au moins une image ou un PDF.")
    if len(files) > MAX_IMAGES:
        return _err(f"Maximum {MAX_IMAGES} fichiers.")
    # File conversion shells out to pdftoppm and decodes images; report a
    # failure as a status message instead of letting the handler crash.
    try:
        image_paths = _prepare_images(files)
    except Exception as exc:
        return _err(f"Erreur pendant la préparation des fichiers : {exc}")
    if not image_paths:
        # E.g. a PDF that could not be converted: nothing to send to the API.
        return _err("Aucune image exploitable dans les fichiers envoyés.")

    started = time.time()
    progress(0.1, desc="Extraction des données…")
    try:
        data = call_claude_initial(image_paths)
    except Exception as exc:
        return _err(f"Erreur API : {exc}")
    user_header = _make_header(school, teacher, date, doc_type)
    data["header"] = user_header
    if not data.get("sections"):
        return _err("Aucune section extraite.")

    progress(0.3, desc="Compilation…")
    try:
        pdf_path, tex_path, workdir, error = compile_pdf(data, image_paths=image_paths)
    except Exception as exc:
        return _err(f"Erreur pendant la compilation : {exc}")
    if error:
        return [], None, tex_path, json.dumps(data, ensure_ascii=False), f"Compilation échouée :\n{error}", workdir, "[]", gr.update(), _intro_toggle_update(data, False)
    pages = render_pdf_pages(pdf_path)
    data_json = json.dumps(data, ensure_ascii=False)

    # Iterative verify: every failure below keeps the last good result.
    current_figures = list(data.get("figures", []))
    figure_refinements = 0
    fixes = 0
    while fixes < VERIFY_MAX_ITERATIONS and time.time() - started < VERIFY_BUDGET_S:
        frac = min(0.9, 0.4 + fixes * 0.15)
        progress(frac, desc=f"Vérification (correction {fixes + 1})…")
        fig_paths = [str(p) for p in sorted(Path(workdir).glob("*.png")) if "preview-" not in p.name]
        allow_fig = figure_refinements < VERIFY_MAX_FIGURE_REFINEMENTS
        try:
            verdict, new_data = call_claude_verify(
                image_paths, pages, fig_paths, current_figures, data, allow_fig
            )
        except Exception as exc:
            # Verification is best effort, but leave a trace of the cause.
            print(f"verify: {exc}")
            break
        if verdict == "OK":
            progress(1.0, desc="Terminé")
            break
        if verdict != "FIX" or not new_data:
            break
        fixes += 1
        new_data["header"] = user_header
        new_figures = new_data.get("figures", [])
        has_new_figs = new_figures != current_figures and allow_fig
        progress(frac + 0.05, desc=f"Compilation correction {fixes}…")
        try:
            if has_new_figs:
                new_pdf, new_tex, new_wd, err = compile_pdf(new_data, image_paths=image_paths)
            else:
                new_data["figures"] = current_figures
                new_pdf, new_tex, new_wd, err = compile_pdf(new_data, prev_workdir=workdir)
        except Exception as exc:
            print(f"verify compile: {exc}")
            break
        if err:
            break
        # Commit the correction only once it compiled.
        if has_new_figs:
            current_figures = list(new_figures)
            figure_refinements += 1
        data = new_data
        data_json = json.dumps(data, ensure_ascii=False)
        pdf_path, tex_path, workdir = new_pdf, new_tex, new_wd
        pages = render_pdf_pages(pdf_path)

    elapsed = time.time() - started
    status_msg = f"Généré en {elapsed:.0f}s ({fixes} correction(s))."
    # Version history: v1
    versions = [{"label": "v1 — Génération initiale", "data_json": data_json,
                 "pdf_path": pdf_path or "", "tex_path": tex_path or "", "workdir": workdir or ""}]
    vj = json.dumps(versions, ensure_ascii=False)
    dd = gr.update(choices=[v["label"] for v in versions], value=versions[0]["label"])
    if data.get("intro_page"):
        status_msg += " Page d'introduction détectée — coche la case pour l'inclure."
    return pages, pdf_path, tex_path, data_json, status_msg, workdir, vj, dd, _intro_toggle_update(data, False)
def handle_apply_header(current_data_json, current_workdir, versions_json,
                        include_intro, school, teacher, date, doc_type):
    """Replace the header with the form fields and recompile (no API call).

    Figures are carried over from ``current_workdir``. On success a new
    "v{n} — En-tête modifié" entry is appended to the version history.

    Returns:
        The 9 handler values (see handle_upload).
    """
    if not current_data_json:
        return _err("Génère d'abord un document.", versions_json)
    try:
        data = json.loads(current_data_json)
    except json.JSONDecodeError:
        return _err("Données corrompues.", versions_json)
    data["header"] = _make_header(school, teacher, date, doc_type)
    # Same guard as handle_intro_toggle: a compile crash becomes a status
    # message instead of an unhandled exception.
    try:
        pdf_path, tex_path, new_workdir, error = compile_pdf(
            data, prev_workdir=current_workdir or None, include_intro=bool(include_intro),
        )
    except Exception as exc:
        return _err(f"Erreur pendant la compilation : {exc}", versions_json)
    if error:
        return [], None, tex_path, json.dumps(data, ensure_ascii=False), f"Compilation échouée :\n{error}", new_workdir, versions_json, gr.update(), _intro_toggle_update(data, include_intro)
    pages = render_pdf_pages(pdf_path)
    data_json = json.dumps(data, ensure_ascii=False)
    # A corrupt history must not discard the freshly compiled document.
    try:
        versions = json.loads(versions_json) if versions_json else []
    except json.JSONDecodeError:
        versions = []
    if not isinstance(versions, list):
        versions = []
    n = len(versions) + 1
    v = {"label": f"v{n} — En-tête modifié", "data_json": data_json,
         "pdf_path": pdf_path, "tex_path": tex_path, "workdir": new_workdir}
    versions.append(v)
    vj = json.dumps(versions, ensure_ascii=False)
    dd = gr.update(choices=[x["label"] for x in versions], value=v["label"])
    return pages, pdf_path, tex_path, data_json, "En-tête mis à jour.", new_workdir, vj, dd, _intro_toggle_update(data, include_intro)
def handle_modification(current_data_json, current_workdir, versions_json, include_intro, instruction):
    """Apply a natural-language modification and recompile.

    The model returns the full updated data plus a ``changelog``; the
    changelog (or, if empty, the start of the instruction) labels the new
    version history entry. Figures are carried over from
    ``current_workdir``.

    Returns:
        The 9 handler values (see handle_upload). On API, validation or
        compile-crash errors the current data and workdir are kept.
    """
    if not current_data_json:
        return _err("Génère d'abord un document.", versions_json)
    if not instruction or not instruction.strip():
        return _err("Décris la modification.", versions_json)
    try:
        current_data = json.loads(current_data_json)
    except json.JSONDecodeError:
        return _err("Données corrompues.", versions_json)
    try:
        new_data = call_claude_modify(current_data, instruction)
    except Exception as exc:
        return [], None, None, current_data_json, f"Erreur API : {exc}", current_workdir, versions_json, gr.update(), gr.update()
    if not new_data.get("sections"):
        return [], None, None, current_data_json, "Modification invalide.", current_workdir, versions_json, gr.update(), gr.update()
    # Preserve the intro page across modifications unless Claude explicitly
    # rewrote it (the modify tool doesn't normally touch it).
    if "intro_page" not in new_data and current_data.get("intro_page"):
        new_data["intro_page"] = current_data["intro_page"]
    changelog = new_data.pop("changelog", "") or instruction[:80]
    # Same guard as handle_intro_toggle: a compile crash becomes a status
    # message and the current document stays in place.
    try:
        pdf_path, tex_path, new_workdir, error = compile_pdf(
            new_data, prev_workdir=current_workdir or None, include_intro=bool(include_intro),
        )
    except Exception as exc:
        return [], None, None, current_data_json, f"Erreur pendant la compilation : {exc}", current_workdir, versions_json, gr.update(), gr.update()
    if error:
        return [], None, tex_path, json.dumps(new_data, ensure_ascii=False), f"Compilation échouée :\n{error}", new_workdir, versions_json, gr.update(), _intro_toggle_update(new_data, include_intro)
    pages = render_pdf_pages(pdf_path)
    data_json = json.dumps(new_data, ensure_ascii=False)
    # A corrupt history must not discard the freshly compiled document.
    try:
        versions = json.loads(versions_json) if versions_json else []
    except json.JSONDecodeError:
        versions = []
    if not isinstance(versions, list):
        versions = []
    n = len(versions) + 1
    # " — " separates the version number from the description, matching the
    # "v1 — Génération initiale" / "v{n} — En-tête modifié" labels.
    v = {"label": f"v{n} — {changelog[:60]}", "data_json": data_json,
         "pdf_path": pdf_path, "tex_path": tex_path, "workdir": new_workdir}
    versions.append(v)
    vj = json.dumps(versions, ensure_ascii=False)
    dd = gr.update(choices=[x["label"] for x in versions], value=v["label"])
    return pages, pdf_path, tex_path, data_json, f"Modification : {changelog}", new_workdir, vj, dd, _intro_toggle_update(new_data, include_intro)
def handle_version_select(version_label, versions_json, include_intro):
    """Restore a previous version from the dropdown.

    Args:
        version_label: Label of the version entry to restore.
        versions_json: JSON-encoded list of version entries.
        include_intro: Current value of the intro-page checkbox; forwarded
            to ``_intro_toggle_update``.

    Returns:
        Nine values, in the order of the shared UI outputs: gallery pages,
        PDF path, .tex path, data JSON, status text, workdir, versions JSON,
        dropdown update, intro-toggle update. File paths that no longer
        exist on disk are returned as ``None`` and yield an empty gallery.
    """
    if not versions_json:
        return _err("Aucun historique.")
    # Guard the history the same way the other handlers guard the data JSON:
    # an unreadable or non-list history is reported, not raised.
    try:
        versions = json.loads(versions_json)
    except json.JSONDecodeError:
        return _err("Données corrompues.")
    if not isinstance(versions, list):
        return _err("Données corrompues.")

    v = next((x for x in versions if x["label"] == version_label), None)
    if v is None:
        return _err("Version introuvable.", versions_json)

    pdf_path = v.get("pdf_path", "")
    tex_path = v.get("tex_path", "")
    workdir = v.get("workdir", "")
    data_json = v.get("data_json", "")

    # Check each file once so the gallery and the download slot agree.
    pdf_available = bool(pdf_path) and Path(pdf_path).exists()
    tex_available = bool(tex_path) and Path(tex_path).exists()
    pages = render_pdf_pages(pdf_path) if pdf_available else []

    dd = gr.update(choices=[x["label"] for x in versions], value=version_label)
    # The restored data is only needed to compute the toggle update; fall
    # back to an empty dict when it cannot be decoded.
    try:
        restored_data = json.loads(data_json) if data_json else {}
    except json.JSONDecodeError:
        restored_data = {}
    return (
        pages,
        pdf_path if pdf_available else None,
        tex_path if tex_available else None,
        data_json,
        f"Restauré : {version_label}",
        workdir,
        versions_json,
        dd,
        _intro_toggle_update(restored_data, include_intro),
    )
def handle_intro_toggle(current_data_json, current_workdir, versions_json, include_intro):
    """Rebuild the PDF after the intro-page checkbox changes.

    No Claude call is made: the stored data is passed straight to
    ``compile_pdf`` with the new ``include_intro`` value. A successful
    rebuild is appended to the version history.

    Returns:
        Nine values, in the order of the shared UI outputs: gallery pages,
        PDF path, .tex path, data JSON, status text, workdir, versions JSON,
        dropdown update, intro-toggle update.
    """
    if not current_data_json:
        return _err("Génère d'abord un document.", versions_json)

    try:
        document = json.loads(current_data_json)
    except json.JSONDecodeError:
        return _err("Données corrompues.", versions_json)

    if not document.get("intro_page"):
        # The data holds no intro page, so there is nothing to switch.
        return _err("Aucune page d'introduction détectée.", versions_json)

    try:
        pdf_path, tex_path, new_workdir, error = compile_pdf(
            document,
            prev_workdir=current_workdir or None,
            include_intro=bool(include_intro),
        )
    except Exception as exc:
        return _err(f"Erreur pendant la compilation : {exc}", versions_json)

    if error:
        # Compilation reported a failure: keep data and history, expose the
        # .tex file and the new workdir.
        return (
            [],
            None,
            tex_path,
            current_data_json,
            f"Compilation échouée :\n{error}",
            new_workdir,
            versions_json,
            gr.update(),
            _intro_toggle_update(document, include_intro),
        )

    pages = render_pdf_pages(pdf_path)

    if versions_json:
        history = json.loads(versions_json)
    else:
        history = []
    number = len(history) + 1

    # Label suffix and status message both depend on the checkbox value.
    if include_intro:
        label_suffix = "avec page d'intro"
        status = "Page d'introduction ajoutée."
    else:
        label_suffix = "sans page d'intro"
        status = "Page d'introduction retirée."

    entry = {
        "label": f"v{number}{label_suffix}",
        "data_json": current_data_json,
        "pdf_path": pdf_path,
        "tex_path": tex_path,
        "workdir": new_workdir,
    }
    history.append(entry)
    history_json = json.dumps(history, ensure_ascii=False)
    labels = [item["label"] for item in history]
    dropdown_update = gr.update(choices=labels, value=entry["label"])

    return (
        pages,
        pdf_path,
        tex_path,
        current_data_json,
        status,
        new_workdir,
        history_json,
        dropdown_update,
        _intro_toggle_update(document, include_intro),
    )
# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------
# Builds the Gradio interface and wires the handlers to it.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from the statement order — confirm against the intended layout.
with gr.Blocks(title="photos -> latex", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📝 photos -> latex")
    gr.Markdown(
        f"Upload des photos ou un PDF (1 à {MAX_IMAGES} fichiers). "
        "L'app génère le code LaTeX et le PDF, puis te laisse demander "
        "des modifications en français."
    )

    # Hidden, read-only textboxes that carry state between handler calls:
    # the current data JSON, the current workdir and the version history.
    data_state = gr.Textbox(value="", visible=False, interactive=False)
    workdir_state = gr.Textbox(value="", visible=False, interactive=False)
    versions_state = gr.Textbox(value="[]", visible=False, interactive=False)

    gr.Markdown("### En-tête du document")
    with gr.Row():
        hdr_school = gr.Textbox(label="École", value="Groupe Scolaire Charles Péguy", scale=2)
        hdr_teacher = gr.Textbox(label="Enseignant(e)", value="Mme Lahbabi")
        hdr_date = gr.Textbox(label="Date", value="01/05/2026")
        hdr_title = gr.Textbox(label="Type", value="Fiche d'exercices")
    apply_header_btn = gr.Button("Appliquer l'en-tête", size="sm")

    with gr.Row():
        with gr.Column(scale=1):
            images_input = gr.File(label="Photos / PDF", file_count="multiple",
                                   file_types=["image", ".pdf"])
            generate_btn = gr.Button("Générer LaTeX et PDF", variant="primary")
            status = gr.Textbox(label="Status", interactive=False, lines=4)
            # Hidden until Claude detects a cover/intro page on photo 0.
            intro_toggle = gr.Checkbox(
                label="Inclure la page d'introduction détectée (couverture)",
                value=False, visible=False,
            )
            with gr.Row():
                pdf_download = gr.File(label="Télécharger PDF", interactive=False)
                tex_download = gr.File(label="Télécharger .tex", interactive=False)
        with gr.Column(scale=2):
            pdf_gallery = gr.Gallery(label="Aperçu PDF", columns=1, height=720,
                                     object_fit="contain", show_label=True)

    gr.Markdown("## Modifications")
    gr.Markdown("Décris en français la modification souhaitée.")
    instruction_input = gr.Textbox(label="Modification", lines=2)
    modify_btn = gr.Button("Appliquer la modification", variant="primary")

    gr.Markdown("## Historique des versions")
    version_dropdown = gr.Dropdown(label="Versions", choices=[], interactive=True)

    # All 9 outputs — shared across all handlers
    _all_outputs = [pdf_gallery, pdf_download, tex_download, data_state,
                    status, workdir_state, versions_state, version_dropdown,
                    intro_toggle]

    apply_header_btn.click(
        handle_apply_header,
        inputs=[data_state, workdir_state, versions_state, intro_toggle,
                hdr_school, hdr_teacher, hdr_date, hdr_title],
        outputs=_all_outputs,
    )
    generate_btn.click(
        handle_upload,
        inputs=[images_input, hdr_school, hdr_teacher, hdr_date, hdr_title],
        outputs=_all_outputs,
    )
    modify_btn.click(
        handle_modification,
        inputs=[data_state, workdir_state, versions_state, intro_toggle, instruction_input],
        outputs=_all_outputs,
    )
    # `.input` fires only on user interaction. Every handler also writes the
    # dropdown value as an output; with `.change` each of those writes would
    # re-run handle_version_select, re-render the PDF and replace the
    # handler's status message with "Restauré : …".
    version_dropdown.input(
        handle_version_select,
        inputs=[version_dropdown, versions_state, intro_toggle],
        outputs=_all_outputs,
    )
    # NOTE(review): `.change` also fires when a handler updates the checkbox
    # value programmatically — confirm that the resulting recompile is
    # wanted, otherwise use `.input` here as well.
    intro_toggle.change(
        handle_intro_toggle,
        inputs=[data_state, workdir_state, versions_state, intro_toggle],
        outputs=_all_outputs,
    )
if __name__ == "__main__":
password = os.environ.get("APP_PASSWORD", "")
if password:
def check_pw(_, pw):
return pw == password
demo.launch(server_name="0.0.0.0", server_port=7860,
auth=check_pw, auth_message="Entrez le mot de passe :")
else:
demo.launch(server_name="0.0.0.0", server_port=7860)