"""
photos -> latex

Gradio app: upload photos, extract structured exam data via the Anthropic API
(Claude Sonnet, vision + tool use), render it into LaTeX from a fixed template,
compile to PDF. Supports iterative self-verification and natural-language
modifications.
"""

import base64
import io
import json
import os
import shutil
import subprocess
import tempfile
import time
from pathlib import Path

import gradio as gr
from anthropic import Anthropic
from PIL import Image

# HEIF/HEIC support is optional: register the opener only when the plugin
# is installed.
try:
    from pillow_heif import register_heif_opener
    register_heif_opener()
except ImportError:
    pass

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

MODEL = "claude-sonnet-4-6"
MAX_TOKENS = 16000
MAX_IMAGES = 5
VERIFY_BUDGET_S = 120
VERIFY_MAX_ITERATIONS = 4
VERIFY_MAX_FIGURE_REFINEMENTS = 1
MAX_IMAGE_DIM = 1568
MAX_KEPT_WORKDIRS = 20  # higher for version history

# The system prompt is read once, at import time, from a file that sits next
# to this module.
SKILL_PATH = Path(__file__).parent / "skill.md"
SYSTEM_PROMPT = SKILL_PATH.read_text(encoding="utf-8")

# Lazily created API client, shared by every call in this process.
_client: Anthropic | None = None


def get_client() -> Anthropic:
    """Return the shared Anthropic client, creating it on first use.

    Raises:
        RuntimeError: if the ANTHROPIC_API_KEY environment variable is
            missing or empty.
    """
    global _client
    if _client is None:
        if not os.environ.get("ANTHROPIC_API_KEY"):
            raise RuntimeError(
                "ANTHROPIC_API_KEY n'est pas défini. "
                "Définis ce secret dans les paramètres du HF Space."
            )
        _client = Anthropic()
    return _client


# ---------------------------------------------------------------------------
# Tool schemas
# ---------------------------------------------------------------------------

# One figure: which photo it comes from and where it sits on that photo.
_FIGURE_ITEM = {
    "type": "object",
    "properties": {
        "id": {"type": "string", "description": "Short unique name, e.g. 'fig1'"},
        "image_index": {"type": "integer", "description": "0-based index into uploaded photos"},
        "bbox": {
            "type": "array",
            "items": {"type": "number"},
            "minItems": 4,
            "maxItems": 4,
            "description": "[x1,y1,x2,y2] fractional; (0,0)=top-left, (1,1)=bottom-right",
        },
    },
    "required": ["id", "image_index", "bbox"],
}

# One table row: either a question (content) or a figure placeholder.
_ROW_ITEM = {
    "type": "object",
    "properties": {
        "bareme": {"type": "string", "description": "Per-question barème (e.g. '0,5'). Empty '' if none."},
        "content": {"type": "string", "description": "Question text with inline $math$ and numbering."},
        "figure_id": {"type": "string", "description": "If set, this row is a blank-box figure."},
        "figure_width_cm": {"type": "number", "description": "Width of the blank box in cm."},
    },
    "required": ["content"],
}

# One exercise / part: a title row followed by its question rows.
_SECTION_ITEM = {
    "type": "object",
    "properties": {
        "title": {"type": "string", "description": "e.g. 'Exercice1', 'Partie II :', 'Problème'"},
        "bareme": {"type": "string", "description": "Per-exercise barème (e.g. '3points'). '' if per-question."},
        "intro": {"type": "string", "description": "Optional setup text before questions. '' if none."},
        "rows": {"type": "array", "items": _ROW_ITEM, "description": "Questions/sub-questions in order"},
    },
    "required": ["title", "rows"],
}

_INTRO_PAGE = {
    "type": "object",
    "description": (
        "Optional cover/intro page content. Set ONLY when the first photo "
        "is a standalone cover (no questions) — otherwise omit this field."
    ),
    "properties": {
        "title": {"type": "string"},
        "subtitle": {"type": "string"},
        "info_rows": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "label": {"type": "string"},
                    "value": {"type": "string"},
                },
                "required": ["label", "value"],
            },
        },
        "bullets": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "description": {"type": "string"},
                    "bareme": {"type": "string"},
                },
                "required": ["description"],
            },
        },
        "footer": {"type": "string"},
    },
}

# Tool used for the initial extraction and for modifications.
EXAM_TOOL = {
    "name": "submit_exam",
    "description": (
        "Submit extracted exam data. The pipeline renders it into LaTeX "
        "from a fixed template — you never write LaTeX structure yourself."
    ),
    "input_schema": {
        "type": "object",
        "properties": {
            "header": {
                "type": "object",
                "properties": {
                    "top_left": {"type": "string"},
                    "school": {"type": "string"},
                    "exam_info": {"type": "string"},
                    "right": {"type": "string"},
                },
                "required": ["top_left", "school", "exam_info", "right"],
            },
            "sections": {"type": "array", "items": _SECTION_ITEM},
            "figures": {"type": "array", "items": _FIGURE_ITEM},
            "intro_page": _INTRO_PAGE,
            "changelog": {
                "type": "string",
                "description": (
                    "Brief description of what was changed (for modifications). "
                    "Leave empty for initial extraction."
                ),
            },
        },
        "required": ["header", "sections", "figures"],
    },
}

# Tool used by the verification step; exam_data reuses the EXAM_TOOL
# sub-schemas so both tools stay in sync.
VERDICT_TOOL = {
    "name": "submit_verdict",
    "description": (
        "Verdict on whether the rendered PDF matches the original photos. "
        "OK = faithful. FIX = provide corrected exam data."
    ),
    "input_schema": {
        "type": "object",
        "properties": {
            "verdict": {"type": "string", "enum": ["OK", "FIX"]},
            "exam_data": {
                "type": "object",
                "description": "Required if verdict=FIX. Full corrected exam data (same schema as submit_exam).",
                "properties": {
                    "header": EXAM_TOOL["input_schema"]["properties"]["header"],
                    "sections": EXAM_TOOL["input_schema"]["properties"]["sections"],
                    "figures": EXAM_TOOL["input_schema"]["properties"]["figures"],
                    "intro_page": _INTRO_PAGE,
                },
            },
        },
        "required": ["verdict"],
    },
}

# ---------------------------------------------------------------------------
# LaTeX renderer — the fixed template
# ---------------------------------------------------------------------------

# Everything up to (not including) the longtable. \bumppage prints
# "<current>/<last>" and advances the page counter each time it is used.
_PREAMBLE = r"""\documentclass[12pt,a4paper]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage[french]{babel}
\usepackage{amsmath,amssymb,amsfonts}
\usepackage{geometry}
\usepackage{enumitem}
\usepackage{array}
\usepackage{tabularx}
\usepackage{longtable}
\usepackage{graphicx}
\usepackage{lastpage}
\geometry{left=1.5cm, right=1.5cm, top=1cm, bottom=1cm, headheight=0pt, headsep=0pt, footskip=0pt}
\pagestyle{empty}
\setlength{\parindent}{0pt}
\setlength{\tabcolsep}{4pt}
\newlength{\baremeW}
\setlength{\baremeW}{1.6cm}
\newcounter{exampage}
\setcounter{exampage}{0}
\newcommand{\bumppage}{\stepcounter{exampage}\theexampage/\pageref{LastPage}}
\begin{document}
\setlength{\LTleft}{0pt}
\setlength{\LTright}{0pt}
\setlength{\LTpre}{0pt}
\setlength{\LTpost}{0pt}
\setcounter{LTchunksize}{100}
"""

_POSTAMBLE = r"""
\end{longtable}
\end{document}
"""

# Map of Unicode characters that pdflatex+T1 can't handle to their LaTeX equivalents.
_UNICODE_TO_LATEX = { "Ω": r"$\Omega$", "ω": r"$\omega$", "α": r"$\alpha$", "β": r"$\beta$", "γ": r"$\gamma$", "δ": r"$\delta$", "π": r"$\pi$", "φ": r"$\varphi$", "ε": r"$\varepsilon$", "θ": r"$\theta$", "λ": r"$\lambda$", "μ": r"$\mu$", "σ": r"$\sigma$", "τ": r"$\tau$", "Δ": r"$\Delta$", "Σ": r"$\Sigma$", "∞": r"$\infty$", "√": r"$\sqrt{}$", "×": r"$\times$", "÷": r"$\div$", "≤": r"$\leq$", "≥": r"$\geq$", "≠": r"$\neq$", "≈": r"$\approx$", "∈": r"$\in$", "∉": r"$\notin$", "⊂": r"$\subset$", "∪": r"$\cup$", "∩": r"$\cap$", "∅": r"$\emptyset$", "∀": r"$\forall$", "∃": r"$\exists$", "→": r"$\to$", "←": r"$\leftarrow$", "⇒": r"$\Rightarrow$", "–": "--", "—": "---", "…": r"\dots ", "°": r"\textdegree{}", "\u200b": "", "\u00a0": "~", # zero-width space, non-breaking space } def _sanitize_latex(s: str) -> str: """Replace Unicode characters that pdflatex+T1 can't render.""" for char, replacement in _UNICODE_TO_LATEX.items(): s = s.replace(char, replacement) return s def _sanitize_text(s: str) -> str: """ One-stop sanitizer for any text going into the LaTeX template. Handles: Unicode → LaTeX, real newlines → \\newline. Must be applied to EVERY text field (header, title, intro, content, barème). """ # 1. Unicode symbols → LaTeX equivalents for char, replacement in _UNICODE_TO_LATEX.items(): s = s.replace(char, replacement) # 2. Real newline chars → LaTeX \newline (single replace only — see # the double-replace trap comment above the mapping table). s = s.replace("\n", r" \newline ") return s def _header_tabularx(h: dict) -> str: """The four-cell tabularx that draws the header bar. Shared by the longtable (wrapped in \\multicolumn) and the cover page (standalone).""" sc = _sanitize_text(h.get("school", "")) ei = _sanitize_text(h.get("exam_info", "")) rt = _sanitize_text(h.get("right", "")) # Use {\itshape ...} rather than \textit{...}: \textit parses its # argument via \check@nocorr@, which chokes on \begin{...} environments # (e.g. 
\begin{cases}) — we've seen it break the whole longtable. return "\n".join([ r"\noindent\begin{tabularx}{\textwidth}" r"{|>{\centering\arraybackslash}m{\baremeW}" r"|>{\centering\arraybackslash}X" r"|>{\centering\arraybackslash}X" r"|>{\centering\arraybackslash}X|}", r"\hline", rf"{{\itshape \bumppage}} & {{\itshape {sc}}} & " rf"{{\itshape {ei}}} & {{\itshape {rt}}} \tabularnewline", r"\hline", r"\end{tabularx}", ]) def _render_header_bar(h: dict) -> str: """Render the header bar as a longtable row (wrapped in \\multicolumn).""" return "\n".join([ r"\multicolumn{2}{@{}l@{}}{%", _header_tabularx(h), r"}\\*[-\arrayrulewidth]", r"\hline", ]) def _render_intro_page(intro: dict, header: dict) -> str: """Render the optional cover page. Shares the header bar with the longtable so the cover doesn't need its own title. Ends with \\newpage so the longtable starts on a fresh page.""" subtitle = _sanitize_text(intro.get("subtitle", "")) info_rows = intro.get("info_rows", []) or [] bullets = intro.get("bullets", []) or [] footer = _sanitize_text(intro.get("footer", "")) out: list[str] = [] out.append(r"\thispagestyle{empty}") # Same header bar as the longtable pages — \bumppage auto-increments # so the cover shows 1/N and the longtable resumes at 2/N. 
out.append(_header_tabularx(header)) out.append(r"\vspace*{1.5cm}") out.append(r"\begingroup") out.append(r"\linespread{1.5}\selectfont") if subtitle: out.append(r"\begin{center}{\LARGE " + subtitle + r"}\end{center}") out.append(r"\vspace{1cm}") if info_rows: out.append(r"\begin{center}") out.append(r"\large") out.append(r"\begin{tabular}{|l|l|}") out.append(r"\hline") for row in info_rows: label = _sanitize_text(row.get("label", "")) value = _sanitize_text(row.get("value", "")) out.append(rf"\textbf{{{label}}} & {value} \\") out.append(r"\hline") out.append(r"\end{tabular}") out.append(r"\end{center}") out.append(r"\vspace{1.5cm}") if bullets: out.append(r"\large") out.append(r"\begin{itemize}[leftmargin=2cm,itemsep=0.6em]") for b in bullets: desc = _sanitize_text(b.get("description", "") if isinstance(b, dict) else str(b)) bareme = _sanitize_text(b.get("bareme", "") if isinstance(b, dict) else "") if bareme: # description ……………………… (Xpts) — LaTeX \dotfill stretches the dots. out.append(rf" \item {desc}\dotfill ({bareme})") else: out.append(rf" \item {desc}") out.append(r"\end{itemize}") out.append(r"\vspace{1.5cm}") if footer: out.append(r"\begin{center}{\Large\underline{" + footer + r"}}\end{center}") out.append(r"\endgroup") out.append(r"\newpage") out.append("") return "\n".join(out) def render_tex(data: dict, include_intro: bool = False) -> str: """ Deterministically render structured exam data into a complete .tex file. Claude never writes LaTeX — this function owns all formatting. 
""" header = data.get("header", {}) sections = data.get("sections", []) intro_page = data.get("intro_page") lines: list[str] = [_PREAMBLE] if include_intro and intro_page: lines.append(_render_intro_page(intro_page, header)) # Longtable column spec lines.append( r"\begin{longtable}{|>{\centering\arraybackslash}m{\baremeW}" r"|p{\dimexpr\textwidth-\baremeW-4\tabcolsep-3\arrayrulewidth\relax}|}" ) lines.append("") # First-page header lines.append("% ----- Header (first page) -----") lines.append(_render_header_bar(header)) lines.append(r"\endfirsthead") lines.append("") # Subsequent-page header lines.append("% ----- Header (subsequent pages) -----") lines.append(_render_header_bar(header)) lines.append(r"\endhead") lines.append("") # Sections for si, section in enumerate(sections): title = _sanitize_text(section.get("title", "")) bareme_global = _sanitize_text(section.get("bareme", "")) intro = _sanitize_text(section.get("intro", "")) rows = section.get("rows", []) lines.append(f"% =========== {title} ===========") # Title row: global barème (if per-exercise) or empty if bareme_global: lines.append(rf"{{\itshape {bareme_global}}}") else: lines.append("") lines.append("&") lines.append(rf"\underline{{\textbf{{{title}}}}}") # Intro paragraph (in same cell as title) if intro: lines.append("") lines.append(rf"{{\itshape {intro}}}") lines.append(r"\\") # softrow after title # Content rows for row in rows: rb = _sanitize_text(row.get("bareme", "")) fig_id = row.get("figure_id", "") # Barème cell if rb: lines.append(rf"{{\itshape {rb}}}") else: lines.append("") lines.append("&") # Content cell if fig_id: w = row.get("figure_width_cm", 7) lines.append( rf"\begin{{center}}" rf"\includegraphics[width={w}cm]{{{fig_id}.png}}" rf"\end{{center}}" ) else: content = _sanitize_text(row.get("content", "")) lines.append(rf"{{\itshape {content}}}") lines.append(r"\\") # softrow # Horizontal line between sections lines.append(r"\hline") lines.append("") lines.append(_POSTAMBLE) return 
"\n".join(lines) # --------------------------------------------------------------------------- # Image helpers # --------------------------------------------------------------------------- # Max longest-side for the on-disk copy used by every downstream step. # Keeps annotate_bboxes output under Anthropic's 5 MB/image limit in the # verify step while still being sharp enough to see bbox overlays. UPLOAD_MAX_DIM = 2200 def _downscale_to_tempfile(path: str, max_dim: int = UPLOAD_MAX_DIM) -> str: """EXIF-transpose + RGB + cap longest side, save JPEG to a tempdir. Returns a new path; original is left untouched.""" from PIL import ImageOps img = Image.open(path) try: img = ImageOps.exif_transpose(img) except Exception: pass if img.mode not in ("RGB", "L"): img = img.convert("RGB") w, h = img.size longest = max(w, h) if longest > max_dim: ratio = max_dim / longest img = img.resize( (max(1, int(w * ratio)), max(1, int(h * ratio))), Image.LANCZOS, ) tmpdir = tempfile.mkdtemp(prefix="img_") out = Path(tmpdir) / (Path(path).stem + ".jpg") img.save(out, format="JPEG", quality=92, optimize=True) return str(out) def _prepare_images(files) -> list[str]: """ Convert uploaded files to a flat list of image paths. PDFs are split into per-page PNGs via pdftoppm; images pass through. Every resulting image is downscaled to UPLOAD_MAX_DIM so downstream calls (annotate_bboxes, the verify step) stay within API limits. 
""" image_paths: list[str] = [] for f in files: path = f.name if path.lower().endswith(".pdf"): tmpdir = tempfile.mkdtemp(prefix="pdf2img_") subprocess.run( ["pdftoppm", "-png", "-r", "200", path, str(Path(tmpdir) / "page")], capture_output=True, ) pages = sorted(Path(tmpdir).glob("page-*.png")) for p in pages: try: image_paths.append(_downscale_to_tempfile(str(p))) except Exception: image_paths.append(str(p)) else: try: image_paths.append(_downscale_to_tempfile(path)) except Exception: image_paths.append(path) return image_paths def _image_block(path: str) -> dict: img = Image.open(path) try: from PIL import ImageOps img = ImageOps.exif_transpose(img) except Exception: pass if img.mode not in ("RGB", "L"): img = img.convert("RGB") w, h = img.size longest = max(w, h) if longest > MAX_IMAGE_DIM: ratio = MAX_IMAGE_DIM / longest img = img.resize( (max(1, int(w * ratio)), max(1, int(h * ratio))), Image.LANCZOS, ) buf = io.BytesIO() img.save(buf, format="JPEG", quality=90, optimize=True) data = base64.standard_b64encode(buf.getvalue()).decode("ascii") return { "type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": data}, } def annotate_bboxes(image_path: str, figures: list[dict], image_index: int) -> Image.Image: from PIL import ImageDraw, ImageFont, ImageOps img = Image.open(image_path) try: img = ImageOps.exif_transpose(img) except Exception: pass if img.mode not in ("RGB", "RGBA"): img = img.convert("RGB") img = img.copy() draw = ImageDraw.Draw(img) w, h = img.size line_w = max(4, int(min(w, h) * 0.005)) try: font = ImageFont.load_default(size=max(20, int(min(w, h) * 0.025))) except Exception: font = ImageFont.load_default() for fig in figures: if int(fig.get("image_index", 0)) != image_index: continue bbox = fig.get("bbox") or [0, 0, 1, 1] x1 = int(max(0, min(1, float(bbox[0]))) * w) y1 = int(max(0, min(1, float(bbox[1]))) * h) x2 = int(max(0, min(1, float(bbox[2]))) * w) y2 = int(max(0, min(1, float(bbox[3]))) * h) draw.rectangle([x1, y1, 
x2, y2], outline="red", width=line_w) label = str(fig.get("id", "")) if label: tx, ty = x1 + line_w + 2, y1 + line_w + 2 draw.rectangle([tx - 2, ty - 2, tx + len(label) * 14, ty + 22], fill="red") draw.text((tx, ty), label, fill="white", font=font) return img def make_blank_box(image_path: str, bbox: list[float]) -> Image.Image: img = Image.open(image_path) try: from PIL import ImageOps img = ImageOps.exif_transpose(img) except Exception: pass src_w, src_h = img.size img.close() x1f = max(0, min(1, float(bbox[0]))) y1f = max(0, min(1, float(bbox[1]))) x2f = max(0, min(1, float(bbox[2]))) y2f = max(0, min(1, float(bbox[3]))) px_w = max(1, int((x2f - x1f) * src_w)) px_h = max(1, int((y2f - y1f) * src_h)) target_long = 800 longest = max(px_w, px_h) if longest > target_long: scale = target_long / longest px_w = max(1, int(px_w * scale)) px_h = max(1, int(px_h * scale)) box = Image.new("RGB", (px_w, px_h), color="white") from PIL import ImageDraw draw = ImageDraw.Draw(box) bw = max(2, int(min(px_w, px_h) * 0.005)) draw.rectangle([0, 0, px_w - 1, px_h - 1], outline="black", width=bw) return box def _extract_tool_call(response, tool_name: str) -> dict | None: for block in response.content: if getattr(block, "type", None) == "tool_use" and getattr(block, "name", None) == tool_name: return dict(block.input) return None def _extract_text(response) -> str: parts = [] for block in response.content: text = getattr(block, "text", None) if text: parts.append(text) return "".join(parts) # --------------------------------------------------------------------------- # Anthropic calls # --------------------------------------------------------------------------- _INITIAL_PROMPT = ( "Voici des photos d'une page à convertir. " "Extrais le contenu structuré et appelle l'outil `submit_exam`. " "Suis rigoureusement les règles du prompt système." 
) def call_claude_initial(image_paths: list[str]) -> dict: """Send images, get structured exam data via tool use.""" content = [_image_block(p) for p in image_paths] content.append({"type": "text", "text": _INITIAL_PROMPT}) response = get_client().messages.create( model=MODEL, max_tokens=MAX_TOKENS, tools=[EXAM_TOOL], tool_choice={"type": "tool", "name": "submit_exam"}, system=SYSTEM_PROMPT, messages=[{"role": "user", "content": content}], ) payload = _extract_tool_call(response, "submit_exam") if payload is None: raise RuntimeError("Claude n'a pas appelé submit_exam.") return payload def call_claude_verify( image_paths: list[str], pdf_pages: list[str], figure_paths: list[str], current_figures: list[dict], current_data: dict, allow_figure_refinement: bool, ) -> tuple[str, dict | None]: """Returns ("OK", None) or ("FIX", new_data) or ("FAIL", error_str).""" content: list[dict] = [ {"type": "text", "text": "VÉRIFICATION. Photos ORIGINALES avec bboxes annotées en rouge :"} ] for idx, src in enumerate(image_paths): annotated = annotate_bboxes(src, current_figures, idx) buf = io.BytesIO() annotated.save(buf, format="JPEG", quality=85, optimize=True) b64 = base64.standard_b64encode(buf.getvalue()).decode("ascii") content.append({"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": b64}}) if figure_paths: content.append({"type": "text", "text": "Figures extraites (blank boxes) :"}) for p in figure_paths: content.append(_image_block(p)) content.append({"type": "text", "text": "PDF rendu :"}) for p in pdf_pages: content.append(_image_block(p)) content.append({ "type": "text", "text": ( "\n\nDonnées actuelles :\n```json\n" + json.dumps(current_data, ensure_ascii=False, indent=2)[:8000] + "\n```\n\n" "Compare le PDF rendu aux photos originales. " "Vérifie texte, formules, barème, figures. " "Appelle `submit_verdict` : verdict=OK si correct, " "verdict=FIX + exam_data corrigé sinon." 
), }) response = get_client().messages.create( model=MODEL, max_tokens=MAX_TOKENS, tools=[VERDICT_TOOL], tool_choice={"type": "tool", "name": "submit_verdict"}, system=SYSTEM_PROMPT, messages=[{"role": "user", "content": content}], ) payload = _extract_tool_call(response, "submit_verdict") if payload is None: return ("FAIL", "Claude n'a pas appelé submit_verdict.") verdict = (payload.get("verdict") or "").strip().upper() if verdict == "OK": return ("OK", None) if verdict == "FIX": new_data = payload.get("exam_data") if not new_data or "sections" not in new_data: return ("FAIL", "Verdict=FIX sans exam_data valide.") if not allow_figure_refinement and "figures" in new_data: new_data["figures"] = current_data.get("figures", []) return ("FIX", new_data) return ("FAIL", f"Verdict inconnu : {verdict!r}.") def call_claude_modify(current_data: dict, instruction: str) -> dict: """Send current data + instruction, get modified data with changelog.""" user_message = ( "Voici les données structurées actuelles du document :\n\n" "```json\n" + json.dumps(current_data, ensure_ascii=False, indent=2) + "\n```\n\n" f"Demande de modification : {instruction}\n\n" "Modifie les données en conséquence et appelle `submit_exam` " "avec les données complètes mises à jour. " "N'apporte QUE les changements demandés.\n\n" "IMPORTANT : remplis le champ `changelog` avec une description " "courte (1-2 phrases en français) de ce que tu as changé. 
" "Exemple : 'Remplacé la date par Mai 2026 dans l'en-tête.'" ) response = get_client().messages.create( model=MODEL, max_tokens=MAX_TOKENS, tools=[EXAM_TOOL], tool_choice={"type": "tool", "name": "submit_exam"}, system=SYSTEM_PROMPT, messages=[{"role": "user", "content": user_message}], ) payload = _extract_tool_call(response, "submit_exam") if payload is None: raise RuntimeError("Claude n'a pas appelé submit_exam pour la modification.") return payload # --------------------------------------------------------------------------- # Compilation # --------------------------------------------------------------------------- _workdirs: list[str] = [] def _prune_workdirs(): while len(_workdirs) > MAX_KEPT_WORKDIRS: old = _workdirs.pop(0) shutil.rmtree(old, ignore_errors=True) def compile_pdf( data: dict, image_paths: list[str] | None = None, prev_workdir: str | None = None, include_intro: bool = False, ) -> tuple[str | None, str, str, str | None]: """ Render data → .tex, materialize figures, compile with pdflatex. Returns (pdf_path, tex_path, workdir, error). 
""" workdir = tempfile.mkdtemp(prefix="latex_") _workdirs.append(workdir) _prune_workdirs() figures = data.get("figures", []) # Materialize figures as blank boxes if figures and image_paths: for fig in figures: try: idx = int(fig.get("image_index", 0)) if 0 <= idx < len(image_paths): box = make_blank_box(image_paths[idx], fig["bbox"]) box.save(Path(workdir) / f"{fig['id']}.png", "PNG", optimize=True) except Exception as e: print(f"figure {fig.get('id', '?')}: {e}") elif prev_workdir and Path(prev_workdir).is_dir(): for png in Path(prev_workdir).glob("*.png"): if not png.name.startswith("preview-"): shutil.copy(png, Path(workdir) / png.name) # Render and write .tex tex_source = render_tex(data, include_intro=include_intro) tex_path = Path(workdir) / "doc.tex" tex_path.write_text(tex_source, encoding="utf-8") # Compile log = "" for _ in range(2): result = subprocess.run( ["pdflatex", "-interaction=nonstopmode", "doc.tex"], cwd=workdir, capture_output=True, text=True, encoding="utf-8", errors="replace", ) log = result.stdout pdf_path = Path(workdir) / "doc.pdf" if pdf_path.exists(): return str(pdf_path), str(tex_path), workdir, None error_lines = [l for l in log.split("\n") if l.startswith("!") or "Error" in l] return None, str(tex_path), workdir, "\n".join(error_lines[:25]) or log[-2000:] def render_pdf_pages(pdf_path: str) -> list[str]: workdir = Path(pdf_path).parent for stale in workdir.glob("preview-*.png"): try: stale.unlink() except OSError: pass prefix = workdir / "preview" result = subprocess.run( ["pdftoppm", "-png", "-r", "120", pdf_path, str(prefix)], capture_output=True, text=True, encoding="utf-8", errors="replace", ) if result.returncode != 0: return [] return [str(p) for p in sorted(workdir.glob("preview-*.png"))] # --------------------------------------------------------------------------- # Gradio handlers — all return 9 values: # pdf_gallery, pdf_download, tex_download, data_state, status, # workdir_state, versions_state, version_dropdown, 
#   intro_toggle
# ---------------------------------------------------------------------------

def _make_header(school, teacher, date, doc_type):
    """Build the header dict from the four UI fields; the teacher line is
    omitted from the right-hand cell when no teacher is given."""
    return {
        "top_left": "",
        "school": school,
        "exam_info": doc_type,
        "right": f"{teacher}\nDate : {date}" if teacher else f"Date : {date}",
    }


def _intro_toggle_update(data: dict | None, include_intro: bool):
    """Show the 'include intro' checkbox iff intro_page was detected."""
    visible = bool(data and data.get("intro_page"))
    return gr.update(visible=visible, value=bool(include_intro) if visible else False)


def _err(msg, versions_json="[]"):
    """Return a 9-tuple for error/empty states (leaves intro toggle unchanged).

    This resets data_state and workdir_state to empty strings; use
    _err_keep_state when the current document must survive the error.
    """
    return [], None, None, "", msg, "", versions_json, gr.update(), gr.update()


def _err_keep_state(msg, data_json, workdir, versions_json):
    """Return a 9-tuple reporting ``msg`` while keeping the current document
    data, work directory and version history untouched."""
    return [], None, None, data_json, msg, workdir, versions_json, gr.update(), gr.update()


def _load_versions(versions_json) -> list:
    """Decode the version-history JSON; anything missing, malformed or not a
    list yields an empty history instead of raising."""
    if not versions_json:
        return []
    try:
        versions = json.loads(versions_json)
    except (TypeError, json.JSONDecodeError):
        return []
    return versions if isinstance(versions, list) else []


def handle_upload(files, school, teacher, date, doc_type, progress=gr.Progress()):
    """Extract data from the uploaded files, compile it, then run the
    verify/fix loop within the configured time and iteration budget."""
    if not files:
        return _err("Upload au moins une image ou un PDF.")
    if len(files) > MAX_IMAGES:
        return _err(f"Maximum {MAX_IMAGES} fichiers.")

    try:
        image_paths = _prepare_images(files)
    except Exception as exc:
        return _err(f"Erreur pendant la préparation des fichiers : {exc}")
    if not image_paths:
        # e.g. a PDF that could not be converted to page images
        return _err("Aucune image exploitable dans les fichiers fournis.")
    started = time.monotonic()

    progress(0.1, desc="Extraction des données…")
    try:
        data = call_claude_initial(image_paths)
    except Exception as exc:
        return _err(f"Erreur API : {exc}")

    # The header always comes from the UI fields, never from the model.
    user_header = _make_header(school, teacher, date, doc_type)
    data["header"] = user_header

    if not data.get("sections"):
        return _err("Aucune section extraite.")

    progress(0.3, desc="Compilation…")
    try:
        pdf_path, tex_path, workdir, error = compile_pdf(data, image_paths=image_paths)
    except Exception as exc:
        return _err(f"Erreur pendant la compilation : {exc}")

    if error:
        return (
            [], None, tex_path, json.dumps(data, ensure_ascii=False),
            f"Compilation échouée :\n{error}", workdir, "[]",
            gr.update(), _intro_toggle_update(data, False),
        )

    pages = render_pdf_pages(pdf_path)
    data_json = json.dumps(data, ensure_ascii=False)

    # Iterative verify
    current_figures = list(data.get("figures", []))
    figure_refinements = 0
    fixes = 0

    while True:
        elapsed = time.monotonic() - started
        if fixes >= VERIFY_MAX_ITERATIONS or elapsed >= VERIFY_BUDGET_S:
            break

        frac = min(0.9, 0.4 + fixes * 0.15)
        progress(frac, desc=f"Vérification (correction {fixes + 1})…")

        fig_paths = [str(p) for p in sorted(Path(workdir).glob("*.png")) if "preview-" not in p.name]
        allow_fig = figure_refinements < VERIFY_MAX_FIGURE_REFINEMENTS

        try:
            verdict, new_data = call_claude_verify(
                image_paths, pages, fig_paths, current_figures, data, allow_fig
            )
        except Exception as exc:
            # Verification is best effort: keep the last good build.
            print(f"verify: {exc}")
            break

        if verdict == "OK":
            progress(1.0, desc="Terminé")
            break
        if verdict != "FIX" or not new_data:
            break

        fixes += 1
        new_data["header"] = user_header
        # A correction that omits the intro page or the figures means
        # "unchanged", not "removed".
        if "intro_page" not in new_data and data.get("intro_page"):
            new_data["intro_page"] = data["intro_page"]
        new_figures = new_data.get("figures")
        if new_figures is None:
            new_figures = current_figures
        has_new_figs = new_figures != current_figures and allow_fig

        progress(frac + 0.05, desc=f"Compilation correction {fixes}…")
        try:
            if has_new_figs:
                new_pdf, new_tex, new_wd, err = compile_pdf(new_data, image_paths=image_paths)
                current_figures = list(new_figures)
                figure_refinements += 1
            else:
                new_data["figures"] = current_figures
                new_pdf, new_tex, new_wd, err = compile_pdf(new_data, prev_workdir=workdir)
        except Exception as exc:
            print(f"compile (correction {fixes}): {exc}")
            break

        if err:
            break

        data = new_data
        data_json = json.dumps(data, ensure_ascii=False)
        pdf_path, tex_path, workdir = new_pdf, new_tex, new_wd
        pages = render_pdf_pages(pdf_path)

    elapsed = time.monotonic() - started
    status_msg = f"Généré en {elapsed:.0f}s ({fixes} correction(s))."

    # Version history: v1
    versions = [{
        "label": "v1 — Génération initiale",
        "data_json": data_json,
        "pdf_path": pdf_path or "",
        "tex_path": tex_path or "",
        "workdir": workdir or "",
    }]
    vj = json.dumps(versions, ensure_ascii=False)
    dd = gr.update(choices=[v["label"] for v in versions], value=versions[0]["label"])

    if data.get("intro_page"):
        status_msg += " Page d'introduction détectée — coche la case pour l'inclure."

    return pages, pdf_path, tex_path, data_json, status_msg, workdir, vj, dd, _intro_toggle_update(data, False)


def handle_apply_header(current_data_json, current_workdir, versions_json, include_intro,
                        school, teacher, date, doc_type):
    """Replace the header with the UI fields, recompile and record a new
    version. No API call is made."""
    if not current_data_json:
        return _err("Génère d'abord un document.", versions_json)

    try:
        data = json.loads(current_data_json)
    except json.JSONDecodeError:
        return _err("Données corrompues.", versions_json)

    data["header"] = _make_header(school, teacher, date, doc_type)

    try:
        pdf_path, tex_path, new_workdir, error = compile_pdf(
            data, prev_workdir=current_workdir or None, include_intro=bool(include_intro),
        )
    except Exception as exc:
        return _err_keep_state(
            f"Erreur pendant la compilation : {exc}",
            current_data_json, current_workdir, versions_json,
        )
    if error:
        return (
            [], None, tex_path, json.dumps(data, ensure_ascii=False),
            f"Compilation échouée :\n{error}", new_workdir, versions_json,
            gr.update(), _intro_toggle_update(data, include_intro),
        )

    pages = render_pdf_pages(pdf_path)
    data_json = json.dumps(data, ensure_ascii=False)

    versions = _load_versions(versions_json)
    n = len(versions) + 1
    v = {"label": f"v{n} — En-tête modifié", "data_json": data_json,
         "pdf_path": pdf_path, "tex_path": tex_path, "workdir": new_workdir}
    versions.append(v)
    vj = json.dumps(versions, ensure_ascii=False)
    dd = gr.update(choices=[x["label"] for x in versions], value=v["label"])

    return (
        pages, pdf_path, tex_path, data_json, "En-tête mis à jour.",
        new_workdir, vj, dd, _intro_toggle_update(data, include_intro),
    )


def handle_modification(current_data_json, current_workdir, versions_json, include_intro, instruction):
    """Apply a natural-language modification through the API, recompile and
    record a new version. Errors leave the current document state intact."""
    if not current_data_json:
        return _err("Génère d'abord un document.", versions_json)
    if not instruction or not instruction.strip():
        # Keep the current document: an empty instruction must not wipe it.
        return _err_keep_state("Décris la modification.", current_data_json, current_workdir, versions_json)

    try:
        current_data = json.loads(current_data_json)
    except json.JSONDecodeError:
        return _err("Données corrompues.", versions_json)

    try:
        new_data = call_claude_modify(current_data, instruction)
    except Exception as exc:
        return _err_keep_state(f"Erreur API : {exc}", current_data_json, current_workdir, versions_json)

    if not new_data.get("sections"):
        return _err_keep_state("Modification invalide.", current_data_json, current_workdir, versions_json)

    # Preserve the intro page across modifications unless Claude explicitly
    # rewrote it (the modify tool doesn't normally touch it).
    if "intro_page" not in new_data and current_data.get("intro_page"):
        new_data["intro_page"] = current_data["intro_page"]

    changelog = new_data.pop("changelog", "") or instruction[:80]

    try:
        pdf_path, tex_path, new_workdir, error = compile_pdf(
            new_data, prev_workdir=current_workdir or None, include_intro=bool(include_intro),
        )
    except Exception as exc:
        return _err_keep_state(
            f"Erreur pendant la compilation : {exc}",
            current_data_json, current_workdir, versions_json,
        )
    if error:
        return (
            [], None, tex_path, json.dumps(new_data, ensure_ascii=False),
            f"Compilation échouée :\n{error}", new_workdir, versions_json,
            gr.update(), _intro_toggle_update(new_data, include_intro),
        )

    pages = render_pdf_pages(pdf_path)
    data_json = json.dumps(new_data, ensure_ascii=False)

    versions = _load_versions(versions_json)
    n = len(versions) + 1
    v = {"label": f"v{n} — {changelog[:60]}", "data_json": data_json,
         "pdf_path": pdf_path, "tex_path": tex_path, "workdir": new_workdir}
    versions.append(v)
    vj = json.dumps(versions, ensure_ascii=False)
    dd = gr.update(choices=[x["label"] for x in versions], value=v["label"])

    return (
        pages, pdf_path, tex_path, data_json, f"Modification : {changelog}",
        new_workdir, vj, dd, _intro_toggle_update(new_data, include_intro),
    )


def handle_version_select(version_label, versions_json, include_intro):
    """Restore a previous version from the dropdown."""
    if not versions_json:
        return _err("Aucun historique.")
    versions = _load_versions(versions_json)
    v = next(
        (x for x in versions if isinstance(x, dict) and x.get("label") == version_label),
        None,
    )
    if v is None:
        return _err("Version introuvable.", versions_json)

    pdf_path = v.get("pdf_path", "")
    tex_path = v.get("tex_path", "")
    workdir = v.get("workdir", "")
    data_json = v.get("data_json", "")

    # The files of an old version may have been pruned from disk.
    pdf_available = bool(pdf_path) and Path(pdf_path).exists()
    tex_available = bool(tex_path) and Path(tex_path).exists()

    pages = render_pdf_pages(pdf_path) if pdf_available else []
    dd = gr.update(
        choices=[x["label"] for x in versions if isinstance(x, dict) and "label" in x],
        value=version_label,
    )
    try:
        restored_data = json.loads(data_json) if data_json else {}
    except json.JSONDecodeError:
        restored_data = {}

    return (
        pages,
        pdf_path if pdf_available else None,
        tex_path if tex_available else None,
        data_json,
        f"Restauré : {version_label}",
        workdir,
        versions_json,
        dd,
        _intro_toggle_update(restored_data, include_intro),
    )


def handle_intro_toggle(current_data_json, current_workdir, versions_json, include_intro):
    """Recompile the current data with the new include_intro setting.
    Lightweight — no Claude call, just render + pdflatex."""
    if not current_data_json:
        return _err("Génère d'abord un document.", versions_json)
    try:
        data = json.loads(current_data_json)
    except json.JSONDecodeError:
        return _err("Données corrompues.", versions_json)

    if not data.get("intro_page"):
        # Nothing to toggle; just re-emit current state with the toggle hidden.
        return (
            [], None, None, current_data_json, "Aucune page d'introduction détectée.",
            current_workdir, versions_json, gr.update(), _intro_toggle_update(data, False),
        )

    try:
        pdf_path, tex_path, new_workdir, error = compile_pdf(
            data, prev_workdir=current_workdir or None, include_intro=bool(include_intro),
        )
    except Exception as exc:
        return _err_keep_state(
            f"Erreur pendant la compilation : {exc}",
            current_data_json, current_workdir, versions_json,
        )
    if error:
        return (
            [], None, tex_path, current_data_json, f"Compilation échouée :\n{error}",
            new_workdir, versions_json, gr.update(), _intro_toggle_update(data, include_intro),
        )

    pages = render_pdf_pages(pdf_path)

    versions = _load_versions(versions_json)
    n = len(versions) + 1
    label_suffix = "avec page d'intro" if include_intro else "sans page d'intro"
    v = {"label": f"v{n} — {label_suffix}", "data_json": current_data_json,
         "pdf_path": pdf_path, "tex_path": tex_path, "workdir": new_workdir}
    versions.append(v)
    vj = json.dumps(versions, ensure_ascii=False)
    dd = gr.update(choices=[x["label"] for x in versions], value=v["label"])

    status = "Page d'introduction ajoutée." if include_intro else "Page d'introduction retirée."
    return (
        pages, pdf_path, tex_path, current_data_json, status,
        new_workdir, vj, dd, _intro_toggle_update(data, include_intro),
    )


# --------------------------------------------------------------------------- # UI # --------------------------------------------------------------------------- with gr.Blocks(title="photos -> latex", theme=gr.themes.Soft()) as demo: gr.Markdown("# 📝 photos -> latex") gr.Markdown( f"Upload des photos ou un PDF (1 à {MAX_IMAGES} fichiers). " "L'app génère le code LaTeX et le PDF, puis te laisse demander " "des modifications en français." ) data_state = gr.Textbox(value="", visible=False, interactive=False) workdir_state = gr.Textbox(value="", visible=False, interactive=False) versions_state = gr.Textbox(value="[]", visible=False, interactive=False) gr.Markdown("### En-tête du document") with gr.Row(): hdr_school = gr.Textbox(label="École", value="Groupe Scolaire Charles Péguy", scale=2) hdr_teacher = gr.Textbox(label="Enseignant(e)", value="Mme Lahbabi") hdr_date = gr.Textbox(label="Date", value="01/05/2026") hdr_title = gr.Textbox(label="Type", value="Fiche d'exercices") apply_header_btn = gr.Button("Appliquer l'en-tête", size="sm") with gr.Row(): with gr.Column(scale=1): images_input = gr.File(label="Photos / PDF", file_count="multiple", file_types=["image", ".pdf"]) generate_btn = gr.Button("Générer LaTeX et PDF", variant="primary") status = gr.Textbox(label="Status", interactive=False, lines=4) # Hidden until Claude detects a cover/intro page on photo 0.
intro_toggle = gr.Checkbox( label="Inclure la page d'introduction détectée (couverture)", value=False, visible=False, ) with gr.Row(): pdf_download = gr.File(label="Télécharger PDF", interactive=False) tex_download = gr.File(label="Télécharger .tex", interactive=False) with gr.Column(scale=2): pdf_gallery = gr.Gallery(label="Aperçu PDF", columns=1, height=720, object_fit="contain", show_label=True) gr.Markdown("## Modifications") gr.Markdown("Décris en français la modification souhaitée.") instruction_input = gr.Textbox(label="Modification", lines=2) modify_btn = gr.Button("Appliquer la modification", variant="primary") gr.Markdown("## Historique des versions") version_dropdown = gr.Dropdown(label="Versions", choices=[], interactive=True) # All 9 outputs — shared across all handlers _all_outputs = [pdf_gallery, pdf_download, tex_download, data_state, status, workdir_state, versions_state, version_dropdown, intro_toggle] apply_header_btn.click( handle_apply_header, inputs=[data_state, workdir_state, versions_state, intro_toggle, hdr_school, hdr_teacher, hdr_date, hdr_title], outputs=_all_outputs, ) generate_btn.click( handle_upload, inputs=[images_input, hdr_school, hdr_teacher, hdr_date, hdr_title], outputs=_all_outputs, ) modify_btn.click( handle_modification, inputs=[data_state, workdir_state, versions_state, intro_toggle, instruction_input], outputs=_all_outputs, ) version_dropdown.change( handle_version_select, inputs=[version_dropdown, versions_state, intro_toggle], outputs=_all_outputs, ) intro_toggle.change( handle_intro_toggle, inputs=[data_state, workdir_state, versions_state, intro_toggle], outputs=_all_outputs, ) if __name__ == "__main__": password = os.environ.get("APP_PASSWORD", "") if password: def check_pw(_, pw): return pw == password demo.launch(server_name="0.0.0.0", server_port=7860, auth=check_pw, auth_message="Entrez le mot de passe :") else: demo.launch(server_name="0.0.0.0", server_port=7860)