"""
Conference-template conformance checker.

Reads the rich rule set defined in :mod:`src.templates.base_template` and runs
per-venue checks against the LaTeX source. Each rule fragment lives in its own
small private method so adding new conferences (or new rules) doesn't bloat the
public ``check`` method.

Severity convention used here:

* ``ERROR``   — desk-reject material if uncorrected (NeurIPS missing checklist,
  ACL missing Limitations, double-blind \\author leak).
* ``WARNING`` — likely a real problem but might be a false positive (style
  package mismatch, identifying URL).
* ``INFO``    — soft reminder that something MUST happen later (camera-ready
  sections, lay summaries, font requirements, page-limit estimation that the
  .tex source can't actually verify).
"""
from __future__ import annotations

import re
from typing import List, Optional

from .base import BaseChecker, CheckResult, CheckSeverity


# ------------------------------------------------------------------ helpers ---

def _section_pattern(name: str) -> re.Pattern:
    r"""Compile a case-insensitive regex that finds a heading mentioning *name*.

    The pattern accepts ``\section``, ``\subsection`` and ``\paragraph``
    (optionally starred), an optional ``[short]`` argument, and then a
    ``{title}`` whose text contains *name* anywhere inside it. *name* is
    escaped, so it is matched literally.
    """
    heading_cmd = r'\\(?:section|subsection|paragraph)\*?'
    short_title = r'\s*(?:\[[^\]]*\])?\s*'
    title_before = r'\{[^}]*?'
    title_after = r'[^}]*\}'
    source = heading_cmd + short_title + title_before + re.escape(name) + title_after
    return re.compile(source, re.IGNORECASE)


# Domains/URL patterns that strongly de-anonymize an author. Whitelisted
# domains (which legitimately appear in CV/ML papers without leaking identity)
# are excluded.
_IDENTIFYING_URL_PATTERNS = [ re.compile(r'\bgithub\.com/(?!anonymous)[A-Za-z0-9_\-]+/', re.IGNORECASE), re.compile(r'\b[A-Za-z0-9_\-]+\.github\.io\b', re.IGNORECASE), re.compile(r'\bgitlab\.com/(?!anonymous)[A-Za-z0-9_\-]+/', re.IGNORECASE), re.compile(r'\bbitbucket\.org/(?!anonymous)[A-Za-z0-9_\-]+/', re.IGNORECASE), re.compile(r'\b(?:huggingface\.co|wandb\.ai)/(?!anonymous)[A-Za-z0-9_\-]+/', re.IGNORECASE), re.compile(r'\b(?:linkedin|twitter|x)\.com/[A-Za-z0-9_\-]+', re.IGNORECASE), ] # URLs that are explicitly anonymous-friendly and should NOT be flagged. _ANONYMOUS_URL_HINTS = re.compile( r'(anonymous|anon|blind|review|submission|4open\.science)', re.IGNORECASE, ) # Capture URLs from \url{...}, \href{...}{...}, and bare http(s)://... _URL_FROM_TEX = re.compile( r'\\(?:url|href)\s*\{([^}]+)\}|(? List[CheckResult]: config = config or {} template = config.get("template") if template is None: return [] content = self._remove_comments(tex_content) results: List[CheckResult] = [] self._check_mandatory_sections(template, content, results) self._check_camera_only_sections(template, content, results) self._check_style_package(template, content, results) self._check_doc_class(template, content, results) self._check_paper_size(template, content, results) if template.double_blind: self._check_double_blind_author(template, content, results) if template.forbid_identifying_urls: self._check_identifying_urls(template, content, results) if template.forbid_acks_in_review: self._check_acknowledgments(template, content, results) if template.requires_paper_checklist: self._check_paper_checklist(template, content, results) if template.requires_reproducibility_statement: self._check_reproducibility_statement(template, content, results) if template.requires_lay_summary_camera: self._inform_lay_summary(template, results) if template.requires_type1_fonts: self._inform_type1_fonts(template, results) if template.min_main_pages > 0: self._inform_min_pages(template, results) if 
"Limitations" in template.mandatory_sections: self._check_limitations_content(template, content, results) return results # ============================================================== sections == def _check_mandatory_sections(self, template, content: str, results: List[CheckResult]): for section in template.mandatory_sections or []: if not _section_pattern(section).search(content): results.append(self._create_result( passed=False, severity=CheckSeverity.ERROR, message=f"[{template.name}] Missing mandatory section: '{section}'", suggestion=f"Add `\\section{{{section}}}` (required by {template.name}).", )) def _check_camera_only_sections(self, template, content: str, results: List[CheckResult]): for section in template.mandatory_camera_sections or []: if not _section_pattern(section).search(content): results.append(self._create_result( passed=False, severity=CheckSeverity.INFO, message=( f"[{template.name}] Camera-ready section '{section}' not found. " "Required for the camera-ready version, optional for review." ), suggestion=f"Add `\\section{{{section}}}` before References for camera-ready.", )) # =================================================== style / typesetting == def _check_style_package(self, template, content: str, results: List[CheckResult]): pkg = (template.style_package or "").strip() if not pkg: return pkg_re = re.compile( r'\\(?:usepackage|documentclass)(?:\[[^\]]*\])?\s*\{\s*' + re.escape(pkg) + r'\s*\}' ) if not pkg_re.search(content): results.append(self._create_result( passed=False, severity=CheckSeverity.WARNING, message=( f"[{template.name}] Style package '{pkg}' not found. " "If you really are submitting to this venue, your template may be wrong." 
), suggestion=f"Use the official `{pkg}` style package.", )) def _check_doc_class(self, template, content: str, results: List[CheckResult]): wanted = (template.doc_class or "").strip() if not wanted: return m = _DOCCLASS_RE.search(content) actual = m.group(2).strip() if m else "" if actual.lower() != wanted.lower(): results.append(self._create_result( passed=False, severity=CheckSeverity.WARNING, message=( f"[{template.name}] Expected `\\documentclass{{{wanted}}}`, " f"found `{actual or 'none'}`." ), suggestion=f"Use the official document class `{wanted}` (Springer LNCS for ECCV).", )) def _check_paper_size(self, template, content: str, results: List[CheckResult]): wanted = (template.paper_size or "").lower() if wanted not in {"letter", "a4"}: return m = _DOCCLASS_RE.search(content) if not m: return opts = (m.group(1) or "").lower() actual = None if "letterpaper" in opts or "letter" in opts: actual = "letter" elif "a4paper" in opts or "a4" in opts: actual = "a4" if actual and actual != wanted: results.append(self._create_result( passed=False, severity=CheckSeverity.WARNING, message=( f"[{template.name}] Expected paper size '{wanted}', " f"document class is set to '{actual}'." 
), suggestion=f"Use `\\documentclass[{wanted}paper]{{...}}`.", )) # ================================================================ blinding = def _check_double_blind_author(self, template, content: str, results: List[CheckResult]): m = re.search(r'\\author\s*(?:\[[^\]]*\])?\s*\{([^}]*)\}', content) if not m: return body = m.group(1) if not body.strip(): return if re.search(r'(anonymous|hidden|blind|submission)', body, re.IGNORECASE): return line_num = self._find_line_number(content, m.start()) results.append(self._create_result( passed=False, severity=CheckSeverity.ERROR, message=f"[{template.name}] Double-blind: \\author appears to contain identifying info", line_number=line_num, line_content=body.strip(), suggestion=r"Replace \author with anonymous placeholder during review.", )) def _check_identifying_urls(self, template, content: str, results: List[CheckResult]): for m in _URL_FROM_TEX.finditer(content): url = (m.group(1) or m.group(2) or "").strip() if not url: continue if _ANONYMOUS_URL_HINTS.search(url): continue for pat in _IDENTIFYING_URL_PATTERNS: if pat.search(url): line_num = self._find_line_number(content, m.start()) results.append(self._create_result( passed=False, severity=CheckSeverity.WARNING, message=( f"[{template.name}] Possible identifying URL during double-blind review: " f"{url[:120]}" ), line_number=line_num, line_content=url, suggestion=( "Use Anonymous GitHub (https://anonymous.4open.science) or remove " "the link until the camera-ready version." ), )) break # one finding per URL def _check_acknowledgments(self, template, content: str, results: List[CheckResult]): for pat in _ACK_PATTERNS: m = pat.search(content) if m: line_num = self._find_line_number(content, m.start()) results.append(self._create_result( passed=False, severity=CheckSeverity.WARNING, message=( f"[{template.name}] Acknowledgments section detected; " f"{template.short_name.upper()} requires omitting it during review." 
), line_number=line_num, suggestion=( "Comment out or wrap acks in `\\if...\\fi` so they only " "appear in the camera-ready version." ), )) return # one finding is enough # ============================================== per-venue special items === def _check_paper_checklist(self, template, content: str, results: List[CheckResult]): for pat in _NEURIPS_CHECKLIST_PATTERNS: if pat.search(content): return results.append(self._create_result( passed=False, severity=CheckSeverity.ERROR, message=( f"[{template.name}] NeurIPS Paper Checklist not found. " "NeurIPS desk-rejects submissions without the checklist." ), suggestion=( "Add `\\input{neurips_paper_checklist}` (or paste the official template) " "after References / supplementary." ), )) def _check_reproducibility_statement(self, template, content: str, results: List[CheckResult]): if _REPRO_SECTION.search(content): return results.append(self._create_result( passed=False, severity=CheckSeverity.INFO, message=( f"[{template.name}] Reproducibility Statement not found. " "It's encouraged (~1 page) and does not count toward the page limit." ), suggestion=( "Add `\\section*{Reproducibility Statement}` before References summarizing " "code/data/seeds/hyperparameter availability." ), )) def _inform_lay_summary(self, template, results: List[CheckResult]): results.append(self._create_result( passed=False, severity=CheckSeverity.INFO, message=( f"[{template.name}] Lay summary required at camera-ready time " "(plain-language summary submitted via OpenReview)." ), suggestion="Draft a 1–2 paragraph plain-language summary now to avoid a last-minute scramble.", )) def _inform_type1_fonts(self, template, results: List[CheckResult]): results.append(self._create_result( passed=False, severity=CheckSeverity.INFO, message=( f"[{template.name}] Embedded fonts must be Type-1 only — verify with " "`pdffonts `. Cannot be checked from .tex source alone." 
), suggestion="Compile with `pdflatex` (not XeLaTeX/LuaLaTeX) and convert any Type-3 fonts.", )) def _inform_min_pages(self, template, results: List[CheckResult]): results.append(self._create_result( passed=False, severity=CheckSeverity.INFO, message=( f"[{template.name}] Main text must be at least {template.min_main_pages} pages " f"and at most {template.page_limit_review} pages. Cannot be measured from source." ), suggestion=( f"Compile and confirm the rendered PDF stays within " f"{template.min_main_pages}–{template.page_limit_review} pages of main text." ), )) # ============================================ ACL family: Limitations rule def _check_limitations_content(self, template, content: str, results: List[CheckResult]): # Find the Limitations section span up to the next \section or end of doc. m = re.search( r'(\\section\*?\s*(?:\[[^\]]*\])?\s*\{[^}]*Limitations[^}]*\})', content, re.IGNORECASE, ) if not m: return # mandatory_sections check already flagged absence start = m.end() nxt = re.search(r'\\section\*?\s*\{', content[start:], re.IGNORECASE) end = start + nxt.start() if nxt else len(content) section_body = content[start:end] # Discussion-only rule: no floats, no nested \section if _FLOAT_OR_NEW_SECTION_RE.search(section_body): line_num = self._find_line_number(content, start) results.append(self._create_result( passed=False, severity=CheckSeverity.WARNING, message=( f"[{template.name}] Limitations section appears to contain floats or a " "nested section. ACL/EMNLP/NAACL require Limitations to be discussion only." ), line_number=line_num, suggestion=( "Move tables/figures/algorithms out of Limitations into the main body or " "appendix; Limitations should be prose-only." ), ))