update
Browse files- app.py +4 -1
- posterbuilder/convert.py +73 -0
app.py
CHANGED
|
@@ -502,7 +502,10 @@ def run_pipeline(arxiv_url, pdf_file, openai_key, logo_files, meeting_logo_file,
|
|
| 502 |
logs.append(f"🆔 run_id = {WORK_DIR.name}")
|
| 503 |
|
| 504 |
_write_logs(LOG_PATH, logs)
|
| 505 |
-
yield "\n".join(logs), (
|
|
|
|
|
|
|
|
|
|
| 506 |
|
| 507 |
# =====================
|
| 508 |
# Gradio UI
|
|
|
|
| 502 |
logs.append(f"🆔 run_id = {WORK_DIR.name}")
|
| 503 |
|
| 504 |
_write_logs(LOG_PATH, logs)
|
| 505 |
+
yield "\n".join(logs), (
|
| 506 |
+
str(ZIP_PATH) if ZIP_PATH.exists() else None
|
| 507 |
+
), render_overleaf_button(overleaf_zip_b64)
|
| 508 |
+
|
| 509 |
|
| 510 |
# =====================
|
| 511 |
# Gradio UI
|
posterbuilder/convert.py
CHANGED
|
@@ -62,7 +62,78 @@ RIGHT_LOGO_INNERSEP_CM= 2.0
|
|
| 62 |
RIGHT_LOGO_XSHIFT_CM = -2.0
|
| 63 |
RIGHT_LOGO_YSHIFT_CM = 0.0
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
# ===================== 基础工具 =====================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
def fix_latex_escaped_commands(s: str) -> str:
|
| 67 |
"""
|
| 68 |
修复由于 \t 被错误解析而导致的 LaTeX 命令丢失反斜杠问题,
|
|
@@ -161,6 +232,8 @@ def format_content_to_latex(content: str) -> str:
|
|
| 161 |
return ""
|
| 162 |
# 🔧 新增:先修复由于 \t 被误解析的 LaTeX 命令
|
| 163 |
content = fix_latex_escaped_commands(content)
|
|
|
|
|
|
|
| 164 |
|
| 165 |
lines = [ln.strip() for ln in content.splitlines() if ln.strip()]
|
| 166 |
if lines and all(ln.startswith(("-", "•")) for ln in lines):
|
|
|
|
| 62 |
RIGHT_LOGO_XSHIFT_CM = -2.0
|
| 63 |
RIGHT_LOGO_YSHIFT_CM = 0.0
|
| 64 |
|
| 65 |
+
# NEW: normalizer -- moves "math-like" content of \textit{...} into math mode.
# Matches math blocks that already exist so they can be masked before rewriting.
MATH_BLOCK_RE = re.compile(
    r"\${1,2}.*?\${1,2}"          # $...$ or $$...$$
    r"|\\\(.+?\\\)"               # \( ... \)
    r"|\\\[(?:.|\n)+?\\\]",       # \[ ... \]  (may span several lines)
    re.S
)

# Common Greek letters / math macros, used to recognise cases like \textit{\tau}.
GREEK_OR_MATH_MACROS = (
    r"alpha|beta|gamma|delta|epsilon|varepsilon|zeta|eta|theta|vartheta|iota|kappa|lambda|"
    r"mu|nu|xi|pi|varpi|rho|varrho|sigma|varsigma|tau|upsilon|phi|varphi|chi|psi|omega|"
    r"Gamma|Delta|Theta|Lambda|Xi|Pi|Sigma|Upsilon|Phi|Psi|Omega"
)

# ===================== Basic utilities =====================

def normalize_textit_math(s: str) -> str:
    r"""Rewrite math-like ``\textit{...}`` fragments as inline math.

    Examples of the intended rewrites::

        \textit{\tau}      ->  $\tau$
        \textit{c}(\tau)   ->  $c(\tau)$
        \textit{c}_0       ->  $c_0$
        \textit{c}         ->  $c$

    Rules:
      * Existing math blocks (``$..$``, ``$$..$$``, ``\(..\)``, ``\[..\]``)
        are masked first and restored untouched at the end.
      * Only a single-letter variable, or content starting with one of the
        macros in ``GREEK_OR_MATH_MACROS``, is switched to math mode.
      * Ordinary words such as ``\textit{SST}`` are left alone.

    Args:
        s: LaTeX source text. Falsy input (``""`` / ``None``) is returned as is.

    Returns:
        The text with the recognised ``\textit{...}`` fragments replaced by
        ``$...$``.
    """
    if not s:
        return s

    # 1) Mask existing math blocks behind NUL-delimited placeholders.
    stash = []

    def _hide(m):
        stash.append(m.group(0))
        return f"\x00M{len(stash)-1}\x00"

    s = MATH_BLOCK_RE.sub(_hide, s)

    # Argument body for step 2a: any characters except braces and the
    # placeholder delimiter, plus ONE level of balanced {...} groups.  This
    # keeps \textit{\tau^{2}} from being cut at the first '}' and keeps a
    # masked math block from being wrapped in a second pair of '$'.
    macro_tail = r"(?:[^{}\x00]|\{[^{}\x00]*\})*?"

    # 2a) \textit{\tau}, \textit{\zeta^{2}}, ... -> $\tau$, $\zeta^{2}$
    # A LaTeX control word ends at the first non-letter, so the macro name is
    # delimited with (?![A-Za-z]) rather than \b (which treats '_' and digits
    # as word characters and would reject \tau_0).
    s = re.sub(
        r"\\textit\{\s*(\\(?:" + GREEK_OR_MATH_MACROS + r")(?![A-Za-z])"
        + macro_tail + r")\s*\}",
        r"$\1$",
        s,
    )

    # 2b) \textit{c}(\tau): single letter directly followed by a parenthesised
    # expression -> $c(\tau)$.  The argument must not contain '$' or a masked
    # math placeholder, otherwise the result would nest '$' delimiters.
    s = re.sub(
        r"\\textit\{\s*([A-Za-z])\s*\}\s*\(\s*([^()$\x00]+?)\s*\)",
        r"$\1(\2)$",
        s,
    )

    # 2c) \textit{c}_0 or \textit{q}^T: pull the trailing sub/superscript into
    # the math span as well.
    s = re.sub(
        r"\\textit\{\s*([A-Za-z])\s*\}\s*([_^]\s*(?:\{[^{}]*\}|[A-Za-z0-9]))",
        r"$\1\2$",
        s,
    )

    # 2d) Bare single-letter variable: \textit{c} / \textit{q} / \textit{X}.
    s = re.sub(
        r"\\textit\{\s*([A-Za-z])\s*\}",
        r"$\1$",
        s,
    )

    # 3) Restore the masked math blocks.
    for i, blk in enumerate(stash):
        s = s.replace(f"\x00M{i}\x00", blk)

    return s
|
| 136 |
+
|
| 137 |
def fix_latex_escaped_commands(s: str) -> str:
|
| 138 |
"""
|
| 139 |
修复由于 \t 被错误解析而导致的 LaTeX 命令丢失反斜杠问题,
|
|
|
|
| 232 |
return ""
|
| 233 |
# 🔧 新增:先修复由于 \t 被误解析的 LaTeX 命令
|
| 234 |
content = fix_latex_escaped_commands(content)
|
| 235 |
+
# NEW: 规范 \textit{...} 中的“伪数学”写法
|
| 236 |
+
content = normalize_textit_math(content)
|
| 237 |
|
| 238 |
lines = [ln.strip() for ln in content.splitlines() if ln.strip()]
|
| 239 |
if lines and all(ln.startswith(("-", "•")) for ln in lines):
|