JaceWei committed on
Commit
ba73199
·
1 Parent(s): 90d8b4c
Files changed (2) hide show
  1. app.py +4 -1
  2. posterbuilder/convert.py +73 -0
app.py CHANGED
@@ -502,7 +502,10 @@ def run_pipeline(arxiv_url, pdf_file, openai_key, logo_files, meeting_logo_file,
502
  logs.append(f"🆔 run_id = {WORK_DIR.name}")
503
 
504
  _write_logs(LOG_PATH, logs)
505
- yield "\n".join(logs), (str(ZIP_PATH) if ZIP_PATH.exists() else None), overleaf_zip_b64
 
 
 
506
 
507
  # =====================
508
  # Gradio UI
 
502
  logs.append(f"🆔 run_id = {WORK_DIR.name}")
503
 
504
  _write_logs(LOG_PATH, logs)
505
+ yield "\n".join(logs), (
506
+ str(ZIP_PATH) if ZIP_PATH.exists() else None
507
+ ), render_overleaf_button(overleaf_zip_b64)
508
+
509
 
510
  # =====================
511
  # Gradio UI
posterbuilder/convert.py CHANGED
@@ -62,7 +62,78 @@ RIGHT_LOGO_INNERSEP_CM= 2.0
62
  RIGHT_LOGO_XSHIFT_CM = -2.0
63
  RIGHT_LOGO_YSHIFT_CM = 0.0
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  # ===================== 基础工具 =====================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  def fix_latex_escaped_commands(s: str) -> str:
67
  """
68
  修复由于 \t 被错误解析而导致的 LaTeX 命令丢失反斜杠问题,
@@ -161,6 +232,8 @@ def format_content_to_latex(content: str) -> str:
161
  return ""
162
  # 🔧 新增:先修复由于 \t 被误解析的 LaTeX 命令
163
  content = fix_latex_escaped_commands(content)
 
 
164
 
165
  lines = [ln.strip() for ln in content.splitlines() if ln.strip()]
166
  if lines and all(ln.startswith(("-", "•")) for ln in lines):
 
62
  RIGHT_LOGO_XSHIFT_CM = -2.0
63
  RIGHT_LOGO_YSHIFT_CM = 0.0
64
 
65
# Normaliser: moves "math-looking" content of \textit{...} into math mode.
#
# MATH_BLOCK_RE matches math blocks that already exist so they can be shielded
# from rewriting. A dollar sign only counts as a delimiter when it is not
# escaped: the optional prefix consumes whole "\\" pairs, so a literal "\$" is
# skipped while "\\$" (a line break followed by math) is still recognised.
MATH_BLOCK_RE = re.compile(
    r"(?<!\\)(?:\\\\)*\$\$(?:\\.|[^\\])+?\$\$"   # $$ ... $$
    r"|(?<!\\)(?:\\\\)*\$(?:\\.|[^\\$])+?\$"     # $ ... $
    r"|\\\(.+?\\\)"                              # \( ... \)
    r"|\\\[.+?\\\]",                             # \[ ... \] (may span lines)
    re.S,
)

# Common Greek-letter macros, used to recognise cases such as \textit{\tau}.
GREEK_OR_MATH_MACROS = (
    r"alpha|beta|gamma|delta|epsilon|varepsilon|zeta|eta|theta|vartheta|iota|kappa|lambda|"
    r"mu|nu|xi|pi|varpi|rho|varrho|sigma|varsigma|tau|upsilon|phi|varphi|chi|psi|omega|"
    r"Gamma|Delta|Theta|Lambda|Xi|Pi|Sigma|Upsilon|Phi|Psi|Omega"
)

# \textit{<greek macro><rest>}. The macro name must not be followed by another
# letter (so "\taux" is rejected but "\tau_0" is accepted), and <rest> may
# contain one level of balanced braces such as "^{2}".
_TEXTIT_MACRO_RE = re.compile(
    r"\\textit\{\s*(\\(?:" + GREEK_OR_MATH_MACROS + r")(?![A-Za-z])"
    r"(?:[^{}]|\{[^{}]*\})*?)\s*\}"
)
# \textit{c}(...)  : single letter immediately followed by a parenthesised argument.
_TEXTIT_CALL_RE = re.compile(r"\\textit\{\s*([A-Za-z])\s*\}\s*\(\s*([^()$]+?)\s*\)")
# \textit{c}_0 / \textit{q}^T : single letter followed by a sub/superscript.
_TEXTIT_SCRIPT_RE = re.compile(
    r"\\textit\{\s*([A-Za-z])\s*\}\s*([_^]\s*(?:\{[^{}]*\}|[A-Za-z0-9]))"
)
# \textit{c} : a bare single-letter variable.
_TEXTIT_LETTER_RE = re.compile(r"\\textit\{\s*([A-Za-z])\s*\}")

# ===================== Basic utilities =====================


def normalize_textit_math(s: str) -> str:
    r"""Rewrite pseudo-math written with ``\textit{...}`` into real math mode.

    Examples::

        \textit{\tau}      ->  $\tau$
        \textit{c}(\tau)   ->  $c(\tau)$
        \textit{c}_0       ->  $c_0$
        \textit{c}         ->  $c$

    Rules:
      * Existing math blocks (``$...$``, ``$$...$$``, ``\(...\)``, ``\[...\]``)
        are hidden first and restored verbatim at the end.
      * Only single-letter variables, or content starting with one of the
        macros in ``GREEK_OR_MATH_MACROS``, are moved into math mode.
      * Ordinary words such as ``\textit{SST}`` are left untouched.

    Args:
        s: LaTeX source fragment; may be empty or ``None``.

    Returns:
        The fragment with the rewrites applied. Falsy input is returned as is.
    """
    if not s:
        return s

    # 1) Shield existing math blocks behind NUL-delimited placeholders.
    stash = []

    def _hide(match):
        stash.append(match.group(0))
        return f"\x00M{len(stash) - 1}\x00"

    s = MATH_BLOCK_RE.sub(_hide, s)

    # 2) Apply rewrites from most specific to least specific, so that the
    #    bare single-letter rule never pre-empts the richer forms.
    s = _TEXTIT_MACRO_RE.sub(r"$\1$", s)        # 2a) \textit{\tau...}
    s = _TEXTIT_CALL_RE.sub(r"$\1(\2)$", s)     # 2b) \textit{c}(\tau)
    s = _TEXTIT_SCRIPT_RE.sub(r"$\1\2$", s)     # 2c) \textit{c}_0, \textit{q}^T
    s = _TEXTIT_LETTER_RE.sub(r"$\1$", s)       # 2d) \textit{c}

    # 3) Restore the shielded math blocks.
    for index, block in enumerate(stash):
        s = s.replace(f"\x00M{index}\x00", block)

    return s
136
+
137
  def fix_latex_escaped_commands(s: str) -> str:
138
  """
139
  修复由于 \t 被错误解析而导致的 LaTeX 命令丢失反斜杠问题,
 
232
  return ""
233
  # 🔧 新增:先修复由于 \t 被误解析的 LaTeX 命令
234
  content = fix_latex_escaped_commands(content)
235
+ # NEW: 规范 \textit{...} 中的“伪数学”写法
236
+ content = normalize_textit_math(content)
237
 
238
  lines = [ln.strip() for ln in content.splitlines() if ln.strip()]
239
  if lines and all(ln.startswith(("-", "•")) for ln in lines):