Spaces:
Running
Running
Sophie
committed on
Commit
·
6de8c39
1
Parent(s):
ab99a3d
minor fixes
Browse files
- src/latex_clean.py +9 -36
- src/streamlit_app.py +3 -2
src/latex_clean.py
CHANGED
|
@@ -11,43 +11,19 @@ _MATH_ENVS = [
|
|
| 11 |
def _fix_truncated_end_braces(s: str) -> str:
|
| 12 |
return re.sub(r'(\\end\{[A-Za-z]+(?:\*)?)(?=\s|$)', r'\1}', s)
|
| 13 |
|
| 14 |
-
def _close_unclosed_envs(s: str) -> str:
|
| 15 |
-
token = re.compile(
|
| 16 |
-
r'\\begin\{(?P<b_env>[A-Za-z]+)(?P<b_star>\*)?\}'
|
| 17 |
-
r'|\\end\{(?P<e_env>[A-Za-z]+)(?P<e_star>\*)?}?',
|
| 18 |
-
re.DOTALL
|
| 19 |
-
)
|
| 20 |
-
|
| 21 |
-
stack = []
|
| 22 |
-
for m in token.finditer(s):
|
| 23 |
-
if m.group('b_env'):
|
| 24 |
-
env = m.group('b_env')
|
| 25 |
-
star = m.group('b_star') or ''
|
| 26 |
-
if env in _MATH_ENVS:
|
| 27 |
-
stack.append((env, star))
|
| 28 |
-
else:
|
| 29 |
-
env = m.group('e_env')
|
| 30 |
-
star = m.group('e_star') or ''
|
| 31 |
-
if stack and stack[-1] == (env, star):
|
| 32 |
-
stack.pop()
|
| 33 |
-
|
| 34 |
-
if not stack:
|
| 35 |
-
return s
|
| 36 |
-
|
| 37 |
-
# Append missing delimiters in reverse order
|
| 38 |
-
closers = ''.join(f'\n\\end{{{env}{star}}}' for env, star in reversed(stack))
|
| 39 |
-
return s + closers
|
| 40 |
-
|
| 41 |
def _balance_math_fences(s: str) -> str:
|
|
|
|
|
|
|
|
|
|
| 42 |
# $$ blocks
|
| 43 |
-
if s.count('
|
| 44 |
-
s = s.rstrip() + '
|
| 45 |
# \[ \]
|
| 46 |
-
if len(re.findall(r'
|
| 47 |
-
s = s.rstrip() + '
|
| 48 |
# \( \)
|
| 49 |
-
if len(re.findall(r'
|
| 50 |
-
s = s.rstrip() + '
|
| 51 |
|
| 52 |
return s
|
| 53 |
|
|
@@ -56,9 +32,6 @@ def _repair_unbalanced_math(text: str) -> str:
|
|
| 56 |
text = text.replace('\r\n', '\n').replace('\r', '\n')
|
| 57 |
# fix truncated \end{env
|
| 58 |
text = _fix_truncated_end_braces(text)
|
| 59 |
-
text = text + "]"
|
| 60 |
-
# append closing \end{...} for any unclosed math envs we care about
|
| 61 |
-
text = _close_unclosed_envs(text)
|
| 62 |
# make sure $$ / \[ / \( are closed
|
| 63 |
text = _balance_math_fences(text)
|
| 64 |
return text
|
|
|
|
| 11 |
def _fix_truncated_end_braces(s: str) -> str:
|
| 12 |
return re.sub(r'(\\end\{[A-Za-z]+(?:\*)?)(?=\s|$)', r'\1}', s)
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
def _balance_math_fences(s: str) -> str:
|
| 15 |
+
# {}
|
| 16 |
+
if len(re.findall(r'\{', s)) > len(re.findall(r'\}', s)):
|
| 17 |
+
s = s.rstrip() + r'\}'
|
| 18 |
# $$ blocks
|
| 19 |
+
if s.count('$') % 2 == 1:
|
| 20 |
+
s = s.rstrip() + r'$'
|
| 21 |
# \[ \]
|
| 22 |
+
if len(re.findall(r'\[', s)) > len(re.findall(r'\]', s)):
|
| 23 |
+
s = s.rstrip() + r']'
|
| 24 |
# \( \)
|
| 25 |
+
if len(re.findall(r'\(', s)) > len(re.findall(r'\)', s)):
|
| 26 |
+
s = s.rstrip() + r')'
|
| 27 |
|
| 28 |
return s
|
| 29 |
|
|
|
|
| 32 |
text = text.replace('\r\n', '\n').replace('\r', '\n')
|
| 33 |
# fix truncated \end{env
|
| 34 |
text = _fix_truncated_end_braces(text)
|
|
|
|
|
|
|
|
|
|
| 35 |
# make sure $$ / \[ / \( are closed
|
| 36 |
text = _balance_math_fences(text)
|
| 37 |
return text
|
src/streamlit_app.py
CHANGED
|
@@ -192,7 +192,8 @@ def search_theorems(query, model, theorems_data, embeddings_db):
|
|
| 192 |
|
| 193 |
if theorem_info["global_context"]:
|
| 194 |
cleaned_ctx = clean_latex_for_display(theorem_info["global_context"])
|
| 195 |
-
|
|
|
|
| 196 |
st.write("")
|
| 197 |
|
| 198 |
cleaned_content = clean_latex_for_display(theorem_info['theorem_body'])
|
|
@@ -220,4 +221,4 @@ if model and theorems_data:
|
|
| 220 |
if st.button("Search") or user_query:
|
| 221 |
search_theorems(user_query, model, theorems_data, corpus_embeddings)
|
| 222 |
else:
|
| 223 |
-
st.error("Could not load the model or data from RDS. Please check your RDS database connection and credentials.")
|
|
|
|
| 192 |
|
| 193 |
if theorem_info["global_context"]:
|
| 194 |
cleaned_ctx = clean_latex_for_display(theorem_info["global_context"])
|
| 195 |
+
blockquote_context = "> " + cleaned_ctx.replace("\n", "\n> ")
|
| 196 |
+
st.markdown(blockquote_context)
|
| 197 |
st.write("")
|
| 198 |
|
| 199 |
cleaned_content = clean_latex_for_display(theorem_info['theorem_body'])
|
|
|
|
| 221 |
if st.button("Search") or user_query:
|
| 222 |
search_theorems(user_query, model, theorems_data, corpus_embeddings)
|
| 223 |
else:
|
| 224 |
+
st.error("Could not load the model or data from RDS. Please check your RDS database connection and credentials.")
|