Sophie committed on
Commit
6de8c39
·
1 Parent(s): ab99a3d

minor fixes

Browse files
Files changed (2) hide show
  1. src/latex_clean.py +9 -36
  2. src/streamlit_app.py +3 -2
src/latex_clean.py CHANGED
@@ -11,43 +11,19 @@ _MATH_ENVS = [
11
  def _fix_truncated_end_braces(s: str) -> str:
12
  return re.sub(r'(\\end\{[A-Za-z]+(?:\*)?)(?=\s|$)', r'\1}', s)
13
 
14
- def _close_unclosed_envs(s: str) -> str:
15
- token = re.compile(
16
- r'\\begin\{(?P<b_env>[A-Za-z]+)(?P<b_star>\*)?\}'
17
- r'|\\end\{(?P<e_env>[A-Za-z]+)(?P<e_star>\*)?}?',
18
- re.DOTALL
19
- )
20
-
21
- stack = []
22
- for m in token.finditer(s):
23
- if m.group('b_env'):
24
- env = m.group('b_env')
25
- star = m.group('b_star') or ''
26
- if env in _MATH_ENVS:
27
- stack.append((env, star))
28
- else:
29
- env = m.group('e_env')
30
- star = m.group('e_star') or ''
31
- if stack and stack[-1] == (env, star):
32
- stack.pop()
33
-
34
- if not stack:
35
- return s
36
-
37
- # Append missing delimiters in reverse order
38
- closers = ''.join(f'\n\\end{{{env}{star}}}' for env, star in reversed(stack))
39
- return s + closers
40
-
41
  def _balance_math_fences(s: str) -> str:
 
 
 
42
  # $$ blocks
43
- if s.count('$$') % 2 == 1:
44
- s = s.rstrip() + '\n$$'
45
  # \[ \]
46
- if len(re.findall(r'\\\[', s)) > len(re.findall(r'\\\]', s)):
47
- s = s.rstrip() + '\n\\]'
48
  # \( \)
49
- if len(re.findall(r'\\\(', s)) > len(re.findall(r'\\\)', s)):
50
- s = s.rstrip() + '\\)'
51
 
52
  return s
53
 
@@ -56,9 +32,6 @@ def _repair_unbalanced_math(text: str) -> str:
56
  text = text.replace('\r\n', '\n').replace('\r', '\n')
57
  # fix truncated \end{env
58
  text = _fix_truncated_end_braces(text)
59
- text = text + "]"
60
- # append closing \end{...} for any unclosed math envs we care about
61
- text = _close_unclosed_envs(text)
62
  # make sure $$ / \[ / \( are closed
63
  text = _balance_math_fences(text)
64
  return text
 
11
  def _fix_truncated_end_braces(s: str) -> str:
12
  return re.sub(r'(\\end\{[A-Za-z]+(?:\*)?)(?=\s|$)', r'\1}', s)
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def _balance_math_fences(s: str) -> str:
15
+ # {}
16
+ if len(re.findall(r'\{', s)) > len(re.findall(r'\}', s)):
17
+ s = s.rstrip() + r'\}'
18
  # $$ blocks
19
+ if s.count('$') % 2 == 1:
20
+ s = s.rstrip() + r'$'
21
  # \[ \]
22
+ if len(re.findall(r'\[', s)) > len(re.findall(r'\]', s)):
23
+ s = s.rstrip() + r']'
24
  # \( \)
25
+ if len(re.findall(r'\(', s)) > len(re.findall(r'\)', s)):
26
+ s = s.rstrip() + r')'
27
 
28
  return s
29
 
 
32
  text = text.replace('\r\n', '\n').replace('\r', '\n')
33
  # fix truncated \end{env
34
  text = _fix_truncated_end_braces(text)
 
 
 
35
  # make sure $$ / \[ / \( are closed
36
  text = _balance_math_fences(text)
37
  return text
src/streamlit_app.py CHANGED
@@ -192,7 +192,8 @@ def search_theorems(query, model, theorems_data, embeddings_db):
192
 
193
  if theorem_info["global_context"]:
194
  cleaned_ctx = clean_latex_for_display(theorem_info["global_context"])
195
- st.markdown(f"> {cleaned_ctx.replace('\n', '\n> ')}")
 
196
  st.write("")
197
 
198
  cleaned_content = clean_latex_for_display(theorem_info['theorem_body'])
@@ -220,4 +221,4 @@ if model and theorems_data:
220
  if st.button("Search") or user_query:
221
  search_theorems(user_query, model, theorems_data, corpus_embeddings)
222
  else:
223
- st.error("Could not load the model or data from RDS. Please check your RDS database connection and credentials.")
 
192
 
193
  if theorem_info["global_context"]:
194
  cleaned_ctx = clean_latex_for_display(theorem_info["global_context"])
195
+ blockquote_context = "> " + cleaned_ctx.replace("\n", "\n> ")
196
+ st.markdown(blockquote_context)
197
  st.write("")
198
 
199
  cleaned_content = clean_latex_for_display(theorem_info['theorem_body'])
 
221
  if st.button("Search") or user_query:
222
  search_theorems(user_query, model, theorems_data, corpus_embeddings)
223
  else:
224
+ st.error("Could not load the model or data from RDS. Please check your RDS database connection and credentials.")