MasteredUltraInstinct committed on
Commit
bec810e
·
verified ·
1 Parent(s): c63cd75

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -15
app.py CHANGED
@@ -15,31 +15,67 @@ model = LatexOCR()
15
 
16
  # Clean LaTeX output
17
  def clean_latex(latex):
18
- # 🔧 NEW FIX: Remove bad spacing escapes and redundant slashes
19
- latex = latex.replace('\\ ', '') # Remove space escapes
20
- latex = latex.replace('\\\\', '\\') # Reduce double slashes
21
- latex = re.sub(r'\\[ \t\n\r\f\v]*', '', latex) # Remove \ followed by whitespace or control chars
22
 
23
  # ✅ Fix invalid LaTeX escapes like \+ \- \=
24
  latex = re.sub(r'\\([+\-=])', r'\1', latex)
25
 
26
- # 🔁 Replace common misrecognized symbols
27
  # 🔁 Replace common misrecognized symbols (with and without backslash)
28
  replacements = {
29
- r'\\chi': 'x', # With escape
30
- r'chi': 'x', # After \chi becomes plain
31
- r'\\xi': 'x',
32
- r'xi': 'x',
33
- r'\\alpha': 'a',
34
- r'alpha': 'a',
35
- r'\\beta': 'b',
36
- r'beta': 'b',
37
- r'\\gamma': 'y',
38
- r'gamma': 'y'
39
  }
40
  for wrong, correct in replacements.items():
41
  latex = re.sub(wrong, correct, latex)
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  # Replace mathcal variables
45
  latex = re.sub(r'\\(cal|mathcal)\s*X', 'x', latex)
 
15
 
16
  # Clean LaTeX output
17
  def clean_latex(latex):
18
+ # 🔧 Remove bad spacing escapes and redundant slashes
19
+ latex = latex.replace('\\ ', '')
20
+ latex = latex.replace('\\\\', '\\')
21
+ latex = re.sub(r'\\[ \t\n\r\f\v]*', '', latex)
22
 
23
  # ✅ Fix invalid LaTeX escapes like \+ \- \=
24
  latex = re.sub(r'\\([+\-=])', r'\1', latex)
25
 
 
26
  # 🔁 Replace common misrecognized symbols (with and without backslash)
27
  replacements = {
28
+ r'\\chi': 'x', r'chi': 'x',
29
+ r'\\xi': 'x', r'xi': 'x',
30
+ r'\\alpha': 'a', r'alpha': 'a',
31
+ r'\\beta': 'b', r'beta': 'b',
32
+ r'\\gamma': 'y', r'gamma': 'y'
 
 
 
 
 
33
  }
34
  for wrong, correct in replacements.items():
35
  latex = re.sub(wrong, correct, latex)
36
 
37
+ # ✅ Fix mathcal and cal variables in all formats
38
+ latex = re.sub(r'\\(cal|mathcal)\s*\{?\s*X\s*\}?', 'x', latex)
39
+ latex = re.sub(r'\\(cal|mathcal)\s*\{?\s*Y\s*\}?', 'y', latex)
40
+ latex = re.sub(r'\\(cal|mathcal)\s*\{?\s*Z\s*\}?', 'z', latex)
41
+
42
+ # ✅ Also clean up OCR leftovers like `cal X`, `mathcal x`
43
+ latex = latex.replace('cal x', 'x')
44
+ latex = latex.replace('cal X', 'x')
45
+ latex = latex.replace('mathcal x', 'x')
46
+ latex = latex.replace('mathcal X', 'x')
47
+
48
+ # 🧹 Remove curly braces and punctuation
49
+ latex = latex.replace('{', '').replace('}', '')
50
+ latex = latex.strip().rstrip(',.') # remove trailing punctuation
51
+
52
+ # ✅ Replace constants
53
+ latex = re.sub(r'(\d+)\s*\\pi', r'(\1*3.1416)', latex)
54
+ latex = latex.replace(r'\pi', '3.1416')
55
+ latex = re.sub(r'(\d+)\s*e', r'(\1*2.7183)', latex)
56
+ latex = re.sub(r'(?<![a-zA-Z0-9])e(?![a-zA-Z0-9])', '2.7183', latex)
57
+
58
+ # ✅ Add multiplication where missing
59
+ latex = re.sub(r'(\d)([a-zA-Z])', r'\1*\2', latex)
60
+ latex = re.sub(r'(\d+)\s*i', r'\1*I', latex)
61
+ latex = re.sub(r'(?<![a-zA-Z0-9])i(?![a-zA-Z0-9])', 'I', latex)
62
+
63
+ # ✅ (x+1)x → (x+1)*x
64
+ latex = re.sub(r'\(([^()]+?)\)\s*([a-zA-Z](\^\d+)?)', r'(\1)*\2', latex)
65
+
66
+ # ✅ Fix symbols
67
+ latex = latex.replace(r'\cdot', '*')
68
+ latex = latex.replace('−', '-') # Unicode minus
69
+
70
+ # ✅ Final cleanup: remove any stray garbage symbols
71
+ latex = re.sub(r'[^\w\s^=+*\-().]', '', latex)
72
+
73
+ # ✅ Ensure it's an equation
74
+ if '=' not in latex:
75
+ latex += '=0'
76
+
77
+ return latex
78
+
79
 
80
  # Replace mathcal variables
81
  latex = re.sub(r'\\(cal|mathcal)\s*X', 'x', latex)