Update app.py
Browse files
app.py
CHANGED
|
@@ -15,6 +15,17 @@ model = LatexOCR()
|
|
| 15 |
|
| 16 |
# Clean LaTeX output
|
| 17 |
def clean_latex(latex):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
latex = re.sub(r'\\(cal|mathcal)\s*X', 'x', latex)
|
| 19 |
latex = re.sub(r'\\(cal|mathcal)\s*Y', 'y', latex)
|
| 20 |
latex = re.sub(r'\\(cal|mathcal)\s*Z', 'z', latex)
|
|
@@ -34,6 +45,7 @@ def clean_latex(latex):
|
|
| 34 |
latex += '=0'
|
| 35 |
return latex
|
| 36 |
|
|
|
|
| 37 |
# Fallback to external LLM microservice
|
| 38 |
def request_llm_fallback(bad_latex, llm_url):
|
| 39 |
pre_cleaned = re.sub(
|
|
|
|
| 15 |
|
| 16 |
# Clean LaTeX output
|
| 17 |
def clean_latex(latex):
|
| 18 |
+
# Fix common OCR misreads: Greek-letter commands emitted in place of Latin letters (e.g. \chi → x)
|
| 19 |
+
replacements = {
|
| 20 |
+
r'\chi': 'x',
|
| 21 |
+
r'\xi': 'x',
|
| 22 |
+
r'\alpha': 'a',
|
| 23 |
+
r'\beta': 'b',
|
| 24 |
+
r'\gamma': 'y'
|
| 25 |
+
}
|
| 26 |
+
for wrong, correct in replacements.items():
|
| 27 |
+
latex = latex.replace(wrong, correct)
|
| 28 |
+
|
| 29 |
latex = re.sub(r'\\(cal|mathcal)\s*X', 'x', latex)
|
| 30 |
latex = re.sub(r'\\(cal|mathcal)\s*Y', 'y', latex)
|
| 31 |
latex = re.sub(r'\\(cal|mathcal)\s*Z', 'z', latex)
|
|
|
|
| 45 |
latex += '=0'
|
| 46 |
return latex
|
| 47 |
|
| 48 |
+
|
| 49 |
# Fallback to external LLM microservice
|
| 50 |
def request_llm_fallback(bad_latex, llm_url):
|
| 51 |
pre_cleaned = re.sub(
|