Update app.py
Browse files
app.py
CHANGED
|
@@ -57,25 +57,25 @@ def clean_latex2(latex):
|
|
| 57 |
latex = re.sub(r'\\[ \t\n\r\f\v]*', '', latex)
|
| 58 |
latex = re.sub(r'\\([+\-=])', r'\1', latex)
|
| 59 |
|
| 60 |
-
# Remove
|
| 61 |
latex = re.sub(r'\\text\s*\{.*?\}', '', latex)
|
| 62 |
latex = re.sub(r'\\mathbf\s*\{.*?\}', '', latex)
|
| 63 |
-
latex =
|
| 64 |
-
latex =
|
| 65 |
-
|
|
|
|
| 66 |
latex = latex.replace('−', '-').replace('–', '-')
|
| 67 |
|
| 68 |
replacements = {
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
r'mathcal': '', r'mathrm': '',
|
| 73 |
}
|
| 74 |
for wrong, right in replacements.items():
|
| 75 |
latex = re.sub(wrong, right, latex)
|
| 76 |
|
| 77 |
-
# Remove
|
| 78 |
-
latex =
|
| 79 |
latex = latex.strip().rstrip(',')
|
| 80 |
|
| 81 |
return latex
|
|
|
|
| 57 |
latex = re.sub(r'\\[ \t\n\r\f\v]*', '', latex)
|
| 58 |
latex = re.sub(r'\\([+\-=])', r'\1', latex)
|
| 59 |
|
| 60 |
+
# Remove garbage LaTeX elements
|
| 61 |
latex = re.sub(r'\\text\s*\{.*?\}', '', latex)
|
| 62 |
latex = re.sub(r'\\mathbf\s*\{.*?\}', '', latex)
|
| 63 |
+
latex = re.sub(r'\\mathrm\s*\{.*?\}', '', latex)
|
| 64 |
+
latex = re.sub(r'\\[a-zA-Z]+\s*', '', latex) # kill things like \mathbf, \times, \cdot, \nabla
|
| 65 |
+
|
| 66 |
+
latex = latex.replace('{', '').replace('}', '')
|
| 67 |
latex = latex.replace('−', '-').replace('–', '-')
|
| 68 |
|
| 69 |
replacements = {
|
| 70 |
+
'chi': 'x', 'xi': 'x', 'alpha': 'x',
|
| 71 |
+
'beta': 'b', 'gamma': 'y', 'vartheta': '3',
|
| 72 |
+
'mathcal': '', 'cal': '', 'mathrm': ''
|
|
|
|
| 73 |
}
|
| 74 |
for wrong, right in replacements.items():
|
| 75 |
latex = re.sub(wrong, right, latex)
|
| 76 |
|
| 77 |
+
# Remove remaining junk
|
| 78 |
+
latex = re.sub(r'[^0-9a-zA-Z=+\-*/().,\n ]', '', latex)
|
| 79 |
latex = latex.strip().rstrip(',')
|
| 80 |
|
| 81 |
return latex
|