Update app.py
Browse files
app.py
CHANGED
|
@@ -5,19 +5,9 @@ import sympy as sp
|
|
| 5 |
from sympy.parsing.latex import parse_latex
|
| 6 |
import re
|
| 7 |
|
| 8 |
-
# ADDITION: A lightweight CPU-compatible LLM for fallback
|
| 9 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 10 |
-
import torch
|
| 11 |
-
|
| 12 |
-
# All LLM inference is pinned to the CPU.
device = "cpu"
# You can swap this model with any CPU-friendly LLM; this one works well.
# NOTE(review): the checkpoint name indicates a 7B-parameter model, which is
# presumably heavy for CPU-only hosting; confirm memory/latency are acceptable.
llm_name = "mistralai/Mistral-7B-Instruct-v0.2"
# Tokenizer and model are loaded once, at import time, and reused by
# fix_latex_with_llm.
# NOTE(review): trust_remote_code=True presumably allows code shipped with the
# model repository to run locally; confirm this checkpoint actually needs it.
llm_tokenizer = AutoTokenizer.from_pretrained(llm_name, trust_remote_code=True)
llm_model = AutoModelForCausalLM.from_pretrained(llm_name, trust_remote_code=True, device_map={"": device})
|
| 17 |
-
|
| 18 |
# Preprocessing for handwritten image
|
| 19 |
def preprocess_handwritten_image(pil_img):
|
| 20 |
-
return pil_img.convert('RGB') # Minimal processing
|
| 21 |
|
| 22 |
# Load Pix2Tex model (once)
|
| 23 |
model = LatexOCR()
|
|
@@ -26,107 +16,117 @@ model = LatexOCR()
|
|
| 26 |
def clean_latex(latex):
|
| 27 |
# Replace \mathcal{X} or \cal X with 'x'
|
| 28 |
latex = re.sub(r'\\(cal|mathcal)\s*X', 'x', latex)
|
| 29 |
-
# Replace \mathcal{Y}
|
| 30 |
latex = re.sub(r'\\(cal|mathcal)\s*Y', 'y', latex)
|
|
|
|
| 31 |
latex = re.sub(r'\\(cal|mathcal)\s*Z', 'z', latex)
|
| 32 |
|
| 33 |
# Remove curly braces
|
| 34 |
latex = latex.replace('{', '').replace('}', '')
|
| 35 |
-
latex = latex.strip().rstrip('
|
| 36 |
-
|
| 37 |
-
# Replace
|
| 38 |
latex = re.sub(r'(\d+)\s*\\pi', r'(\1*3.1416)', latex)
|
| 39 |
latex = latex.replace(r'\pi', '3.1416')
|
|
|
|
|
|
|
| 40 |
latex = re.sub(r'(\d+)\s*e', r'(\1*2.7183)', latex)
|
| 41 |
latex = re.sub(r'(?<![a-zA-Z0-9])e(?![a-zA-Z0-9])', '2.7183', latex)
|
| 42 |
|
| 43 |
-
# Insert * between number and variable
|
| 44 |
latex = re.sub(r'(\d)([a-zA-Z])', r'\1*\2', latex)
|
|
|
|
|
|
|
| 45 |
latex = re.sub(r'(\d+)\s*i', r'\1*I', latex)
|
|
|
|
|
|
|
| 46 |
latex = re.sub(r'(?<![a-zA-Z0-9])i(?![a-zA-Z0-9])', 'I', latex)
|
| 47 |
|
| 48 |
-
# Wrap complex coefficients
|
| 49 |
latex = re.sub(r'\(([^()]+?)\)\s*([a-zA-Z](\^\d+)?)', r'(\1)*\2', latex)
|
| 50 |
|
| 51 |
-
# Additional cleanup
|
| 52 |
latex = latex.replace(r'\cdot', '*')
|
| 53 |
-
latex = latex.replace('β', '-') # Unicode minus
|
| 54 |
|
| 55 |
-
# Append '=0' if
|
| 56 |
if '=' not in latex:
|
| 57 |
latex += '=0'
|
| 58 |
|
| 59 |
return latex
|
| 60 |
|
| 61 |
-
# ADDITION: Function to ask the LLM to fix LaTeX for SymPy
|
| 62 |
-
def fix_latex_with_llm(original_latex: str) -> str:
    """Ask the fallback LLM to repair OCR-extracted LaTeX for SymPy.

    Args:
        original_latex: LaTeX string as extracted from the image.

    Returns:
        The text generated by the model, stripped of surrounding whitespace.
        Only the newly generated continuation is returned, not the prompt.
    """
    prompt = (
        "Here is a LaTeX equation extracted from an image:\n"
        f"{original_latex}\n\n"
        "Fix any errors so it becomes valid LaTeX parsable by Sympy.\n"
        "Return only the corrected LaTeX expression."
    )
    tokens = llm_tokenizer(prompt, return_tensors="pt").to(device)
    gen = llm_model.generate(**tokens, max_new_tokens=60)

    # For a causal LM, generate() returns the prompt tokens followed by the
    # continuation. Decoding the whole sequence would hand the instructions
    # back to the caller as if they were LaTeX, so drop the prompt first.
    prompt_len = tokens["input_ids"].shape[1]
    out = llm_tokenizer.decode(gen[0][prompt_len:], skip_special_tokens=True)
    return out.strip()
|
| 73 |
-
|
| 74 |
# Main function
|
| 75 |
def solve_polynomial(image):
|
| 76 |
try:
|
| 77 |
img = preprocess_handwritten_image(image)
|
| 78 |
latex_result = model(img)
|
| 79 |
|
|
|
|
| 80 |
if not latex_result or len(latex_result.strip()) < 2:
|
| 81 |
return "β Could not extract valid LaTeX from image."
|
| 82 |
|
| 83 |
cleaned_latex = clean_latex(latex_result)
|
| 84 |
|
| 85 |
-
# Attempt parsing
|
| 86 |
try:
|
| 87 |
expr = parse_latex(cleaned_latex)
|
| 88 |
except Exception:
|
| 89 |
expr = None
|
| 90 |
|
| 91 |
-
# Fallback to LLM
|
| 92 |
if expr is None:
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
output = (
|
| 105 |
-
f"## π Extracted LaTeX\n
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
"---\n"
|
| 107 |
-
f"## π§Ή Cleaned
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
"---\n"
|
| 109 |
f"## π§ Parsed Expression\n\n$$ {sp.latex(expr)} $$\n"
|
| 110 |
"---\n"
|
| 111 |
)
|
| 112 |
|
| 113 |
-
# Solve or simplify
|
| 114 |
if isinstance(expr, sp.Equality):
|
| 115 |
lhs = expr.lhs - expr.rhs
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
factored = sp.factor(lhs)
|
| 118 |
-
output += f"
|
| 119 |
-
output += "
|
|
|
|
|
|
|
| 120 |
roots = sp.solve(sp.Eq(lhs, 0), dict=True)
|
|
|
|
| 121 |
if roots:
|
| 122 |
-
output += "
|
| 123 |
for i, sol in enumerate(roots, 1):
|
| 124 |
-
for
|
| 125 |
-
output += f"\\text{{Root {i}}}
|
| 126 |
-
output += "\\end{aligned}$$\n"
|
|
|
|
| 127 |
else:
|
| 128 |
simplified = sp.simplify(expr)
|
| 129 |
-
output +=
|
|
|
|
| 130 |
|
| 131 |
return output
|
| 132 |
|
|
@@ -136,11 +136,12 @@ def solve_polynomial(image):
|
|
| 136 |
# Gradio UI
|
| 137 |
demo = gr.Interface(
|
| 138 |
fn=solve_polynomial,
|
| 139 |
-
inputs=gr.Image(type="pil", label="π· Upload Image"),
|
| 140 |
-
outputs=gr.Markdown(label="π Solution"),
|
| 141 |
title="π§ Polynomial Solver from Image",
|
| 142 |
-
description="
|
|
|
|
| 143 |
)
|
| 144 |
|
| 145 |
if __name__ == "__main__":
|
| 146 |
-
demo.launch()
|
|
|
|
| 5 |
from sympy.parsing.latex import parse_latex
|
| 6 |
import re
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
# Preprocessing for handwritten image
|
| 9 |
def preprocess_handwritten_image(pil_img):
    """Prepare an uploaded image for the OCR model.

    Args:
        pil_img: Image object exposing ``convert`` (the Gradio input in this
            file is configured with ``type="pil"``).

    Returns:
        The result of ``pil_img.convert('RGB')``.

    Raises:
        ValueError: If ``pil_img`` is None, i.e. no image was supplied.
    """
    # Fail with a readable message instead of an AttributeError on None.
    if pil_img is None:
        raise ValueError("No image provided.")
    return pil_img.convert('RGB')  # Minimal processing, just convert to RGB
|
| 11 |
|
| 12 |
# Load Pix2Tex model (once), at import time; solve_polynomial calls this same
# instance for every image it processes.
model = LatexOCR()
|
|
|
|
| 16 |
def clean_latex(latex: str) -> str:
    """Normalise OCR-produced LaTeX into a form suitable for ``parse_latex``.

    The steps run in a fixed order, because later rules rely on the output of
    earlier ones:

    1. Calligraphic X/Y/Z (``\\cal X``, ``\\mathcal X``, ``\\mathcal{X}``)
       become the plain variables x/y/z.
    2. All curly braces are removed and trailing ``,``/``.`` are stripped.
    3. ``\\pi`` and the constant ``e`` are replaced by decimal approximations.
    4. Explicit ``*`` is inserted for implicit multiplication, and ``i``
       becomes SymPy's imaginary unit ``I``.
    5. ``\\cdot`` becomes ``*`` and the Unicode minus sign becomes ``-``.
    6. ``=0`` is appended when the text contains no ``=``.

    NOTE(review): step 2 removes every brace, so grouped arguments such as
    ``x^{10}`` or ``\\frac{x+1}{2}`` lose their grouping. This is kept as the
    existing behaviour; confirm whether inputs of that shape matter.

    Args:
        latex: Raw LaTeX string from the OCR model.

    Returns:
        The cleaned equation string.
    """
    # The brace is optional and must be handled here: braces are stripped only
    # in the next step, by which time "\mathcal{X}" would have degraded to
    # "\mathcalX" and no longer be recognisable.
    latex = re.sub(
        r'\\(?:mathcal|cal)\s*(?:\{\s*([XYZ])\s*\}|([XYZ]))',
        lambda m: (m.group(1) or m.group(2)).lower(),
        latex,
    )

    # Remove curly braces, then surrounding whitespace and trailing punctuation
    latex = latex.replace('{', '').replace('}', '')
    latex = latex.strip().rstrip(',.')

    # Replace coefficients like 5\pi with (5*3.1416), then any remaining \pi
    latex = re.sub(r'(\d+)\s*\\pi', r'(\1*3.1416)', latex)
    latex = latex.replace(r'\pi', '3.1416')

    # Replace coefficients like 5e with (5*2.7183), then a standalone e
    latex = re.sub(r'(\d+)\s*e', r'(\1*2.7183)', latex)
    latex = re.sub(r'(?<![a-zA-Z0-9])e(?![a-zA-Z0-9])', '2.7183', latex)

    # Insert * between number and variable (e.g., 45x -> 45*x)
    latex = re.sub(r'(\d)([a-zA-Z])', r'\1*\2', latex)

    # Replace number followed by i with number*I, then a standalone i with I
    latex = re.sub(r'(\d+)\s*i', r'\1*I', latex)
    latex = re.sub(r'(?<![a-zA-Z0-9])i(?![a-zA-Z0-9])', 'I', latex)

    # Parenthesised coefficient before a variable: (a+b*I)x^n -> (a+b*I)*x^n
    latex = re.sub(r'\(([^()]+?)\)\s*([a-zA-Z](\^\d+)?)', r'(\1)*\2', latex)

    # Additional minimal cleanup
    latex = latex.replace(r'\cdot', '*')
    # U+2212 MINUS SIGN, written as an escape so the literal cannot be
    # corrupted by an encoding round-trip.
    latex = latex.replace('\u2212', '-')

    # Append '=0' if not already present so the result is always an equation
    if '=' not in latex:
        latex += '=0'

    return latex
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
# Main function
|
| 59 |
def solve_polynomial(image):
|
| 60 |
try:
|
| 61 |
img = preprocess_handwritten_image(image)
|
| 62 |
latex_result = model(img)
|
| 63 |
|
| 64 |
+
# Check for empty/invalid OCR output
|
| 65 |
if not latex_result or len(latex_result.strip()) < 2:
|
| 66 |
return "β Could not extract valid LaTeX from image."
|
| 67 |
|
| 68 |
cleaned_latex = clean_latex(latex_result)
|
| 69 |
|
|
|
|
| 70 |
try:
|
| 71 |
expr = parse_latex(cleaned_latex)
|
| 72 |
except Exception:
|
| 73 |
expr = None
|
| 74 |
|
|
|
|
| 75 |
if expr is None:
|
| 76 |
+
return (
|
| 77 |
+
"β Could not parse expression from cleaned LaTeX:\n"
|
| 78 |
+
"
|
| 79 |
+
|
| 80 |
+
latex\n"
|
| 81 |
+
f"{cleaned_latex}\n"
|
| 82 |
+
"
|
| 83 |
+
|
| 84 |
+
"
|
| 85 |
+
)
|
| 86 |
+
|
| 87 |
output = (
|
| 88 |
+
f"## π Extracted LaTeX\n
|
| 89 |
+
|
| 90 |
+
latex\n{latex_result}\n
|
| 91 |
+
|
| 92 |
+
\n"
|
| 93 |
"---\n"
|
| 94 |
+
f"## π§Ή Cleaned LaTeX Used\n
|
| 95 |
+
|
| 96 |
+
latex\n{cleaned_latex}\n
|
| 97 |
+
|
| 98 |
+
\n"
|
| 99 |
"---\n"
|
| 100 |
f"## π§ Parsed Expression\n\n$$ {sp.latex(expr)} $$\n"
|
| 101 |
"---\n"
|
| 102 |
)
|
| 103 |
|
|
|
|
| 104 |
if isinstance(expr, sp.Equality):
|
| 105 |
lhs = expr.lhs - expr.rhs
|
| 106 |
+
|
| 107 |
+
output += "## βοΈ Step 1: Standard Form of the Polynomial\n"
|
| 108 |
+
output += f"$$ {sp.latex(lhs)} = 0 $$\n"
|
| 109 |
+
output += "---\n"
|
| 110 |
+
|
| 111 |
+
output += "## π§© Step 2: Factor the Polynomial\n"
|
| 112 |
factored = sp.factor(lhs)
|
| 113 |
+
output += f"$$ {sp.latex(factored)} = 0 $$\n"
|
| 114 |
+
output += "---\n"
|
| 115 |
+
|
| 116 |
+
output += "## β
Step 3: Solve for Roots\n"
|
| 117 |
roots = sp.solve(sp.Eq(lhs, 0), dict=True)
|
| 118 |
+
|
| 119 |
if roots:
|
| 120 |
+
output += "$$\n\\begin{aligned}\n"
|
| 121 |
for i, sol in enumerate(roots, 1):
|
| 122 |
+
for var, val in sol.items():
|
| 123 |
+
output += f"\\text{{Root {i}}}:\\quad {var} &= {sp.latex(val)} \\\\\n"
|
| 124 |
+
output += "\\end{aligned}\n$$\n"
|
| 125 |
+
|
| 126 |
else:
|
| 127 |
simplified = sp.simplify(expr)
|
| 128 |
+
output += "## β Simplified Expression\n"
|
| 129 |
+
output += f"$$ {sp.latex(simplified)} $$"
|
| 130 |
|
| 131 |
return output
|
| 132 |
|
|
|
|
| 136 |
# Gradio UI
|
| 137 |
# Single-function Gradio app: one PIL image in, one Markdown report out.
demo = gr.Interface(
    fn=solve_polynomial,
    inputs=gr.Image(type="pil", label="📷 Upload Image of Polynomial"),
    # NOTE(review): the leading "π" is what remains of an emoji whose trailing
    # bytes were lost; restore the intended glyph from the original file.
    outputs=gr.Markdown(label="π Step-by-step Solution"),
    title="🧠 Polynomial Solver from Image",
    description="Upload an image of a polynomial (typed or handwritten). The app will extract, solve, and explain it step-by-step.",
    allow_flagging="never",
)

# Start the web UI only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|