import gradio as gr from PIL import Image from pix2tex.cli import LatexOCR import sympy as sp from sympy.parsing.latex import parse_latex import re import os # Optional: import training function from train import train_model # Trigger data download only once if not os.path.exists("dataset/train"): print("๐Ÿš€ Running data preparation scripts...") os.system("python download_data.py") os.system("python generate_csv.py") # Preprocessing def preprocess_handwritten_image(pil_img): return pil_img.convert('RGB') # Load Pix2Tex model model = LatexOCR() # Clean LaTeX output def clean_latex(latex): latex = re.sub(r'\\(cal|mathcal)\s*X', 'x', latex) latex = latex.replace('{', '').replace('}', '') latex = latex.strip().rstrip(',.') latex = re.sub(r'(\d+)\s*\\pi', r'(\1*3.1416)', latex) latex = latex.replace(r'\pi', '3.1416') latex = re.sub(r'(\d+)\s*e', r'(\1*2.7183)', latex) latex = re.sub(r'(?