| |
|
| |
|
| | import base64
|
| | import io
|
| | from PIL import Image
|
| | from transformers import TrOCRProcessor
|
| | from optimum.onnxruntime import ORTModelForVision2Seq
|
| |
|
# Announce the model load on stdout before it starts.
print("🔹 Loading Pix2Text model for Camera → LaTeX...")

# The processor and the ONNX Runtime model are both loaded from the same
# checkpoint id, so it is named once here.
_CHECKPOINT = "breezedeus/pix2text-mfr"

processor = TrOCRProcessor.from_pretrained(_CHECKPOINT)
model = ORTModelForVision2Seq.from_pretrained(_CHECKPOINT, use_cache=False)
|
| |
|
| |
|
def camera_to_latex(image_base64: str) -> str:
    """Decode a base64 image and return the text the model generates for it.

    Args:
        image_base64: The image, either as a data URI
            (``data:<mime>;base64,<payload>``) or as a bare base64 payload.

    Returns:
        The first decoded sequence produced by the module-level ``model``,
        with surrounding whitespace stripped. If any step raises, the error
        is printed and the fixed string ``"⚠️ Error generating LaTeX"`` is
        returned instead; the function itself does not propagate exceptions
        derived from ``Exception``.
    """
    try:
        # Keep everything after the first comma (the data-URI header
        # separator). A bare payload contains no comma, in which case the
        # whole string is used rather than failing with IndexError.
        image_data = image_base64.split(",", 1)[-1]
        image_bytes = base64.b64decode(image_data)
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

        pixel_values = processor(images=image, return_tensors="pt").pixel_values
        generated_ids = model.generate(pixel_values)
        text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

        return text.strip()
    except Exception as e:
        # Deliberately broad: callers receive a displayable string on any
        # failure (bad input, undecodable image, model error).
        print(f"❌ Error in camera_to_latex: {e}")
        return "⚠️ Error generating LaTeX"
|
| |
|