Amit-kr26's picture
Initial commit: Multimodal Math Mentor
3c25c17
from __future__ import annotations
# Module-level cache for the OCR reader; stays None until the first
# successful _get_reader() call so the model is only loaded on demand.
_reader = None


def _get_reader():
    """Return the shared EasyOCR reader, creating it on first use.

    The reader is built with the language list ``["en"]`` and ``gpu=False``
    and stored in the module-level ``_reader`` so later calls reuse it.

    Returns:
        The cached ``easyocr.Reader`` instance.

    Raises:
        RuntimeError: If ``easyocr`` cannot be imported, or if constructing
            the reader fails for any reason. The original exception is
            attached as ``__cause__``.
    """
    global _reader
    if _reader is not None:
        return _reader

    # Import lazily so that merely importing this module does not require
    # EasyOCR to be installed.
    try:
        import easyocr
    except ImportError as exc:
        raise RuntimeError(
            "EasyOCR is not installed. Install it with: pip install easyocr"
        ) from exc

    # Kept in a separate try so an ImportError raised while *building* the
    # reader is not misreported as "EasyOCR is not installed".
    try:
        _reader = easyocr.Reader(["en"], gpu=False)
    except Exception as exc:
        raise RuntimeError(
            f"Failed to load EasyOCR (possible OOM on free tier): {exc}"
        ) from exc
    return _reader
def handle_image_input(image_path: str) -> dict:
    """Run OCR on an image and summarise the recognised text.

    Args:
        image_path: Location of the image; passed unchanged to the reader's
            ``readtext`` method.

    Returns:
        A dict with the keys:

        * ``"text"`` - recognised fragments joined by single spaces
          (``""`` when nothing was detected).
        * ``"confidence"`` - mean of the per-fragment confidences, rounded
          to 3 decimals (``0.0`` when nothing was detected).
        * ``"input_type"`` - always ``"image"``.
        * ``"bboxes"`` - one bounding box per fragment, in the same order
          as the fragments in ``"text"``; each box is a list of points and
          each point a list of plain ``float`` coordinates.

    Raises:
        RuntimeError: Propagated from ``_get_reader`` when the OCR reader
            cannot be loaded.
    """
    reader = _get_reader()
    results = reader.readtext(image_path)

    if not results:
        return {
            "text": "",
            "confidence": 0.0,
            "input_type": "image",
            "bboxes": [],
        }

    fragments: list[str] = []
    confidences: list[float] = []
    bboxes: list[list] = []
    for bbox, text, conf in results:
        fragments.append(text)
        confidences.append(float(conf))
        # NOTE(review): assumes each bbox is a sequence of (x, y) points, as
        # in EasyOCR's documented output - confirm. Coordinates may be NumPy
        # scalars, so convert them to plain floats to keep the returned dict
        # serialisable.
        bboxes.append([[float(coord) for coord in point] for point in bbox])

    # `results` is non-empty here, so the division is always safe.
    avg_confidence = sum(confidences) / len(confidences)

    return {
        "text": " ".join(fragments),
        "confidence": round(avg_confidence, 3),
        "input_type": "image",
        # Previously the collected boxes were dropped and [] was returned.
        "bboxes": bboxes,
    }