Spaces:

rishach
/

math-chatbot-v2

Sleeping

math-chatbot-v2 / src /edurag_math_bot /image_to_text.py

pranshu dhiman

Deploy MathSutra Space

7fab45b 28 days ago

2.84 kB

	from __future__ import annotations

	import platform
	import shutil
	import subprocess
	import tempfile
	from pathlib import Path


	# Source of a standalone Swift script that performs OCR with Apple's Vision
	# framework. The script:
	#   * reads the image path from its first command-line argument and fails
	#     with "Missing image path." when none is given;
	#   * runs a VNRecognizeTextRequest with the `.accurate` recognition level
	#     and language correction enabled (plus automatic language detection
	#     when macOS 13.0 or newer is available);
	#   * keeps the top candidate string of every observation, trims whitespace,
	#     drops empty lines, joins the rest with newlines and prints the result
	#     to stdout;
	#   * on any failure writes a message to stderr and exits with status 1.
	# This is a regular (non-raw) Python string, so each `\\n` below reaches the
	# Swift source as the two-character escape `\n`.
	OCR_SWIFT = """
	import Foundation
	import Vision

	func fail(_ message: String) -> Never {
	FileHandle.standardError.write(Data((message + "\\n").utf8))
	exit(1)
	}

	guard CommandLine.arguments.count > 1 else {
	fail("Missing image path.")
	}

	let imageURL = URL(fileURLWithPath: CommandLine.arguments[1])
	let request = VNRecognizeTextRequest()
	request.recognitionLevel = .accurate
	request.usesLanguageCorrection = true

	if #available(macOS 13.0, *) {
	request.automaticallyDetectsLanguage = true
	}

	do {
	let handler = VNImageRequestHandler(url: imageURL, options: [:])
	try handler.perform([request])
	} catch {
	fail(error.localizedDescription)
	}

	guard let observations = request.results, !observations.isEmpty else {
	fail("No readable text was found in the image.")
	}

	let lines = observations.compactMap { observation in
	observation.topCandidates(1).first?.string
	}

	let cleanedText = lines
	.map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
	.filter { !$0.isEmpty }
	.joined(separator: "\\n")

	guard !cleanedText.isEmpty else {
	fail("No readable text was found in the image.")
	}

	print(cleanedText)
	"""


	def extract_text_from_image_file(image_path: Path, timeout: int = 120) -> str:
	    """Run the embedded Swift OCR script on an image file and return its text.

	    Args:
	        image_path: Location of the image to read; passed to the script as
	            its only argument.
	        timeout: Maximum number of seconds the ``swift`` subprocess may run.

	    Returns:
	        The whitespace-stripped text the script printed to stdout.

	    Raises:
	        RuntimeError: If the host is not macOS, ``swift`` is not on PATH, the
	            script exits with a non-zero status, or it prints no text.
	        subprocess.TimeoutExpired: If the script runs longer than ``timeout``
	            (raised by ``subprocess.run`` and not translated here).
	    """
	    running_on_macos = platform.system() == "Darwin"
	    if not running_on_macos:
	        raise RuntimeError(
	            "Question image OCR currently works only on macOS because it uses Apple's Vision framework."
	        )

	    swift_bin = shutil.which("swift")
	    if swift_bin is None:
	        raise RuntimeError("Swift is not installed, so question-image OCR cannot run.")

	    # The script is written to a throwaway directory so the Swift toolchain
	    # can be pointed at a real file; the directory vanishes afterwards.
	    with tempfile.TemporaryDirectory() as workdir:
	        swift_source = Path(workdir, "ocr.swift")
	        swift_source.write_text(OCR_SWIFT, encoding="utf-8")
	        command = [swift_bin, str(swift_source), str(image_path)]
	        completed = subprocess.run(
	            command,
	            capture_output=True,
	            text=True,
	            timeout=timeout,
	            check=False,
	        )

	    recognized = completed.stdout.strip()

	    if completed.returncode != 0:
	        # Prefer the script's own stderr message, then stdout, then a stock text.
	        raise RuntimeError(completed.stderr.strip() or recognized or "OCR failed.")

	    if recognized:
	        return recognized
	    raise RuntimeError("No readable text was found in the image.")


	def extract_text_from_image_bytes(
	    image_bytes: bytes,
	    suffix: str = ".png",
	    timeout: int = 120,
	) -> str:
	    """Run OCR on an in-memory image by spilling it to a temporary file.

	    Args:
	        image_bytes: Raw encoded image data.
	        suffix: File-name suffix given to the temporary file.
	        timeout: Forwarded to ``extract_text_from_image_file``.

	    Returns:
	        Whatever ``extract_text_from_image_file`` returns for the temporary
	        file.

	    Raises:
	        Any exception raised while writing the temporary file or by
	        ``extract_text_from_image_file``.
	    """
	    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
	    image_path = Path(tmp_file.name)

	    # The file is created with delete=False, so it must be removed by hand.
	    # The write sits inside the try block so that a failing write (wrong
	    # payload type, full disk, ...) cannot leave the file behind.
	    try:
	        with tmp_file:
	            tmp_file.write(image_bytes)
	        # The handle is closed here, before the OCR subprocess opens the path.
	        return extract_text_from_image_file(image_path=image_path, timeout=timeout)
	    finally:
	        image_path.unlink(missing_ok=True)