"""OCR helpers that run Apple's Vision text recognition through a Swift script."""

from __future__ import annotations

import platform
import shutil
import subprocess
import tempfile
from pathlib import Path


# Swift source executed by the `swift` command-line tool. It takes the image
# path as its first argument, prints the recognized text (one trimmed line per
# observation) to stdout, and on failure writes a message to stderr and exits
# with status 1.
OCR_SWIFT = """
import Foundation
import Vision

func fail(_ message: String) -> Never {
    FileHandle.standardError.write(Data((message + "\\n").utf8))
    exit(1)
}

guard CommandLine.arguments.count > 1 else {
    fail("Missing image path.")
}

let imageURL = URL(fileURLWithPath: CommandLine.arguments[1])
let request = VNRecognizeTextRequest()
request.recognitionLevel = .accurate
request.usesLanguageCorrection = true
if #available(macOS 13.0, *) {
    request.automaticallyDetectsLanguage = true
}

do {
    let handler = VNImageRequestHandler(url: imageURL, options: [:])
    try handler.perform([request])
} catch {
    fail(error.localizedDescription)
}

guard let observations = request.results, !observations.isEmpty else {
    fail("No readable text was found in the image.")
}

let lines = observations.compactMap { observation in
    observation.topCandidates(1).first?.string
}

let cleanedText = lines
    .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
    .filter { !$0.isEmpty }
    .joined(separator: "\\n")

guard !cleanedText.isEmpty else {
    fail("No readable text was found in the image.")
}

print(cleanedText)
"""


def extract_text_from_image_file(image_path: Path, timeout: int = 120) -> str:
    """Run the embedded Swift OCR script on an image file and return its text.

    Args:
        image_path: Path of the image to read; passed to the script as its
            first command-line argument.
        timeout: Seconds to wait for the ``swift`` subprocess.

    Returns:
        The script's stdout with surrounding whitespace stripped.

    Raises:
        RuntimeError: If the platform is not macOS, the ``swift`` executable
            is not on PATH, the script exits with a non-zero status, or it
            produces no text.
        subprocess.TimeoutExpired: If the subprocess exceeds ``timeout``.
    """
    if platform.system() != "Darwin":
        raise RuntimeError(
            "Question image OCR currently works only on macOS because it uses Apple's Vision framework."
        )

    swift_executable = shutil.which("swift")
    if swift_executable is None:
        raise RuntimeError("Swift is not installed, so question-image OCR cannot run.")

    # The script is written to a throwaway directory that is removed as soon
    # as the subprocess has finished.
    with tempfile.TemporaryDirectory() as tmp_dir:
        script_path = Path(tmp_dir) / "ocr.swift"
        script_path.write_text(OCR_SWIFT, encoding="utf-8")
        result = subprocess.run(
            [swift_executable, str(script_path), str(image_path)],
            capture_output=True,
            text=True,
            timeout=timeout,
            check=False,
        )

    if result.returncode != 0:
        # Prefer the script's stderr message, then stdout, then a generic text.
        message = result.stderr.strip() or result.stdout.strip() or "OCR failed."
        raise RuntimeError(message)

    extracted_text = result.stdout.strip()
    if not extracted_text:
        raise RuntimeError("No readable text was found in the image.")
    return extracted_text


def extract_text_from_image_bytes(
    image_bytes: bytes,
    suffix: str = ".png",
    timeout: int = 120,
) -> str:
    """Write image bytes to a temporary file and OCR it.

    Args:
        image_bytes: Raw image data to write to the temporary file.
        suffix: File-name suffix given to the temporary file.
        timeout: Seconds to wait for the OCR subprocess.

    Returns:
        The text returned by ``extract_text_from_image_file``.

    Raises:
        RuntimeError: Propagated from ``extract_text_from_image_file``.
        subprocess.TimeoutExpired: Propagated from
            ``extract_text_from_image_file``.
    """
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    image_path = Path(tmp_file.name)
    try:
        # The write sits inside the try so that a failed write (bad argument,
        # full disk, ...) still removes the delete=False temporary file.
        with tmp_file:
            tmp_file.write(image_bytes)
        # The file is closed at this point, before the subprocess reads it.
        return extract_text_from_image_file(image_path=image_path, timeout=timeout)
    finally:
        image_path.unlink(missing_ok=True)