import logging
import os

from gradio_client import Client, handle_file

from .base import BaseOCR


class GradioOCREngine(BaseOCR):
    """OCR engine that sends images to a Gradio Space and returns its text.

    The client is created eagerly in ``__init__``. If creation fails the
    error is logged, ``self.client`` stays ``None`` and every later
    extraction returns an empty string instead of raising.
    """

    def __init__(self, space_name="WebAshlarWA/glm-ocr-v1"):
        """Store the Space name and try to connect to it.

        Args:
            space_name: Identifier of the Gradio Space passed to ``Client``.
        """
        self.space_name = space_name
        self.client = None
        self._initialize_client()

    def _initialize_client(self):
        """Create the Gradio client; log (do not raise) on failure."""
        try:
            self.client = Client(self.space_name)
            logging.info(
                "Gradio Client initialized for Space: %s", self.space_name
            )
        except Exception as e:
            # Best-effort by design: a failed connection leaves
            # ``self.client`` as None and extract_text() degrades to "".
            logging.error(
                "Failed to initialize Gradio Client for %s: %s",
                self.space_name,
                e,
            )

    @staticmethod
    def _result_to_text(result) -> str:
        """Normalise a ``predict`` result into a plain string.

        Handles the shapes this engine has to cope with:
        ``None`` -> ``""``; ``str`` -> unchanged; list/tuple -> first
        element stringified (``""`` when empty); ``dict`` -> its ``'text'``
        entry if present, otherwise the stringified dict; anything else ->
        ``str(result)``.
        """
        if result is None:
            return ""
        if isinstance(result, str):
            return result
        # Multi-output endpoints come back as a tuple, not a list, so both
        # sequence types are treated alike. An empty sequence yields ""
        # rather than the literal text "[]" / "()".
        if isinstance(result, (list, tuple)):
            return str(result[0]) if result else ""
        if isinstance(result, dict):
            # Fall back to the whole dict when there is no 'text' key;
            # str() guarantees the annotated return type either way.
            return str(result.get("text", result))
        return str(result)

    def extract_text(self, image_path: str) -> str:
        """Run OCR on ``image_path`` through the Space.

        Args:
            image_path: Path of the image file to upload.

        Returns:
            The extracted text, or ``""`` when the client is unavailable or
            the remote call fails (the failure is logged).
        """
        if not self.client:
            logging.error("Gradio Client not initialized.")
            return ""

        logging.info(
            "Gradio OCR: Starting extraction for %s",
            os.path.basename(image_path),
        )
        try:
            # The Space takes the picture as the 'image' input of its
            # /proses_intelijen endpoint; the output shape is not fixed,
            # hence the normalisation step below.
            result = self.client.predict(
                image=handle_file(image_path),
                api_name="/proses_intelijen",
            )
            text = self._result_to_text(result)
        except Exception as e:
            logging.error("Gradio OCR extraction failed: %s", e)
            return ""

        # Logged for every successful call, whatever shape the result had.
        logging.info("Gradio OCR: Successfully extracted text.")
        return text

    def process(self, image_path: str) -> str:
        """Alias for :meth:`extract_text`."""
        return self.extract_text(image_path)