ImageDataExtractor2 / core /gradio_ocr.py
WebashalarForML's picture
Upload 42 files
fad436e verified
import logging
import os
from gradio_client import Client, handle_file
from .base import BaseOCR
class GradioOCREngine(BaseOCR):
    """OCR engine that delegates text extraction to a remote Gradio Space.

    A ``gradio_client.Client`` is created eagerly in ``__init__``. If that
    fails, the failure is logged and ``self.client`` stays ``None``; every
    later extraction call then returns an empty string instead of raising.
    """

    def __init__(self, space_name="WebAshlarWA/glm-ocr-v1"):
        """Store the Space identifier and try to connect to it.

        Args:
            space_name: Identifier of the Gradio Space passed to ``Client``.
        """
        self.space_name = space_name
        self.client = None
        self._initialize_client()

    def _initialize_client(self):
        """Create the Gradio client, leaving ``self.client`` as ``None`` on failure."""
        try:
            self.client = Client(self.space_name)
            logging.info(f"Gradio Client initialized for Space: {self.space_name}")
        except Exception as e:
            # Best effort by design: construction of the engine must not raise
            # just because the remote Space is unreachable.
            logging.error(f"Failed to initialize Gradio Client for {self.space_name}: {e}")

    @staticmethod
    def _result_to_text(result) -> str:
        """Normalise a ``predict`` result of unknown shape into a plain string.

        Handled shapes:
            * ``None`` or an empty list/tuple -> ``""``
            * ``str`` -> returned unchanged
            * list/tuple -> string form of the first element
            * ``dict`` -> string form of its ``'text'`` value when that key is
              present, otherwise the string form of the whole dict
            * anything else -> ``str(result)``
        """
        if result is None:
            return ""
        if isinstance(result, str):
            return result
        # gradio_client returns a tuple (not a list) when the endpoint has
        # several outputs, so both sequence types are unpacked the same way.
        if isinstance(result, (list, tuple)):
            if not result:
                return ""
            first = result[0]
            return "" if first is None else str(first)
        if isinstance(result, dict):
            if "text" in result:
                value = result["text"]
                return "" if value is None else str(value)
            # Structured payload without a 'text' field: stringify it so the
            # caller still receives a str.
            return str(result)
        return str(result)

    def extract_text(self, image_path: str) -> str:
        """Send an image to the Space and return the recognised text.

        Args:
            image_path: Path of the image file to upload.

        Returns:
            The extracted text, or ``""`` when the client is not initialised
            or the remote call / result conversion fails. Never raises.
        """
        if not self.client:
            logging.error("Gradio Client not initialized.")
            return ""

        logging.info(f"Gradio OCR: Starting extraction for {os.path.basename(image_path)}")
        try:
            # NOTE(review): the endpoint name and the 'image' keyword are taken
            # as-is from the original code; the exact output shape of this
            # endpoint is not confirmed, hence the defensive normalisation.
            result = self.client.predict(
                image=handle_file(image_path),
                api_name="/proses_intelijen",
            )
            text = self._result_to_text(result)
        except Exception as e:
            logging.error(f"Gradio OCR extraction failed: {e}")
            return ""

        # Logged for every successful call, regardless of the result's shape.
        logging.info("Gradio OCR: Successfully extracted text.")
        return text

    def process(self, image_path: str) -> str:
        """Alias for :meth:`extract_text`."""
        return self.extract_text(image_path)