| """ | |
| OCR module adapted for HuggingFace Spaces. | |
| Uses Google Cloud Vision API for text detection. | |
| """ | |
| from PIL import Image, ImageDraw, ImageFilter | |
| from google.cloud import vision | |
| import numpy as np | |
| import io | |
| import os | |
| import json | |
| import tempfile | |
| from py_files.bounding_clustering import QuadTree, Node | |


def change_contrast(img, level):
    """Adjust image contrast for better OCR results."""
    factor = (259 * (level + 255)) / (255 * (259 - level))

    def contrast(c):
        return 128 + factor * (c - 128)

    return img.point(contrast)


def get_bounding_box_doc(blk):
    """Extract bounding box coordinates [x0, y0, x1, y1] from a document text block."""
    vertices = [int(blk.bounding_box.vertices[0].x), int(blk.bounding_box.vertices[0].y),
                int(blk.bounding_box.vertices[2].x), int(blk.bounding_box.vertices[2].y)]
    return vertices


def get_text_from_image_doc(img, debug=False, get_response=False, resp=None, max_dist=20):
    """
    Extract text from an image using Google Cloud Vision document text detection.
    Adapted for the HuggingFace Spaces environment.
    """
    response = resp
    if resp is None:
        # Initialize the client with credentials from the environment
        try:
            # Try to get credentials from an environment variable
            google_creds = os.environ.get('GOOGLE_CLOUD_CREDENTIALS')
            if google_creds:
                # Write the credentials to a temporary file
                creds_data = json.loads(google_creds)
                with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
                    json.dump(creds_data, f)
                    creds_path = f.name
                os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = creds_path

            client = vision.ImageAnnotatorClient()

            # Enhance image contrast for better OCR
            img = change_contrast(img, 20)

            # Convert the PIL image to bytes
            imgByteArr = io.BytesIO()
            img.save(imgByteArr, format='PNG')
            image = vision.Image(content=imgByteArr.getvalue())

            # Perform document text detection
            response = client.document_text_detection(image=image)

            # Clean up the temporary credentials file
            if google_creds and 'creds_path' in locals():
                try:
                    os.unlink(creds_path)
                except OSError:
                    pass
        except Exception as e:
            # Fallback: create a dummy response for demo purposes
            print(f"Warning: Google Cloud Vision not available: {e}")
            response = create_dummy_ocr_response(img)

    # Process the response
    word_boxes = []
    if hasattr(response, 'full_text_annotation') and response.full_text_annotation:
        for page in response.full_text_annotation.pages:
            for block in page.blocks:
                if block.confidence < 0.9:
                    continue
                if debug:
                    print(f"\nBlock confidence: {block.confidence}")
                    print(f"Block box: {get_bounding_box_doc(block)}")
                words = ""
                fonts = []
                for paragraph in block.paragraphs:
                    for word in paragraph.words:
                        word_text = "".join([symbol.text for symbol in word.symbols])
                        words += word_text + " "
                        # Use the word's bounding-box height as a proxy for font size
                        word_bbox = get_bounding_box_doc(word)
                        fonts.append(abs(word_bbox[3] - word_bbox[1]))
                if debug:
                    print(f"Words: {words}")
                if fonts:  # Only add the block if we have font-size information
                    word_boxes.append(
                        [words.strip()] + get_bounding_box_doc(block) + [sum(fonts) // len(fonts)]
                    )

    # If no text was detected, create a minimal entry
    if not word_boxes:
        word_boxes.append(["No text detected", 0, 0, 100, 20, 12])

    # Create a QuadTree for clustering nearby text boxes
    tree = QuadTree(max_dist=max_dist)
    for i in range(len(word_boxes)):
        tree.insert(Node(*tuple(word_boxes[i])))

    if get_response:
        return tree, response
    return tree, {}
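

# Note on credentials: the function above expects the full service-account JSON (not a
# file path) in the GOOGLE_CLOUD_CREDENTIALS environment variable, e.g. set as a Space
# secret. A local sketch (the filename is illustrative only, not shipped with the module):
#
#   os.environ["GOOGLE_CLOUD_CREDENTIALS"] = open("service_account.json").read()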


def create_dummy_ocr_response(img):
    """
    Create a dummy OCR response for demo purposes when Google Cloud Vision is not available.
    This allows the demo to work without requiring actual OCR credentials.
    """
    W, H = img.size

    # Create a simple mock response object
    class MockResponse:
        def __init__(self):
            self.full_text_annotation = None

    # For demo purposes, we just return an empty response.
    # In a real scenario, you might want to use an alternative OCR library like pytesseract.
    return MockResponse()
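

# Sketch of the pytesseract alternative mentioned above: a local, credential-free
# fallback, assuming the optional pytesseract package and the Tesseract binary are
# installed. It returns entries in the same [text, x0, y0, x1, y1, font_height]
# layout that get_text_from_image_doc builds, not a Vision-style response object,
# so wiring it into the pipeline would still need a small adapter. The function name
# and threshold below are hypothetical, not part of the original module.
def create_tesseract_word_boxes(img, min_conf=60):
    """Hypothetical offline fallback using pytesseract instead of Cloud Vision."""
    import pytesseract  # optional dependency, imported lazily

    data = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)
    word_boxes = []
    for i in range(len(data['text'])):
        text = data['text'][i].strip()
        # Skip empty detections and low-confidence words
        if not text or float(data['conf'][i]) < min_conf:
            continue
        x, y = data['left'][i], data['top'][i]
        w, h = data['width'][i], data['height'][i]
        word_boxes.append([text, x, y, x + w, y + h, h])
    return word_boxes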


def draw_boxes(img, bound, color, width=5):
    """Draw a bounding box on a copy of the image for visualization."""
    _img = img.copy()
    draw = ImageDraw.Draw(_img)
    x0 = min(bound[0], bound[2]) - 7
    x1 = max(bound[0], bound[2]) + 10
    y0 = min(bound[1], bound[3]) - 7
    y1 = max(bound[1], bound[3]) + 10
    draw.rectangle([x0, y0, x1, y1], outline=color, width=width)
    return _img, x0, y0, x1, y1


def get_image_with_boxes_doc(image, color='red', width=5, get_response=False, response=None):
    """Return the image with OCR bounding boxes drawn on it."""
    tree, resp = get_text_from_image_doc(image, get_response=get_response, resp=response)
    bxs = tree.get_children(False)
    for bx in bxs:
        image, x0, y0, x1, y1 = draw_boxes(image, bx, color, width)
    if get_response:
        return image, resp
    return image
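

# Minimal usage sketch, assuming a local test image; "sample.png" and the output
# filename are illustrative placeholders, not files shipped with this Space.
if __name__ == "__main__":
    demo_img = Image.open("sample.png").convert("RGB")

    # Draw the clustered OCR boxes and save the annotated copy for inspection
    annotated = get_image_with_boxes_doc(demo_img, color='red', width=5)
    annotated.save("sample_with_boxes.png")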