Spaces:

atulisoffline
/

Healthhack-2025

Sleeping

Healthhack-2025 / ocr_model.py

T2LIPthedeveloper

Test deployment

ca4bd13 10 months ago

1.14 kB

	import io
	import requests
	from PIL import Image

	def perform_ocr(
	    image_bytes,
	    *,
	    api_key="helloworld",
	    language="eng",
	    timeout=30,
	):
	    """Extract text from an image using the OCR.space HTTP API.

	    The bytes are first decoded locally with Pillow so that unreadable input
	    is rejected before any network request is made.

	    Args:
	        image_bytes: Raw bytes of the image file to recognise.
	        api_key: OCR.space API key. Defaults to the free ``helloworld`` key,
	            which has usage limits.
	        language: Language code sent to the API (default ``"eng"``).
	        timeout: Seconds to wait for the HTTP request before giving up.

	    Returns:
	        The recognised text with every non-blank line stripped and the lines
	        separated by one empty line; an empty string when no text was found.

	    Raises:
	        ValueError: If ``image_bytes`` is empty or cannot be decoded as an
	            image, if the response is not a JSON object, if the API reports a
	            processing error, or if the response holds no parsed results.
	        requests.RequestException: If the HTTP request fails or times out.
	    """
	    if not image_bytes:
	        raise ValueError("Empty image bytes provided")

	    # Validate locally: convert() makes Pillow decode the pixel data, so
	    # truncated or corrupt files fail here instead of at the remote service.
	    try:
	        with Image.open(io.BytesIO(image_bytes)) as image:
	            image.convert("RGB")
	    except Exception as e:
	        raise ValueError(f"Invalid image bytes provided: {e}") from e

	    api_url = "https://api.ocr.space/parse/image"
	    payload = {"apikey": api_key, "language": language}
	    files = {"file": ("image.jpg", image_bytes)}

	    # Without an explicit timeout, requests waits indefinitely for a reply.
	    response = requests.post(
	        api_url, data=payload, files=files, timeout=timeout
	    )
	    try:
	        result = response.json()
	    except ValueError as e:
	        raise ValueError(
	            "OCR service returned a non-JSON response "
	            f"(HTTP {response.status_code})"
	        ) from e

	    # A JSON body that is not an object (e.g. a bare string) has no .get();
	    # report it as a processing error rather than crashing below.
	    if not isinstance(result, dict):
	        raise ValueError(f"OCR processing error: {result}")

	    if result.get("IsErroredOnProcessing"):
	        error = result.get("ErrorMessage") or "Unknown error"
	        # Render a list of messages as readable text, not a Python list repr.
	        if isinstance(error, (list, tuple)):
	            error = "; ".join(str(item) for item in error)
	        raise ValueError(f"OCR processing error: {error}")

	    parsed_results = result.get("ParsedResults")
	    if not parsed_results:
	        raise ValueError("OCR response contained no parsed results")

	    # Only the first result is used; "or" also covers an explicit null value.
	    parsed_text = parsed_results[0].get("ParsedText") or ""
	    stripped = (line.strip() for line in parsed_text.split("\n"))
	    return "\n\n".join(line for line in stripped if line)

	# Module-level alias: exposes perform_ocr under the name `export` as well.
	# NOTE(review): presumably kept for a caller that looks up `export` — confirm.
	export = perform_ocr