Spaces:
Sleeping
Sleeping
File size: 1,144 Bytes
ca4bd13 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import io
import requests
from PIL import Image
def perform_ocr(image_bytes, *, api_key="helloworld", language="eng", timeout=30):
    """Extract text from an image via the OCR.space HTTP API.

    The bytes are first decoded locally with Pillow so that invalid input is
    rejected before any network request is made. The image is then uploaded
    to OCR.space and the text of the first parsed result is returned with
    blank lines removed and the remaining lines separated by an empty line.

    Args:
        image_bytes: Raw encoded image data (bytes).
        api_key: OCR.space API key. Defaults to the free ``helloworld`` key,
            which has usage limits.
        language: Language code sent to the API (default ``"eng"``).
        timeout: Seconds to wait for the HTTP request before giving up;
            passed straight to ``requests.post``.

    Returns:
        The recognized text, one non-empty line per paragraph joined by
        ``"\\n\\n"``. An empty string when the service returns no parsed
        results or no text.

    Raises:
        ValueError: If ``image_bytes`` is empty, cannot be decoded as an
            image, or the service reports an error / returns an unusable
            response.
        requests.RequestException: On connection failures or timeouts
            (propagated unchanged from ``requests``).
    """
    if not image_bytes:
        raise ValueError("Empty image bytes provided")

    # Decode locally first so obviously bad input never costs an API call.
    # The context manager releases the decoder's resources afterwards.
    try:
        with Image.open(io.BytesIO(image_bytes)) as image:
            image.convert("RGB")
    except Exception as exc:
        raise ValueError(f"Invalid image bytes provided: {exc}") from exc

    api_url = "https://api.ocr.space/parse/image"
    payload = {
        "apikey": api_key,
        "language": language,
    }
    files = {
        "file": ("image.jpg", image_bytes),
    }

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(api_url, data=payload, files=files, timeout=timeout)

    try:
        result = response.json()
    except ValueError as exc:
        # All JSON decode errors raised by requests derive from ValueError.
        raise ValueError(
            f"OCR processing error: non-JSON response (HTTP {response.status_code})"
        ) from exc

    # NOTE(review): some failures (e.g. a rejected API key) appear to come
    # back as a bare JSON string rather than an object - confirm against the
    # OCR.space docs. Either way, anything that is not a dict is unusable.
    if not isinstance(result, dict):
        raise ValueError(f"OCR processing error: {result}")

    if result.get("IsErroredOnProcessing"):
        error = result.get("ErrorMessage") or "Unknown error"
        # The message may be a list of strings; flatten it for readability.
        if isinstance(error, (list, tuple)):
            error = "; ".join(str(part) for part in error)
        raise ValueError(f"OCR processing error: {error}")

    # "ParsedResults" can be missing, null or empty; treat that as "no text"
    # instead of crashing on the [0] lookup.
    parsed_results = result.get("ParsedResults") or []
    if not parsed_results:
        return ""

    parsed_text = parsed_results[0].get("ParsedText") or ""
    stripped_lines = (line.strip() for line in parsed_text.split("\n"))
    return "\n\n".join(line for line in stripped_lines if line)
# Module-level alias: exposes perform_ocr under the name `export`.
export = perform_ocr