File size: 1,144 Bytes
ca4bd13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import io
import requests
from PIL import Image

def perform_ocr(image_bytes, *, api_key="helloworld", language="eng", timeout=30):
    """Extract text from an image via the OCR.space HTTP API.

    The bytes are first decoded locally with Pillow so that unusable input is
    rejected before any network traffic happens, then uploaded to OCR.space.
    The text of the first parsed result is returned with blank lines dropped
    and the remaining lines separated by an empty line.

    Args:
        image_bytes: Raw encoded image data (the contents of an image file).
        api_key: OCR.space API key. Defaults to the shared, usage-limited
            "helloworld" key.
        language: Language code sent to the service.
        timeout: Seconds ``requests`` waits on the service before giving up,
            so a stalled connection cannot block the caller forever.

    Returns:
        The recognised text, one non-blank line per paragraph, joined by
        blank lines. An empty string when the service found no text.

    Raises:
        ValueError: If ``image_bytes`` is empty, cannot be decoded as an
            image, the service reports a processing error, the response is
            not valid JSON, or the response does not have the expected shape.
        requests.RequestException: On network failures, including timeouts.
    """
    if not image_bytes:
        raise ValueError("Empty image bytes provided")

    # Decode the whole image (not just the header) so truncated or corrupt
    # data is caught here. Pillow signals bad input with several unrelated
    # exception types, hence the broad catch that is immediately re-raised.
    try:
        with Image.open(io.BytesIO(image_bytes)) as img:
            img.convert("RGB")
    except Exception as e:
        raise ValueError(f"Invalid image bytes provided: {e}") from e

    api_url = "https://api.ocr.space/parse/image"
    payload = {
        'apikey': api_key,
        'language': language,
    }
    # NOTE(review): the upload is always named image.jpg regardless of the
    # real format - confirm the service does not depend on the extension.
    files = {
        'file': ('image.jpg', image_bytes),
    }

    response = requests.post(api_url, data=payload, files=files, timeout=timeout)
    result = response.json()

    # Valid JSON that is not an object (for instance a bare error string)
    # would otherwise fail below with an unrelated AttributeError.
    if not isinstance(result, dict):
        raise ValueError(f"OCR processing error: {result}")

    if result.get("IsErroredOnProcessing"):
        error = result.get("ErrorMessage") or "Unknown error"
        # The message may arrive as a list of strings; flatten it so the
        # exception text stays readable.
        if isinstance(error, (list, tuple)):
            error = "; ".join(str(part) for part in error)
        raise ValueError(f"OCR processing error: {error}")

    parsed_results = result.get("ParsedResults")
    if not parsed_results:
        raise ValueError("OCR processing error: response contained no parsed results")

    # "ParsedText" can be present but null, so do not rely on the .get default.
    parsed_text = parsed_results[0].get("ParsedText") or ""
    stripped_lines = (line.strip() for line in parsed_text.split('\n'))
    return "\n\n".join(line for line in stripped_lines if line)

# Module-level alias: the same function object is also reachable as `export`.
# NOTE(review): presumably some loader looks this name up - confirm with the caller.
export = perform_ocr