R_app / ocr.py
RamizXhah's picture
Upload 14 files
dc83e1c verified
"""OCR (Optical Character Recognition) for extracting text from images."""
import pytesseract
from PIL import Image
import io
import os
from pathlib import Path
# Configure Tesseract path for Windows (adjust if needed)
# For Windows: Download from https://github.com/UB-Mannheim/tesseract/wiki
# Default installation path:
if os.name == 'nt': # Windows
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
def extract_text_from_image(image_data: bytes, language: str = 'eng') -> str:
"""
Extract text from an image using OCR.
Args:
image_data: Image file bytes
language: Language code for OCR
'eng' = English
'urd' = Urdu
'eng+urd' = Both
Returns:
Extracted text string
"""
try:
# Open image from bytes
image = Image.open(io.BytesIO(image_data))
# Convert to RGB if necessary
if image.mode != 'RGB':
image = image.convert('RGB')
# Perform OCR
text = pytesseract.image_to_string(image, lang=language)
# Clean up extracted text
text = text.strip()
if not text:
raise ValueError("No text detected in image")
return text
except Exception as e:
raise RuntimeError(f"OCR failed: {str(e)}")
def get_supported_languages() -> dict:
"""
Get list of supported OCR languages.
Returns:
Dictionary of language codes and names
"""
return {
'eng': 'English',
'urd': 'Urdu',
'ara': 'Arabic',
'hin': 'Hindi',
'eng+urd': 'English + Urdu (Mixed)',
}
def validate_image(image_data: bytes) -> bool:
"""
Validate if the data is a valid image.
Args:
image_data: Image file bytes
Returns:
True if valid image, False otherwise
"""
try:
image = Image.open(io.BytesIO(image_data))
image.verify()
return True
except Exception:
return False
def preprocess_image(image_data: bytes) -> bytes:
"""
Preprocess image for better OCR accuracy.
Applies grayscale conversion and contrast enhancement.
Args:
image_data: Original image bytes
Returns:
Preprocessed image bytes
"""
try:
from PIL import ImageEnhance
image = Image.open(io.BytesIO(image_data))
# Convert to grayscale
image = image.convert('L')
# Enhance contrast
enhancer = ImageEnhance.Contrast(image)
image = enhancer.enhance(2.0)
# Save to bytes
output = io.BytesIO()
image.save(output, format='PNG')
return output.getvalue()
except Exception as e:
# Return original if preprocessing fails
return image_data