File size: 4,174 Bytes
2f4af3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import cv2
import numpy as np
from PIL import Image

def segment_hieroglyphs(image_path):
    """Split an image into individual glyph crops via OpenCV contours.

    The image is binarised with an inverted Gaussian adaptive threshold,
    cleaned with one 3x3 morphological close, and the bounding box of each
    external contour becomes a candidate glyph.  Candidates whose box area
    is below 200 pixels, or whose width/height exceeds 95% of the image,
    are dropped.  Survivors are ordered by 50-pixel row bucket (top to
    bottom) and then by x (left to right), and cropped from the original
    image with a 6-pixel margin clamped to the image bounds.

    Returns:
        A list of RGB PIL images, one per glyph.  When no candidate
        survives filtering the list contains the whole image only.  On any
        error (including an unreadable path) the error is printed and an
        empty list is returned.
    """
    try:
        source = cv2.imread(image_path)
        if source is None:
            raise FileNotFoundError(f"Image not found or cannot be read: {image_path}")

        # Inverted threshold: pixels darker than their neighbourhood become 255.
        binary = cv2.adaptiveThreshold(
            cv2.cvtColor(source, cv2.COLOR_BGR2GRAY),
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,
            25,
            10,
        )

        # Close small gaps so strokes of one glyph merge into one contour.
        closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
        binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, closing_kernel, iterations=1)

        outlines, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        height, width = binary.shape

        # Keep boxes that are neither specks nor (almost) the full image.
        candidates = (cv2.boundingRect(outline) for outline in outlines)
        kept = [
            (bx, by, bw, bh)
            for bx, by, bw, bh in candidates
            if bw * bh >= 200 and bw <= 0.95 * width and bh <= 0.95 * height
        ]

        # Nothing detected: hand back the full image as a single crop.
        if not kept:
            return [Image.fromarray(cv2.cvtColor(source, cv2.COLOR_BGR2RGB))]

        # Reading order: row bucket first, then horizontal position.
        kept.sort(key=lambda box: (box[1] // 50, box[0]))

        margin = 6
        pieces = []
        for bx, by, bw, bh in kept:
            left = max(0, bx - margin)
            top = max(0, by - margin)
            right = min(width, bx + bw + margin)
            bottom = min(height, by + bh + margin)

            region = source[top:bottom, left:right]
            pieces.append(Image.fromarray(cv2.cvtColor(region, cv2.COLOR_BGR2RGB)))

        return pieces

    except Exception as e:
        print(f"[ERROR] Hieroglyph segmentation failed: {e}")
        return []

def validate_image(file):
    """Validate an uploaded image file.

    Checks, in order: size against ``Config.MAX_FILE_SIZE``, that the
    filename has an extension listed in ``Config.ALLOWED_EXTENSIONS``, and
    that Pillow can open and verify the stream as an image.

    Args:
        file: Upload object exposing ``filename`` and a seekable ``stream``,
            and optionally ``content_length`` (presumably a Werkzeug
            ``FileStorage`` -- confirm against the caller).

    Returns:
        True when every check passes.  The stream is rewound to offset 0
        after verification so it can be read again.

    Raises:
        ValueError: If the file is too large, the filename or extension is
            not acceptable, or the content is not a valid image.
    """
    import os

    from config import Config
    config = Config()

    # Check file size.  The declared content length is frequently 0 or
    # missing on upload objects, so also measure the stream itself and use
    # whichever figure is larger.
    declared_size = getattr(file, 'content_length', None) or 0
    measured_size = 0
    stream = getattr(file, 'stream', None)
    if stream is not None:
        try:
            start = stream.tell()
            stream.seek(0, os.SEEK_END)
            measured_size = stream.tell()
            stream.seek(start)  # leave the stream where we found it
        except (AttributeError, OSError, ValueError):
            # Stream cannot be measured; rely on the declared size only.
            measured_size = 0

    if max(declared_size, measured_size) > config.MAX_FILE_SIZE:
        raise ValueError(f"File too large. Maximum size: {config.MAX_FILE_SIZE} bytes")

    # Check file extension
    if not file.filename or '.' not in file.filename:
        raise ValueError("Invalid filename")

    extension = file.filename.rsplit('.', 1)[1].lower()
    if extension not in config.ALLOWED_EXTENSIONS:
        raise ValueError(f"Invalid file type. Allowed: {', '.join(config.ALLOWED_EXTENSIONS)}")

    # Try to open as image
    try:
        Image.open(file.stream).verify()
        file.stream.seek(0)  # Reset stream for later use
    except Exception as exc:
        # Keep the underlying cause attached for debugging.
        raise ValueError("File is not a valid image") from exc

    return True

def preprocess_for_latin_ocr(image_path):
    """Prepare an image of Latin text for OCR.

    Pipeline: load from disk, convert to grayscale, smooth with a bilateral
    filter (diameter 9, sigma 75/75), then binarise with a Gaussian
    adaptive threshold (block size 11, constant 2).

    Returns:
        The thresholded image, or None if any step fails (the error is
        printed).
    """
    try:
        colour = cv2.imread(image_path)
        if colour is None:
            raise ValueError(f"Cannot load image: {image_path}")

        # Bilateral filtering removes noise while keeping stroke edges.
        smoothed = cv2.bilateralFilter(
            cv2.cvtColor(colour, cv2.COLOR_BGR2GRAY), 9, 75, 75
        )

        # Adaptive threshold copes with uneven lighting across the page.
        return cv2.adaptiveThreshold(
            smoothed,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            11,
            2,
        )

    except Exception as e:
        print(f"[ERROR] Latin preprocessing failed: {e}")
        return None

def enhance_contrast_for_manuscripts(image, *, clip_limit=3.0, tile_grid_size=(8, 8)):
    """Enhance local contrast of a manuscript image with CLAHE.

    Applies Contrast Limited Adaptive Histogram Equalization via OpenCV.

    Args:
        image: Image array passed straight to ``CLAHE.apply``.
            NOTE(review): OpenCV documents this as single-channel 8- or
            16-bit input -- confirm callers pass grayscale.
        clip_limit: Contrast-limiting threshold handed to
            ``cv2.createCLAHE``.  Defaults to 3.0.
        tile_grid_size: ``(columns, rows)`` grid of tiles used for the
            local histograms.  Defaults to ``(8, 8)``.

    Returns:
        The contrast-enhanced image returned by ``CLAHE.apply``.
    """
    clahe = cv2.createCLAHE(
        clipLimit=clip_limit,
        tileGridSize=tuple(tile_grid_size),
    )
    return clahe.apply(image)