"""Voynich manuscript text-line extraction tool.

Detects and crops single lines of manuscript text from a folio image
using OpenCV (CLAHE contrast enhancement, adaptive thresholding, and
contour filtering), with a line-by-line intensity scan and manual
rectangle selection as fallbacks. A Gradio UI wires everything together.
"""

import gradio as gr
import cv2
import numpy as np
from PIL import Image

# Fraction of the page height skipped at the top before searching for text,
# so page edges and headers are not mistaken for text lines. Shared by the
# detector and the debug visualization so they always agree.
SKIP_TOP_FRACTION = 0.15


def find_text_lines_voynich(img_pil):
    """Find and crop the topmost single text line in a Voynich folio.

    Args:
        img_pil: PIL image of the full folio (RGB or grayscale), or None.

    Returns:
        PIL image of the cropped text line, or None when img_pil is None.
        Falls back to scan_for_text_lines() when contour detection finds
        no usable line.
    """
    if img_pil is None:
        return None

    # Convert to OpenCV format (grayscale for analysis, keep `img` for cropping).
    img = np.array(img_pil)
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray = img

    img_height, img_width = gray.shape
    print(f"Processing image: {img_width}x{img_height}")

    # Skip the top portion where page edges and headers might be;
    # look for text in the middle and lower portions.
    skip_top = int(img_height * SKIP_TOP_FRACTION)
    search_area = gray[skip_top:, :]
    print(f"Searching in area starting from y={skip_top}")

    # Enhance contrast specifically for faded manuscript text.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(search_area)

    # Adaptive thresholding handles the uneven lighting of manuscript
    # pages better than a single global threshold.
    thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)

    # Small horizontal kernel: connect characters within words while
    # avoiding merging adjacent lines.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 1))
    connected = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

    contours, _ = cv2.findContours(connected, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    print(f"Found {len(contours)} contours in search area")

    # Filter for text-like contours: wide, short, strongly horizontal regions.
    text_contours = []
    search_height, search_width = search_area.shape
    for i, contour in enumerate(contours):
        x, y, w, h = cv2.boundingRect(contour)

        aspect_ratio = w / h if h > 0 else 0
        width_percent = (w / search_width) * 100
        height_percent = (h / search_height) * 100

        print(f"Contour {i}: pos=({x},{y}), size=({w},{h}), ratio={aspect_ratio:.1f}, w%={width_percent:.1f}, h%={height_percent:.1f}")

        # Restrictive criteria tuned for single text lines.
        if (w >= search_width * 0.15 and       # minimum width
                h >= 8 and                     # minimum height
                h <= search_height * 0.03 and  # small maximum height
                aspect_ratio >= 5.0 and        # strongly horizontal
                width_percent <= 85 and        # tighter width limit
                height_percent <= 3.0):        # height percentage limit
            # Store y in full-image coordinates (add skip_top back).
            text_contours.append((contour, x, y + skip_top, w, h))
            print(f" ✓ ACCEPTED as text line")
        else:
            print(f" ✗ REJECTED")

    print(f"Found {len(text_contours)} potential text lines")

    if text_contours:
        # Sort by y position so the topmost text line comes first.
        text_contours.sort(key=lambda entry: entry[2])
        contour, x, y, w, h = text_contours[0]
        print(f"Extracting text line at: x={x}, y={y}, w={w}, h={h}")

        # Small margins give a tight crop around the line.
        margin_x = 15
        margin_y = 10
        y_start = max(0, y - margin_y)
        y_end = min(img_height, y + h + margin_y)
        x_start = max(0, x - margin_x)
        x_end = min(img_width, x + w + margin_x)

        extracted = img[y_start:y_end, x_start:x_end]
        if extracted.size > 0:
            print(f"Successfully extracted line: {extracted.shape}")
            return Image.fromarray(extracted)

    # Fallback: contour detection produced no usable line (none found,
    # all rejected, or an empty crop) — scan line by line instead.
    print("No usable text-line contour, trying line-by-line scan...")
    return scan_for_text_lines(img, skip_top)


def scan_for_text_lines(img, start_y):
    """Scan the image row-band by row-band looking for a single text line.

    Args:
        img: numpy image array (RGB or grayscale) of the full folio.
        start_y: y coordinate to start scanning from (skips the header area).

    Returns:
        PIL image of the first band whose ink distribution looks like a
        single text line; as a last resort, a thin middle section of the page.
    """
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray = img

    img_height, img_width = gray.shape

    # Scan downward in 25-pixel strips, stepping 5 pixels at a time.
    for y in range(start_y, img_height - 25, 5):
        strip = gray[y:y + 25, :]

        # Otsu picks the ink/background threshold for this strip.
        _, thresh = cv2.threshold(strip, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

        # Fraction of the strip covered by ink.
        ink_pixels = np.sum(thresh > 0)
        total_pixels = strip.shape[0] * strip.shape[1]
        ink_ratio = ink_pixels / total_pixels

        # Check that the ink is spread across several rows (like text),
        # not concentrated in one blob.
        row_sums = np.sum(thresh, axis=1)
        rows_with_ink = np.sum(row_sums > 0)

        print(f"y={y}: ink_ratio={ink_ratio:.3f}, rows_with_ink={rows_with_ink}")

        # Restrictive criteria for a single line: modest ink coverage
        # spread over a text-like number of rows.
        if ink_ratio > 0.02 and ink_ratio < 0.15 and rows_with_ink >= 3 and rows_with_ink <= 15:
            # Expand the region slightly but keep the crop small.
            y_start = max(0, y - 8)
            y_end = min(img_height, y + 33)

            if len(img.shape) == 3:
                extracted = img[y_start:y_end, :]
            else:
                extracted = gray[y_start:y_end, :]

            print(f"Found text at y={y}, extracting region {y_start}:{y_end}")
            return Image.fromarray(extracted)

    # Last resort: return a thin section from the middle of the page.
    print("No text found, returning smaller middle section")
    mid_y = img_height // 2
    section = img[mid_y:mid_y + img_height // 8, :]
    return Image.fromarray(section)


def preprocess_voynich_image(img_pil):
    """Enhance contrast of a Voynich folio for easier reading.

    Applies CLAHE to the lightness channel in LAB color space so color
    balance is preserved while faded ink becomes more visible.

    Args:
        img_pil: PIL RGB image, or None.

    Returns:
        Contrast-enhanced PIL image, or None when img_pil is None.
    """
    if img_pil is None:
        return None

    img = np.array(img_pil)

    # Work on the L (lightness) channel only so colors are unaffected.
    lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
    l, a, b = cv2.split(lab)

    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    l = clahe.apply(l)

    enhanced = cv2.merge([l, a, b])
    enhanced = cv2.cvtColor(enhanced, cv2.COLOR_LAB2RGB)

    return Image.fromarray(enhanced)


def debug_voynich_detection(img_pil):
    """Visualize each stage of the text-line detection pipeline.

    Uses the same SKIP_TOP_FRACTION as find_text_lines_voynich so the
    visualization matches what the detector actually searches.

    Args:
        img_pil: PIL image of the full folio, or None.

    Returns:
        Tuple of four PIL images (search area overlay, CLAHE-enhanced
        search area, full-size threshold map, final extraction result),
        or (None, None, None, None) when img_pil is None.
    """
    if img_pil is None:
        return None, None, None, None

    img = np.array(img_pil)
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray = img

    img_height, img_width = gray.shape

    # Same skip as the detector, so the overlay shows the real search area.
    skip_top = int(img_height * SKIP_TOP_FRACTION)
    search_area = gray[skip_top:, :]

    # Draw the search-area rectangle on a copy of the grayscale image.
    search_viz = np.copy(gray)
    cv2.rectangle(search_viz, (0, skip_top), (img_width, img_height), (128), 2)

    # Same CLAHE + adaptive threshold settings as the detector.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(search_area)

    thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)

    # Embed the threshold result in a full-size canvas for display.
    thresh_full = np.zeros_like(gray)
    thresh_full[skip_top:, :] = thresh

    # Run the actual detector for the final panel.
    result = find_text_lines_voynich(img_pil)

    return (Image.fromarray(search_viz),
            Image.fromarray(enhanced),
            Image.fromarray(thresh_full),
            result)


def extract_text_block(img_pil, start_percent=0.2, height_percent=0.4):
    """Extract a full-width horizontal block from a specific region.

    Args:
        img_pil: PIL image, or None.
        start_percent: top of the block as a fraction of image height.
        height_percent: block height as a fraction of image height.

    Returns:
        PIL image of the block, or None when img_pil is None.
    """
    if img_pil is None:
        return None

    img = np.array(img_pil)
    img_height = img.shape[0]

    start_y = int(img_height * start_percent)
    block_height = int(img_height * height_percent)
    end_y = min(img_height, start_y + block_height)

    block = img[start_y:end_y, :]
    return Image.fromarray(block)


def manual_extract_rectangle(img_pil, x_start_percent=0.0, y_start_percent=0.2,
                             width_percent=1.0, height_percent=0.15):
    """Extract a rectangle chosen by the user via percentage sliders.

    Args:
        img_pil: PIL image, or None.
        x_start_percent, y_start_percent: top-left corner as fractions of
            image width/height.
        width_percent, height_percent: rectangle size as fractions of
            image width/height.

    Returns:
        PIL image of the rectangle, or None when img_pil is None or the
        selection is empty.
    """
    if img_pil is None:
        return None

    img = np.array(img_pil)
    img_height, img_width = img.shape[:2]

    # Convert percentages to pixel coordinates.
    x_start = int(img_width * x_start_percent)
    y_start = int(img_height * y_start_percent)
    width = int(img_width * width_percent)
    height = int(img_height * height_percent)

    # Clamp everything to the image bounds.
    x_start = max(0, min(x_start, img_width - 1))
    y_start = max(0, min(y_start, img_height - 1))
    x_end = min(img_width, x_start + width)
    y_end = min(img_height, y_start + height)

    rectangle = img[y_start:y_end, x_start:x_end]
    print(f"Manual extract: x={x_start}:{x_end}, y={y_start}:{y_end}, size={rectangle.shape}")

    if rectangle.size > 0:
        return Image.fromarray(rectangle)
    else:
        return None


def show_rectangle_preview(img_pil, x_start_percent=0.0, y_start_percent=0.2,
                           width_percent=1.0, height_percent=0.15):
    """Draw a preview of the rectangle that manual extraction would crop.

    Args:
        img_pil: PIL image, or None.
        x_start_percent, y_start_percent, width_percent, height_percent:
            same meaning as in manual_extract_rectangle().

    Returns:
        PIL image with the selection outlined in red and shaded green,
        or None when img_pil is None.
    """
    if img_pil is None:
        return None

    img = np.array(img_pil)
    img_height, img_width = img.shape[:2]

    # Convert percentages to pixel coordinates.
    x_start = int(img_width * x_start_percent)
    y_start = int(img_height * y_start_percent)
    width = int(img_width * width_percent)
    height = int(img_height * height_percent)

    # Clamp everything to the image bounds.
    x_start = max(0, min(x_start, img_width - 1))
    y_start = max(0, min(y_start, img_height - 1))
    x_end = min(img_width, x_start + width)
    y_end = min(img_height, y_start + height)

    # Draw on a copy so the input image is untouched.
    preview = np.copy(img)
    cv2.rectangle(preview, (x_start, y_start), (x_end, y_end), (255, 0, 0), 2)

    # Semi-transparent green overlay highlights the selected area.
    overlay = np.copy(preview)
    cv2.rectangle(overlay, (x_start, y_start), (x_end, y_end), (0, 255, 0), -1)
    preview = cv2.addWeighted(preview, 0.8, overlay, 0.2, 0)

    return Image.fromarray(preview)


# Gradio interface: automatic detection, block extraction, debugging,
# and a manual rectangle-selection fallback.
with gr.Blocks(title="Voynich Text Line Extractor - Single Line Focus") as demo:
    gr.Markdown("# Voynich Text Line Extractor - Single Line Focus")
    gr.Markdown("This version is optimized to extract single text lines with tighter bounding boxes.")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload Voynich Folio")

            with gr.Tab("Auto Extract"):
                enhance_btn = gr.Button("Enhance Image")
                extract_btn = gr.Button("Find Text Lines")
                block_btn = gr.Button("Extract Text Block")
                debug_btn = gr.Button("Debug Detection")

                # Sliders controlling text-block extraction.
                start_slider = gr.Slider(0.1, 0.8, 0.2, label="Start Position (% from top)")
                height_slider = gr.Slider(0.1, 0.6, 0.4, label="Block Height (% of image)")

            with gr.Tab("Manual Rectangle"):
                gr.Markdown("### Manual Rectangle Selection")
                gr.Markdown("Adjust the sliders to manually select a rectangular region")

                x_start_slider = gr.Slider(0.0, 0.9, 0.0, step=0.01, label="X Start (% from left)")
                y_start_slider = gr.Slider(0.0, 0.9, 0.2, step=0.01, label="Y Start (% from top)")
                width_slider = gr.Slider(0.1, 1.0, 1.0, step=0.01, label="Width (% of image)")
                height_slider_manual = gr.Slider(0.05, 0.5, 0.15, step=0.01, label="Height (% of image)")

                preview_btn = gr.Button("Preview Rectangle")
                extract_manual_btn = gr.Button("Extract Rectangle")

        with gr.Column():
            enhanced_output = gr.Image(label="Enhanced Image")
            line_output = gr.Image(label="Extracted Text")
            preview_output = gr.Image(label="Rectangle Preview")

    with gr.Row():
        debug_search = gr.Image(label="1. Search Area")
        debug_enhanced = gr.Image(label="2. Enhanced")
        debug_thresh = gr.Image(label="3. Threshold")
        debug_result = gr.Image(label="4. Result")

    # Auto extract button handlers
    enhance_btn.click(
        fn=preprocess_voynich_image,
        inputs=input_image,
        outputs=enhanced_output
    )

    extract_btn.click(
        fn=find_text_lines_voynich,
        inputs=input_image,
        outputs=line_output
    )

    block_btn.click(
        fn=extract_text_block,
        inputs=[input_image, start_slider, height_slider],
        outputs=line_output
    )

    debug_btn.click(
        fn=debug_voynich_detection,
        inputs=input_image,
        outputs=[debug_search, debug_enhanced, debug_thresh, debug_result]
    )

    # Manual rectangle handlers
    preview_btn.click(
        fn=show_rectangle_preview,
        inputs=[input_image, x_start_slider, y_start_slider, width_slider, height_slider_manual],
        outputs=preview_output
    )

    extract_manual_btn.click(
        fn=manual_extract_rectangle,
        inputs=[input_image, x_start_slider, y_start_slider, width_slider, height_slider_manual],
        outputs=line_output
    )

    # Auto-update the preview whenever any rectangle slider changes.
    for slider in [x_start_slider, y_start_slider, width_slider, height_slider_manual]:
        slider.change(
            fn=show_rectangle_preview,
            inputs=[input_image, x_start_slider, y_start_slider, width_slider, height_slider_manual],
            outputs=preview_output
        )


if __name__ == "__main__":
    demo.launch()