iammraat commited on
Commit
479016e
Β·
verified Β·
1 Parent(s): b067fca

Update app (1).py

Browse files
Files changed (1) hide show
  1. app (1).py +129 -841
app (1).py CHANGED
@@ -1,896 +1,184 @@
1
-
2
- # import gradio as gr
3
- # from ultralytics import YOLO
4
- # from transformers import TrOCRProcessor, VisionEncoderDecoderModel
5
- # from PIL import Image, ImageDraw
6
- # import torch
7
- # import logging
8
- # from datetime import datetime
9
- # import os
10
- # import warnings
11
- # import time
12
-
13
- # # Suppress progress bar and unnecessary logs
14
- # os.environ['HF_HUB_DISABLE_PROGRESS_BARS'] = '1'
15
- # warnings.filterwarnings('ignore')
16
- # logging.getLogger('transformers').setLevel(logging.ERROR)
17
- # logging.getLogger('ultralytics').setLevel(logging.ERROR)
18
-
19
- # # Setup logging
20
- # logging.basicConfig(
21
- # level=logging.INFO,
22
- # format='%(asctime)s - %(levelname)s - %(message)s'
23
- # )
24
- # logger = logging.getLogger(__name__)
25
-
26
- # logger.info("Starting model loading...")
27
- # device = "cuda" if torch.cuda.is_available() else "cpu"
28
- # logger.info(f"Using device: {device}")
29
-
30
- # # --- ROBUST MODEL LOADING FUNCTION ---
31
- # def load_model_with_retry(model_class, model_name, token=None, retries=5, delay=5):
32
- # """Attempts to load a HF model with retries to handle network timeouts."""
33
- # for attempt in range(retries):
34
- # try:
35
- # logger.info(f"Loading {model_name} (Attempt {attempt + 1}/{retries})...")
36
- # if "Processor" in str(model_class):
37
- # return model_class.from_pretrained(model_name, token=token)
38
- # else:
39
- # return model_class.from_pretrained(model_name, token=token).to(device)
40
- # except Exception as e:
41
- # logger.warning(f"Failed to load {model_name}: {e}")
42
- # if attempt < retries - 1:
43
- # logger.info(f"Retrying in {delay} seconds...")
44
- # time.sleep(delay)
45
- # else:
46
- # logger.error(f"Given up on loading {model_name} after {retries} attempts.")
47
- # raise e
48
-
49
- # try:
50
- # # 1. Load YOLO Models (Local Files)
51
- # region_model_file = 'regions.pt'
52
- # line_model_file = 'lines.pt'
53
-
54
- # # Simple check for local files
55
- # if not os.path.exists(region_model_file):
56
- # # Check current directory listing just in case
57
- # for file in os.listdir('.'):
58
- # if 'region' in file.lower() and file.endswith('.pt'): region_model_file = file
59
- # elif 'line' in file.lower() and file.endswith('.pt'): line_model_file = file
60
-
61
- # if not os.path.exists(region_model_file) or not os.path.exists(line_model_file):
62
- # raise FileNotFoundError("YOLO .pt files (regions.pt/lines.pt) not found.")
63
-
64
- # logger.info("Loading YOLO models...")
65
- # region_model = YOLO(region_model_file)
66
- # line_model = YOLO(line_model_file)
67
- # logger.info("βœ“ YOLO models loaded")
68
-
69
- # # 2. Load TrOCR with Retries
70
- # hf_token = os.getenv("HF_TOKEN")
71
-
72
- # processor = load_model_with_retry(TrOCRProcessor, "microsoft/trocr-base-handwritten", token=hf_token)
73
- # logger.info("βœ“ TrOCR processor loaded")
74
-
75
- # trocr_model = load_model_with_retry(VisionEncoderDecoderModel, "microsoft/trocr-base-handwritten", token=hf_token)
76
- # logger.info("βœ“ TrOCR model loaded")
77
-
78
- # logger.info("All models loaded successfully!")
79
-
80
- # except Exception as e:
81
- # logger.error(f"CRITICAL ERROR loading models: {str(e)}")
82
- # raise
83
-
84
- # # --- OCR HELPER ---
85
- # def run_trocr(image_slice, processor, model, device):
86
- # """Runs TrOCR on a single cropped image slice."""
87
- # pixel_values = processor(images=image_slice, return_tensors="pt").pixel_values.to(device)
88
- # generated_ids = model.generate(pixel_values)
89
- # return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
90
-
91
- # def process_document(image):
92
- # """Process uploaded document image and extract handwritten text with visualization."""
93
- # timestamp = datetime.now().strftime("%H:%M:%S")
94
- # log_output = []
95
-
96
- # def add_log(message, level="INFO"):
97
- # log_msg = f"[{timestamp}] {level}: {message}"
98
- # log_output.append(log_msg)
99
- # if level == "ERROR":
100
- # logger.error(message)
101
- # else:
102
- # logger.info(message)
103
-
104
- # add_log("Starting document processing")
105
-
106
- # if image is None:
107
- # add_log("No image provided", "ERROR")
108
- # return None, "Please upload an image", "\n".join(log_output)
109
-
110
- # try:
111
- # # Prepare Image
112
- # if not isinstance(image, Image.Image):
113
- # img = Image.open(image).convert("RGB")
114
- # else:
115
- # img = image.convert("RGB")
116
-
117
- # # Create a drawing context for the debug image
118
- # debug_img = img.copy()
119
- # draw = ImageDraw.Draw(debug_img)
120
-
121
- # width, height = img.size
122
- # add_log(f"Image size: {width}x{height} pixels")
123
-
124
- # all_lines = []
125
-
126
- # # --- STRATEGY 1: Region Detection ---
127
- # add_log("Strategy 1: Running region detection...")
128
- # region_results = region_model(img, conf=0.2, imgsz=1024, verbose=False)
129
- # regions = region_results[0].boxes
130
- # num_regions = len(regions)
131
- # add_log(f"βœ“ Found {num_regions} potential text region(s)")
132
-
133
- # found_lines_in_regions = False
134
-
135
- # if num_regions > 0:
136
- # for region_idx, region in enumerate(regions):
137
- # add_log(f"Processing region {region_idx + 1}/{num_regions}")
138
-
139
- # # Get coordinates
140
- # rx1, ry1, rx2, ry2 = map(int, region.xyxy[0])
141
-
142
- # # Filter small artifacts
143
- # if (rx2 - rx1) < 50 or (ry2 - ry1) < 50:
144
- # add_log(f" Skipping tiny artifact: {rx2-rx1}x{ry2-ry1} px")
145
- # continue
146
-
147
- # # Draw GREEN box for Region
148
- # draw.rectangle([rx1, ry1, rx2, ry2], outline="green", width=5)
149
-
150
- # # Crop Region
151
- # region_crop = img.crop((rx1, ry1, rx2, ry2))
152
-
153
- # # Detect lines in this region
154
- # line_results = line_model(region_crop, conf=0.2, imgsz=1024, verbose=False)
155
- # lines = line_results[0].boxes
156
- # num_lines = len(lines)
157
- # add_log(f" βœ“ Found {num_lines} line(s) in region")
158
-
159
- # if num_lines > 0:
160
- # found_lines_in_regions = True
161
-
162
- # # Sort lines by Y position
163
- # lines_sorted = sorted(lines, key=lambda b: b.xyxy[0][1])
164
-
165
- # for line_idx, line in enumerate(lines_sorted):
166
- # lx1, ly1, lx2, ly2 = map(int, line.xyxy[0])
167
-
168
- # # Translate line coordinates back to original image space for drawing
169
- # global_lx1 = rx1 + lx1
170
- # global_ly1 = ry1 + ly1
171
- # global_lx2 = rx1 + lx2
172
- # global_ly2 = ry1 + ly2
173
-
174
- # # Draw RED box for Line
175
- # draw.rectangle([global_lx1, global_ly1, global_lx2, global_ly2], outline="red", width=3)
176
-
177
- # # OCR
178
- # line_crop = region_crop.crop((lx1, ly1, lx2, ly2))
179
- # text = run_trocr(line_crop, processor, trocr_model, device)
180
- # add_log(f" Line {line_idx + 1}: '{text}'")
181
- # all_lines.append(text)
182
-
183
- # # --- STRATEGY 2: Fallback to Full Page ---
184
- # if not found_lines_in_regions:
185
- # add_log("⚠️ Region detection yielded no lines. Switching to Fallback Strategy...", "WARNING")
186
- # add_log("Strategy 2: Running line detection on full page")
187
-
188
- # line_results = line_model(img, conf=0.2, imgsz=1024, verbose=False)
189
- # lines = line_results[0].boxes
190
- # num_lines = len(lines)
191
- # add_log(f"βœ“ Fallback found {num_lines} line(s) on full page")
192
-
193
- # if num_lines > 0:
194
- # lines_sorted = sorted(lines, key=lambda b: b.xyxy[0][1])
195
-
196
- # for line_idx, line in enumerate(lines_sorted):
197
- # lx1, ly1, lx2, ly2 = map(int, line.xyxy[0])
198
-
199
- # # Draw RED box for Line (on full image)
200
- # draw.rectangle([lx1, ly1, lx2, ly2], outline="red", width=3)
201
-
202
- # line_crop = img.crop((lx1, ly1, lx2, ly2))
203
- # text = run_trocr(line_crop, processor, trocr_model, device)
204
- # add_log(f" Line {line_idx + 1}: '{text}'")
205
- # all_lines.append(text)
206
-
207
- # if not all_lines:
208
- # add_log("Failed to detect any text lines in both strategies", "ERROR")
209
- # return debug_img, "No text could be extracted.", "\n".join(log_output)
210
-
211
- # add_log(f"βœ“ Success! Extracted {len(all_lines)} total line(s)")
212
- # final_text = '\n'.join(all_lines)
213
-
214
- # return debug_img, final_text, "\n".join(log_output)
215
-
216
- # except Exception as e:
217
- # error_msg = f"Error processing image: {str(e)}"
218
- # add_log(error_msg, "ERROR")
219
- # logger.exception("Full error traceback:")
220
- # # Return the original image if debug creation failed
221
- # return image, f"Error: {str(e)}", "\n".join(log_output)
222
-
223
- # # Create Gradio interface
224
- # demo = gr.Interface(
225
- # fn=process_document,
226
- # inputs=gr.Image(type="pil", label="Upload Handwritten Document"),
227
- # outputs=[
228
- # gr.Image(type="pil", label="Debug Visualization (Green=Region, Red=Lines)"),
229
- # gr.Textbox(label="Extracted Text", lines=10),
230
- # gr.Textbox(label="Processing Logs", lines=15)
231
- # ],
232
- # title="πŸ“ Handwritten Text Recognition (HTR) with Debugging",
233
- # description="""
234
- # Upload an image of a handwritten document.
235
-
236
- # **Visualization Key:**
237
- # - 🟩 **Green Box:** The broad region identified as containing text.
238
- # - πŸŸ₯ **Red Box:** The specific line of text sent to the OCR engine.
239
- # """,
240
- # flagging_mode="never",
241
- # theme=gr.themes.Soft()
242
- # )
243
-
244
- # if __name__ == "__main__":
245
- # logger.info("Launching Gradio interface...")
246
- # demo.launch()
247
-
248
-
249
-
250
-
251
-
252
-
253
-
254
-
255
-
256
-
257
-
258
-
259
-
260
-
261
-
262
-
263
- # import gradio as gr
264
- # from ultralytics import YOLO
265
- # from transformers import TrOCRProcessor, VisionEncoderDecoderModel
266
- # from PIL import Image, ImageDraw, ImageFont
267
- # import torch
268
- # import logging
269
- # from datetime import datetime
270
- # import os
271
- # import warnings
272
- # import time
273
-
274
- # # Suppress progress bar and unnecessary logs
275
- # os.environ['HF_HUB_DISABLE_PROGRESS_BARS'] = '1'
276
- # warnings.filterwarnings('ignore')
277
- # logging.getLogger('transformers').setLevel(logging.ERROR)
278
- # logging.getLogger('ultralytics').setLevel(logging.ERROR)
279
-
280
- # # Setup logging
281
- # logging.basicConfig(
282
- # level=logging.INFO,
283
- # format='%(asctime)s - %(levelname)s - %(message)s'
284
- # )
285
- # logger = logging.getLogger(__name__)
286
-
287
- # logger.info("Starting model loading...")
288
- # device = "cuda" if torch.cuda.is_available() else "cpu"
289
- # logger.info(f"Using device: {device}")
290
-
291
- # # --- ROBUST MODEL LOADING FUNCTION ---
292
- # def load_model_with_retry(model_class, model_name, token=None, retries=5, delay=5):
293
- # """Attempts to load a HF model with retries to handle network timeouts."""
294
- # for attempt in range(retries):
295
- # try:
296
- # logger.info(f"Loading {model_name} (Attempt {attempt + 1}/{retries})...")
297
- # if "Processor" in str(model_class):
298
- # return model_class.from_pretrained(model_name, token=token)
299
- # else:
300
- # return model_class.from_pretrained(model_name, token=token).to(device)
301
- # except Exception as e:
302
- # logger.warning(f"Failed to load {model_name}: {e}")
303
- # if attempt < retries - 1:
304
- # logger.info(f"Retrying in {delay} seconds...")
305
- # time.sleep(delay)
306
- # else:
307
- # logger.error(f"Given up on loading {model_name} after {retries} attempts.")
308
- # raise e
309
-
310
- # try:
311
- # # 1. Load YOLO Models (Local Files)
312
- # region_model_file = 'regions.pt'
313
- # line_model_file = 'lines.pt'
314
-
315
- # # Simple check for local files
316
- # if not os.path.exists(region_model_file):
317
- # for file in os.listdir('.'):
318
- # if 'region' in file.lower() and file.endswith('.pt'): region_model_file = file
319
- # elif 'line' in file.lower() and file.endswith('.pt'): line_model_file = file
320
-
321
- # if not os.path.exists(region_model_file) or not os.path.exists(line_model_file):
322
- # raise FileNotFoundError("YOLO .pt files (regions.pt/lines.pt) not found.")
323
-
324
- # logger.info("Loading YOLO models...")
325
- # region_model = YOLO(region_model_file)
326
- # line_model = YOLO(line_model_file)
327
- # logger.info("βœ“ YOLO models loaded")
328
-
329
- # # 2. Load TrOCR with Retries
330
- # hf_token = os.getenv("HF_TOKEN")
331
-
332
- # processor = load_model_with_retry(TrOCRProcessor, "microsoft/trocr-base-handwritten", token=hf_token)
333
- # logger.info("βœ“ TrOCR processor loaded")
334
-
335
- # trocr_model = load_model_with_retry(VisionEncoderDecoderModel, "microsoft/trocr-base-handwritten", token=hf_token)
336
- # logger.info("βœ“ TrOCR model loaded")
337
-
338
- # logger.info("All models loaded successfully!")
339
-
340
- # except Exception as e:
341
- # logger.error(f"CRITICAL ERROR loading models: {str(e)}")
342
- # raise
343
-
344
- # # --- OCR HELPER ---
345
- # def run_trocr(image_slice, processor, model, device):
346
- # """Runs TrOCR on a single cropped image slice."""
347
- # pixel_values = processor(images=image_slice, return_tensors="pt").pixel_values.to(device)
348
- # generated_ids = model.generate(pixel_values)
349
- # return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
350
-
351
- # def process_document(image, enable_debug_crops=False):
352
- # """Process uploaded document image and extract handwritten text with visualization."""
353
- # timestamp = datetime.now().strftime("%H:%M:%S")
354
- # log_output = []
355
-
356
- # def add_log(message, level="INFO"):
357
- # log_msg = f"[{timestamp}] {level}: {message}"
358
- # log_output.append(log_msg)
359
- # if level == "ERROR":
360
- # logger.error(message)
361
- # else:
362
- # logger.info(message)
363
-
364
- # add_log("Starting document processing")
365
-
366
- # if image is None:
367
- # add_log("No image provided", "ERROR")
368
- # return None, "Please upload an image", "\n".join(log_output)
369
-
370
- # try:
371
- # # Prepare Image
372
- # if not isinstance(image, Image.Image):
373
- # img = Image.open(image).convert("RGB")
374
- # else:
375
- # img = image.convert("RGB")
376
-
377
- # # Create a drawing context for the debug image
378
- # debug_img = img.copy()
379
- # draw = ImageDraw.Draw(debug_img)
380
-
381
- # width, height = img.size
382
- # add_log(f"Image size: {width}x{height} pixels")
383
-
384
- # all_lines = []
385
- # debug_crops_dir = "debug_crops"
386
-
387
- # if enable_debug_crops:
388
- # os.makedirs(debug_crops_dir, exist_ok=True)
389
- # add_log(f"Debug crops will be saved to {debug_crops_dir}/")
390
-
391
- # # --- STRATEGY 1: Region Detection ---
392
- # add_log("Strategy 1: Running region detection...")
393
- # region_results = region_model(img, conf=0.2, imgsz=1024, verbose=False)
394
- # regions = region_results[0].boxes
395
- # num_regions = len(regions)
396
- # add_log(f"βœ“ Found {num_regions} potential text region(s)")
397
-
398
- # found_lines_in_regions = False
399
-
400
- # if num_regions > 0:
401
- # for region_idx, region in enumerate(regions):
402
- # add_log(f"Processing region {region_idx + 1}/{num_regions}")
403
-
404
- # # FIX 1: Use round() instead of int() to minimize precision loss
405
- # rx1, ry1, rx2, ry2 = map(round, region.xyxy[0].tolist())
406
-
407
- # # Calculate region dimensions
408
- # region_width = rx2 - rx1
409
- # region_height = ry2 - ry1
410
-
411
- # add_log(f" Region coords: ({rx1}, {ry1}) β†’ ({rx2}, {ry2}), size: {region_width}x{region_height}")
412
-
413
- # # Filter small artifacts
414
- # if region_width < 50 or region_height < 50:
415
- # add_log(f" Skipping tiny artifact: {region_width}x{region_height} px")
416
- # continue
417
-
418
- # # FIX 2: Add padding to region crops to avoid edge effects
419
- # padding = 10
420
- # padded_rx1 = max(0, rx1 - padding)
421
- # padded_ry1 = max(0, ry1 - padding)
422
- # padded_rx2 = min(width, rx2 + padding)
423
- # padded_ry2 = min(height, ry2 + padding)
424
-
425
- # add_log(f" Padded coords: ({padded_rx1}, {padded_ry1}) β†’ ({padded_rx2}, {padded_ry2})")
426
-
427
- # # Draw GREEN box for Region (original bounds, not padded)
428
- # draw.rectangle([rx1, ry1, rx2, ry2], outline="green", width=5)
429
-
430
- # # Crop Region with padding
431
- # region_crop = img.crop((padded_rx1, padded_ry1, padded_rx2, padded_ry2))
432
-
433
- # if enable_debug_crops:
434
- # region_crop.save(f"{debug_crops_dir}/region_{region_idx:02d}.png")
435
-
436
- # # Detect lines in this region
437
- # add_log(f" Running line detection on region crop ({region_crop.size[0]}x{region_crop.size[1]})...")
438
- # line_results = line_model(region_crop, conf=0.2, imgsz=1024, verbose=False)
439
- # lines_data = line_results[0].boxes.xyxy.cpu().numpy()
440
- # num_lines = len(lines_data)
441
- # add_log(f" βœ“ Found {num_lines} line(s) in region")
442
-
443
- # if num_lines > 0:
444
- # found_lines_in_regions = True
445
-
446
- # # Sort lines by Y position (index 1 of xyxy)
447
- # sorted_indices = lines_data[:, 1].argsort()
448
-
449
- # for line_idx, idx in enumerate(sorted_indices):
450
- # # FIX 3: Use round() for line coordinates too
451
- # lx1, ly1, lx2, ly2 = map(round, lines_data[idx].tolist())
452
-
453
- # line_width = lx2 - lx1
454
- # line_height = ly2 - ly1
455
-
456
- # add_log(f" Line {line_idx + 1} (local coords): ({lx1}, {ly1}) β†’ ({lx2}, {ly2}), size: {line_width}x{line_height}")
457
-
458
- # # FIX 4: Translate line coordinates back to original image space
459
- # # Account for padding offset
460
- # global_lx1 = padded_rx1 + lx1
461
- # global_ly1 = padded_ry1 + ly1
462
- # global_lx2 = padded_rx1 + lx2
463
- # global_ly2 = padded_ry1 + ly2
464
-
465
- # # FIX 5: Validate coordinates are within image bounds
466
- # global_lx1 = max(0, min(width, global_lx1))
467
- # global_ly1 = max(0, min(height, global_ly1))
468
- # global_lx2 = max(0, min(width, global_lx2))
469
- # global_ly2 = max(0, min(height, global_ly2))
470
-
471
- # add_log(f" Line {line_idx + 1} (global coords): ({global_lx1}, {global_ly1}) β†’ ({global_lx2}, {global_ly2})")
472
-
473
- # # Draw RED box for Line
474
- # draw.rectangle([global_lx1, global_ly1, global_lx2, global_ly2], outline="red", width=3)
475
-
476
- # # OCR on the line crop from region_crop
477
- # line_crop = region_crop.crop((lx1, ly1, lx2, ly2))
478
-
479
- # if enable_debug_crops:
480
- # line_crop.save(f"{debug_crops_dir}/region_{region_idx:02d}_line_{line_idx:02d}.png")
481
-
482
- # text = run_trocr(line_crop, processor, trocr_model, device)
483
- # add_log(f" Line {line_idx + 1} OCR: '{text}'")
484
- # all_lines.append(text)
485
-
486
- # # --- STRATEGY 2: Fallback to Full Page ---
487
- # if not found_lines_in_regions:
488
- # add_log("⚠️ Region detection yielded no lines. Switching to Fallback Strategy...", "WARNING")
489
- # add_log("Strategy 2: Running line detection on full page")
490
-
491
- # line_results = line_model(img, conf=0.2, imgsz=1024, verbose=False)
492
- # lines_data = line_results[0].boxes.xyxy.cpu().numpy()
493
- # num_lines = len(lines_data)
494
- # add_log(f"βœ“ Fallback found {num_lines} line(s) on full page")
495
-
496
- # if num_lines > 0:
497
- # sorted_indices = lines_data[:, 1].argsort()
498
-
499
- # for line_idx, idx in enumerate(sorted_indices):
500
- # # FIX 6: Use round() consistently
501
- # lx1, ly1, lx2, ly2 = map(round, lines_data[idx].tolist())
502
-
503
- # line_width = lx2 - lx1
504
- # line_height = ly2 - ly1
505
-
506
- # add_log(f" Fallback Line {line_idx + 1}: ({lx1}, {ly1}) β†’ ({lx2}, {ly2}), size: {line_width}x{line_height}")
507
-
508
- # # FIX 7: Validate coordinates
509
- # lx1 = max(0, min(width, lx1))
510
- # ly1 = max(0, min(height, ly1))
511
- # lx2 = max(0, min(width, lx2))
512
- # ly2 = max(0, min(height, ly2))
513
-
514
- # # Draw RED box for Line (on full image)
515
- # draw.rectangle([lx1, ly1, lx2, ly2], outline="red", width=3)
516
-
517
- # line_crop = img.crop((lx1, ly1, lx2, ly2))
518
-
519
- # if enable_debug_crops:
520
- # line_crop.save(f"{debug_crops_dir}/fullpage_line_{line_idx:02d}.png")
521
-
522
- # text = run_trocr(line_crop, processor, trocr_model, device)
523
- # add_log(f" Fallback Line {line_idx + 1} OCR: '{text}'")
524
- # all_lines.append(text)
525
-
526
- # if not all_lines:
527
- # add_log("Failed to detect any text lines in both strategies", "ERROR")
528
- # return debug_img, "No text could be extracted.", "\n".join(log_output)
529
-
530
- # add_log(f"βœ“ Success! Extracted {len(all_lines)} total line(s)")
531
-
532
- # if enable_debug_crops:
533
- # add_log(f"βœ“ Debug crops saved to {debug_crops_dir}/")
534
-
535
- # final_text = '\n'.join(all_lines)
536
-
537
- # return debug_img, final_text, "\n".join(log_output)
538
-
539
- # except Exception as e:
540
- # error_msg = f"Error processing image: {str(e)}"
541
- # add_log(error_msg, "ERROR")
542
- # logger.exception("Full error traceback:")
543
- # return image, f"Error: {str(e)}", "\n".join(log_output)
544
-
545
- # # Create Gradio interface
546
- # demo = gr.Interface(
547
- # fn=process_document,
548
- # inputs=[
549
- # gr.Image(type="pil", label="Upload Handwritten Document"),
550
- # gr.Checkbox(label="Save debug crops to disk", value=False)
551
- # ],
552
- # outputs=[
553
- # gr.Image(type="pil", label="Debug Visualization (Green=Region, Red=Lines)"),
554
- # gr.Textbox(label="Extracted Text", lines=10),
555
- # gr.Textbox(label="Processing Logs", lines=15)
556
- # ],
557
- # title="πŸ“ Handwritten Text Recognition (HTR) with Enhanced Debugging",
558
- # description="""
559
- # Upload an image of a handwritten document.
560
-
561
- # **Visualization Key:**
562
- # - 🟩 **Green Box:** The broad region identified as containing text (original bounds).
563
- # - πŸŸ₯ **Red Box:** The specific line of text sent to the OCR engine (with coordinate validation).
564
-
565
- # **Improvements:**
566
- # - Fixed coordinate rounding (eliminates truncation errors)
567
- # - Added 10px padding to region crops (reduces edge effects)
568
- # - Coordinate validation (ensures all boxes are within image bounds)
569
- # - Enhanced logging with detailed coordinate tracking
570
- # - Optional debug crop saving
571
- # """,
572
- # flagging_mode="never",
573
- # theme=gr.themes.Soft()
574
- # )
575
-
576
- # if __name__ == "__main__":
577
- # logger.info("Launching Gradio interface...")
578
- # demo.launch()
579
-
580
-
581
-
582
-
583
-
584
-
585
-
586
-
587
-
588
-
589
-
590
-
591
-
592
-
593
-
594
-
595
-
596
-
597
-
598
-
599
-
600
-
601
  import gradio as gr
602
  from ultralytics import YOLO
603
- from transformers import TrOCRProcessor, VisionEncoderDecoderModel
604
- from PIL import Image, ImageDraw
605
  import torch
606
  import logging
607
  import os
608
- import warnings
609
- import time
610
  from datetime import datetime
611
 
612
- # Suppress noisy logs
613
  os.environ['HF_HUB_DISABLE_PROGRESS_BARS'] = '1'
614
- warnings.filterwarnings('ignore')
615
- logging.getLogger('transformers').setLevel(logging.ERROR)
616
- logging.getLogger('ultralytics').setLevel(logging.WARNING) # still allow important warnings
617
 
618
- # Setup clean logging
619
- logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)-5s | %(message)s')
 
 
620
  logger = logging.getLogger(__name__)
621
 
622
- logger.info("Initializing models...")
 
623
  device = "cuda" if torch.cuda.is_available() else "cpu"
624
  logger.info(f"Device: {device}")
625
 
626
- def load_with_retry(cls, name, token=None, retries=4, delay=6):
627
- for attempt in range(1, retries + 1):
628
- try:
629
- logger.info(f"Loading {name} (attempt {attempt}/{retries})")
630
- if "Processor" in str(cls):
631
- return cls.from_pretrained(name, token=token)
632
- return cls.from_pretrained(name, token=token).to(device)
633
- except Exception as e:
634
- logger.warning(f"Load failed: {e}")
635
- if attempt < retries:
636
- time.sleep(delay)
637
- raise RuntimeError(f"Failed to load {name} after {retries} attempts")
638
-
639
  try:
640
- # Locate local YOLO weights
641
  region_pt = 'regions.pt'
642
- line_pt = 'lines.pt'
643
-
644
  if not os.path.exists(region_pt):
645
  for f in os.listdir('.'):
646
  name = f.lower()
647
- if 'region' in name and name.endswith('.pt'): region_pt = f
648
- if 'line' in name and name.endswith('.pt'): line_pt = f
649
-
650
- if not all(os.path.exists(p) for p in [region_pt, line_pt]):
651
- raise FileNotFoundError("Could not find regions.pt and lines.pt (or similar)")
652
 
653
- logger.info("Loading YOLO models...")
654
- region_model = YOLO(region_pt)
655
- line_model = YOLO(line_pt)
656
- logger.info("YOLO models loaded")
657
 
658
- hf_token = os.getenv("HF_TOKEN")
659
- processor = load_with_retry(TrOCRProcessor, "microsoft/trocr-base-handwritten", hf_token)
660
- trocr = load_with_retry(VisionEncoderDecoderModel, "microsoft/trocr-base-handwritten", hf_token)
661
- logger.info("TrOCR loaded β†’ ready")
662
 
663
  except Exception as e:
664
- logger.error(f"Model loading failed: {e}", exc_info=True)
665
  raise
666
 
667
 
668
-
669
-
670
-
671
- def run_ocr(crop: Image.Image) -> str:
672
- if crop.width < 20 or crop.height < 12:
673
- return ""
674
- pixels = processor(images=crop, return_tensors="pt").pixel_values.to(device)
675
- ids = trocr.generate(pixels, max_new_tokens=128)
676
- return processor.batch_decode(ids, skip_special_tokens=True)[0].strip()
677
-
678
-
679
- def process_document(
680
  image,
681
- enable_debug_crops: bool = False,
682
- region_imgsz: int = 1024,
683
- line_imgsz_base: int = 768,
684
  conf_thresh: float = 0.25,
 
 
 
 
 
685
  ):
686
- start_ts = datetime.now().strftime("%H:%M:%S")
687
- logs = []
688
-
689
- def log(msg: str, level: str = "INFO"):
690
- line = f"[{start_ts}] {level:5} {msg}"
691
- logs.append(line)
692
- if level == "ERROR":
693
- logger.error(msg)
694
- else:
695
- logger.info(msg)
696
-
697
- log("Start processing")
698
 
699
  if image is None:
700
- log("No image uploaded", "ERROR")
701
- return None, "Upload an image", "\n".join(logs)
702
-
703
- try:
704
- # ── Prepare ─────────────────────────────────────────────────────────────
705
- if not isinstance(image, Image.Image):
706
- img = Image.open(image).convert("RGB")
707
- else:
708
- img = image.convert("RGB")
709
-
710
- debug_img = img.copy()
711
- draw = ImageDraw.Draw(debug_img)
712
- w, h = img.size
713
- log(f"Input image: {w} Γ— {h} px")
714
-
715
- debug_dir = "debug_crops"
716
- if enable_debug_crops:
717
- os.makedirs(debug_dir, exist_ok=True)
718
- log(f"Debug crops β†’ {debug_dir}/")
719
 
720
- extracted = []
721
- used_fallback = False
 
 
 
722
 
723
- # ── Strategy 1: Region β†’ Lines ──────────────────────────────────────────
724
- log(f"Running region detection (imgsz={region_imgsz}) …")
725
- res_region = region_model(img, conf=conf_thresh, imgsz=region_imgsz, verbose=False)[0]
726
- boxes_region = res_region.boxes
727
 
728
- log(f"β†’ {len(boxes_region)} region candidate(s) (conf β‰₯ {conf_thresh})")
 
729
 
730
- found_any_line = False
731
-
732
- for i, box in enumerate(boxes_region, 1):
733
- conf = float(box.conf)
734
- xyxy = box.xyxy[0].cpu().tolist()
735
- rx1, ry1, rx2, ry2 = map(round, xyxy)
736
-
737
- rw, rh = rx2 - rx1, ry2 - ry1
738
- log(f"Region {i}/{len(boxes_region)} conf={conf:.3f} {rx1},{ry1} β†’ {rx2},{ry2} ({rw}Γ—{rh})")
739
-
740
- if rw < 60 or rh < 40:
741
- log(f" β†’ skipped (too small)")
742
- continue
743
-
744
- # Padding
745
- pad = 12
746
- px1 = max(0, rx1 - pad)
747
- py1 = max(0, ry1 - pad)
748
- px2 = min(w, rx2 + pad)
749
- py2 = min(h, ry2 + pad)
750
-
751
- log(f" Padded crop: {px1},{py1} β†’ {px2},{py2}")
752
-
753
- draw.rectangle((rx1, ry1, rx2, ry2), outline="green", width=4)
754
-
755
- crop_region = img.crop((px1, py1, px2, py2))
756
- crop_w, crop_h = crop_region.size
757
-
758
- if enable_debug_crops:
759
- crop_region.save(f"{debug_dir}/region_{i:02d}.png")
760
-
761
- # Adaptive line imgsz: bigger crops β†’ bigger inference size
762
- line_sz = line_imgsz_base
763
- if max(crop_w, crop_h) > 1400:
764
- line_sz = 1280
765
- elif max(crop_w, crop_h) < 400:
766
- line_sz = 640
767
-
768
- log(f" β†’ line detection (imgsz={line_sz}) on {crop_w}Γ—{crop_h} crop …")
769
- res_line = line_model(crop_region, conf=conf_thresh, imgsz=line_sz, verbose=False)[0]
770
- line_boxes = res_line.boxes
771
-
772
- log(f" β†’ {len(line_boxes)} line candidate(s)")
773
-
774
- if len(line_boxes) == 0:
775
- continue
776
-
777
- found_any_line = True
778
-
779
- # Sort top β†’ bottom
780
- ys = line_boxes.xyxy[:, 1].cpu().numpy()
781
  order = ys.argsort()
782
 
783
- for j, idx in enumerate(order, 1):
784
- conf_line = float(line_boxes.conf[idx])
785
- lx1, ly1, lx2, ly2 = map(round, line_boxes.xyxy[idx].cpu().tolist())
786
-
787
- lw, lh = lx2 - lx1, ly2 - ly1
788
- log(f" Line {j} conf={conf_line:.3f} local {lx1},{ly1} β†’ {lx2},{ly2} ({lw}Γ—{lh})")
789
-
790
- # Back to global coordinates
791
- gx1 = px1 + lx1
792
- gy1 = py1 + ly1
793
- gx2 = px1 + lx2
794
- gy2 = py1 + ly2
795
-
796
- # Safety clamp
797
- gx1, gy1 = max(0, gx1), max(0, gy1)
798
- gx2, gy2 = min(w, gx2), min(h, gy2)
799
-
800
- log(f" β†’ global {gx1},{gy1} β†’ {gx2},{gy2}")
801
-
802
- draw.rectangle((gx1, gy1, gx2, gy2), outline="red", width=3)
803
-
804
- line_crop = crop_region.crop((lx1, ly1, lx2, ly2))
805
-
806
- if enable_debug_crops:
807
- line_crop.save(f"{debug_dir}/reg{i:02d}_line{j:02d}_conf{conf_line:.2f}.png")
808
-
809
- text = run_ocr(line_crop)
810
- log(f" OCR β†’ '{text}'")
811
- if text:
812
- extracted.append(text)
813
-
814
- # ── Strategy 2: Fallback full-page line detection ───────────────────────
815
- if not found_any_line:
816
- used_fallback = True
817
- log("No lines found in regions β†’ fallback: full-page line detection")
818
-
819
- line_sz = 1024 if max(w, h) > 1800 else line_imgsz_base
820
- log(f"Full-page line detection (imgsz={line_sz}) …")
821
-
822
- res = line_model(img, conf=conf_thresh, imgsz=line_sz, verbose=False)[0]
823
- boxes = res.boxes
824
-
825
- log(f"β†’ {len(boxes)} line(s) on full page")
826
-
827
- if len(boxes) > 0:
828
- ys = boxes.xyxy[:, 1].cpu().numpy()
829
- order = ys.argsort()
830
-
831
- for j, idx in enumerate(order, 1):
832
- conf = float(boxes.conf[idx])
833
- x1, y1, x2, y2 = map(round, boxes.xyxy[idx].cpu().tolist())
834
- log(f" Line {j} conf={conf:.3f} {x1},{y1} β†’ {x2},{y2}")
835
-
836
- draw.rectangle((x1,y1,x2,y2), outline="red", width=3)
837
-
838
- crop = img.crop((x1,y1,x2,y2))
839
-
840
- if enable_debug_crops:
841
- crop.save(f"{debug_dir}/fallback_line{j:02d}_conf{conf:.2f}.png")
842
-
843
- text = run_ocr(crop)
844
- log(f" OCR β†’ '{text}'")
845
- if text:
846
- extracted.append(text)
847
-
848
- # ── Finalize ────────────────────────────────────────────────────────────
849
- if not extracted:
850
- msg = "No readable text lines detected in either strategy"
851
- log(msg, "WARNING")
852
- return debug_img, msg, "\n".join(logs)
853
 
854
- log(f"Success β€” extracted {len(extracted)} line(s)")
855
- if enable_debug_crops:
856
- log(f"Debug crops saved to {debug_dir}/")
857
 
858
- return debug_img, "\n".join(extracted), "\n".join(logs)
859
 
860
  except Exception as e:
861
- log(f"Processing failed: {e}", "ERROR")
862
- logger.exception("Traceback:")
863
- return debug_img, f"Error: {str(e)}", "\n".join(logs)
864
-
865
-
866
-
867
-
868
-
869
 
870
 
871
  demo = gr.Interface(
872
- fn=process_document,
873
  inputs=[
874
- gr.Image(type="pil", label="Handwritten document"),
875
- gr.Checkbox(label="Save debug crops", value=False),
876
- gr.Slider(640, 1600, step=64, value=1024, label="Region detection size (imgsz)"),
877
- gr.Slider(512, 1280, step=64, value=768, label="Base line detection size"),
878
- gr.Slider(0.15, 0.5, step=0.05, value=0.25, label="Confidence threshold"),
 
 
879
  ],
880
  outputs=[
881
- gr.Image(label="Debug (green=region, red=line)"),
882
- gr.Textbox(label="Extracted Text", lines=10),
883
- gr.Textbox(label="Detailed Logs (copy these if boxes look wrong)", lines=18),
884
  ],
885
- title="Handwritten Text β†’ OCR + Debug",
886
  description=(
887
- "Green = detected text regions β€’ Red = individual text lines sent to TrOCR\n\n"
888
- "Copy the **Detailed Logs** if alignment still looks off β€” especially coords, sizes & confidences."
 
 
 
889
  ),
890
  theme=gr.themes.Soft(),
891
- flagging_mode="never",
892
  )
893
 
894
  if __name__ == "__main__":
895
- logger.info("Launching interface…")
896
  demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  from ultralytics import YOLO
3
+ from PIL import Image, ImageDraw, ImageFont
 
4
  import torch
5
  import logging
6
  import os
 
 
7
  from datetime import datetime
8
 
9
# ── Quiet startup ───────────────────────────────────────────────────────
# Silence HF Hub progress bars and chatty ultralytics INFO logs.
os.environ['HF_HUB_DISABLE_PROGRESS_BARS'] = '1'
logging.getLogger('ultralytics').setLevel(logging.WARNING)

logging.basicConfig(
    level=logging.INFO,
    # The LogRecord attribute is 'levelname' — '%(level)s' is not a valid
    # field and would make every log call emit a "--- Logging error ---".
    format='%(asctime)s | %(levelname)-5s | %(message)s'
)
logger = logging.getLogger(__name__)

logger.info("Initializing region detector...")

# Prefer GPU when available; inference also works on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
logger.info(f"Device: {device}")
23
 
24
# ── Load YOLO ───────────────────────────────────────────────────────────
# Load the region-detection weights: prefer 'regions.pt', otherwise fall
# back to the first *.pt file in the CWD whose name mentions 'region'.
try:
    region_pt = 'regions.pt'

    if not os.path.exists(region_pt):
        candidates = [
            entry for entry in os.listdir('.')
            if entry.lower().endswith('.pt') and 'region' in entry.lower()
        ]
        if candidates:
            region_pt = candidates[0]

    if not os.path.exists(region_pt):
        raise FileNotFoundError("No regions.pt (or similar *.pt) found in current directory")

    logger.info(f"Loading model: {region_pt}")
    model = YOLO(region_pt)
    logger.info("Region detector loaded")

except Exception as e:
    # Log with traceback, then re-raise so the app fails fast at startup.
    logger.error(f"Model loading failed → {e}", exc_info=True)
    raise
44
 
45
 
46
def visualize_regions(
    image,
    conf_thresh: float = 0.25,
    min_size: int = 60,
    padding: int = 0,
    show_labels: bool = True,
    save_debug_crops: bool = False,
    imgsz: int = 1024,
):
    """Run the region YOLO model on *image* and return an annotated copy.

    Parameters
    ----------
    image : PIL.Image.Image | str | None
        Input document image, or a filesystem path to one. ``None``
        (nothing uploaded) short-circuits with a log message.
    conf_thresh : float
        Minimum detection confidence for a box to be kept.
    min_size : int
        Minimum box width AND height in pixels; smaller boxes are dropped.
    padding : int
        Extra pixels added around a box when saving debug crops only;
        the rectangle drawn on the image is never padded.
    show_labels : bool
        Draw a "conf WxH" label above each kept box.
    save_debug_crops : bool
        Save each kept region as a PNG under ``debug_regions/``.
    imgsz : int
        YOLO inference image size.

    Returns
    -------
    tuple
        ``(annotated PIL image or None, newline-joined log text)``.
    """
    start = datetime.now().strftime("%H:%M:%S")
    logs = [f"[{start}] Processing started"]

    if image is None:
        logs.append("No image uploaded")
        return None, "\n".join(logs)

    # Accept both file paths and in-memory PIL images.
    if isinstance(image, str):
        img = Image.open(image).convert("RGB")
    else:
        img = image.convert("RGB")

    w, h = img.size
    logs.append(f"Image size: {w} × {h}")

    debug_img = img.copy()
    draw = ImageDraw.Draw(debug_img)

    try:
        # Font for label text; fall back to PIL's built-in bitmap font.
        # truetype() raises OSError when the font file cannot be found/read.
        try:
            font = ImageFont.truetype("arial.ttf", 18)
        except OSError:
            font = ImageFont.load_default()

        # ── Run detection ───────────────────────────────────────────────
        results = model(
            img,
            conf=conf_thresh,
            imgsz=imgsz,
            verbose=False
        )[0]

        boxes = results.boxes
        logs.append(f"Detected {len(boxes)} region candidate(s)")

        kept = 0

        if len(boxes) > 0:
            # Sort top → bottom so logs and crops follow reading order.
            ys = boxes.xyxy[:, 1].cpu().numpy()
            order = ys.argsort()

            for idx in order:
                box = boxes[idx]
                conf = float(box.conf)
                if conf < conf_thresh:  # defensive; YOLO already filtered
                    continue

                x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
                bw, bh = x2 - x1, y2 - y1

                if bw < min_size or bh < min_size:
                    continue

                # Padded coordinates, clamped to the image — crops only.
                px1 = max(0, x1 - padding)
                py1 = max(0, y1 - padding)
                px2 = min(w, x2 + padding)
                py2 = min(h, y2 + padding)

                draw.rectangle((x1, y1, x2, y2), outline="lime", width=3)

                if show_labels:
                    label = f"conf {conf:.2f} {bw}×{bh}"
                    # textbbox gives (left, top, right, bottom); the label
                    # size is the bbox extent, not the raw right/bottom.
                    bl, bt, br, bb = draw.textbbox((0, 0), label, font=font)
                    tw, th = br - bl, bb - bt
                    # Solid RGB fill: the canvas is RGB, so an alpha
                    # channel would have no effect anyway.
                    draw.rectangle(
                        (x1, y1 - th - 4, x1 + tw + 8, y1),
                        fill=(0, 180, 0)
                    )
                    draw.text((x1 + 4, y1 - th - 2), label, fill="white", font=font)

                kept += 1

                # Optional: save individual crops for manual inspection.
                if save_debug_crops:
                    os.makedirs("debug_regions", exist_ok=True)
                    crop = img.crop((px1, py1, px2, py2))
                    fname = f"debug_regions/r{kept:02d}_conf{conf:.2f}_{bw}x{bh}.png"
                    crop.save(fname)
                    logs.append(f"Saved crop → {fname}")

        if kept == 0:
            msg = f"No regions kept after filters (conf ≥ {conf_thresh}, size ≥ {min_size}px)"
            logs.append(msg)
        else:
            logs.append(f"Visualized {kept} region(s)")

        logs.append("Finished.")

        return debug_img, "\n".join(logs)

    except Exception as e:
        # Surface the error in the UI log pane and keep the partial drawing.
        logs.append(f"Error during inference: {str(e)}")
        logger.exception("Inference failed")
        return debug_img, "\n".join(logs)
 
 
 
 
 
153
 
154
 
155
# Gradio UI: one image in, annotated image + text log out.
demo = gr.Interface(
    fn=visualize_regions,
    inputs=[
        gr.Image(type="pil", label="Upload image (handwritten document)"),
        gr.Slider(0.10, 0.60, step=0.02, value=0.25, label="Confidence threshold"),
        gr.Slider(30, 300, step=10, value=60, label="Minimum region width/height (px)"),
        gr.Slider(0, 40, step=4, value=0, label="Padding around box (for crops only)"),
        gr.Checkbox(label="Draw confidence + size labels on boxes", value=True),
        gr.Checkbox(label="Save individual region crops to debug_regions/", value=False),
        gr.Slider(640, 1280, step=64, value=1024, label="Inference image size (imgsz)"),
    ],
    outputs=[
        gr.Image(label="Detected text regions (green boxes)"),
        gr.Textbox(label="Log / debug info", lines=14),
    ],
    title="Region Detector Debug View",
    description=(
        "Only shows what the region YOLO model sees.\n\n"
        "• Green boxes = detected text regions\n"
        "• Tune confidence and min size until boxes look reasonable\n"
        "• Use logs to see exact confidences and sizes\n"
        "• Save crops if you want to manually check what is being detected"
    ),
    theme=gr.themes.Soft(),
    # Gradio ≥ 5 renamed `allow_flagging` to `flagging_mode`; the previous
    # revision of this file already used the new name, so `allow_flagging`
    # would raise a TypeError here.
    flagging_mode="never",
)

if __name__ == "__main__":
    logger.info("Launching debug interface...")
    demo.launch()