iammraat committed on
Commit
b067fca
Β·
verified Β·
1 Parent(s): 186539d

Upload 2 files

Browse files
Files changed (2) hide show
  1. app (1).py +896 -0
  2. requirements.txt +19 -0
app (1).py ADDED
@@ -0,0 +1,896 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # import gradio as gr
3
+ # from ultralytics import YOLO
4
+ # from transformers import TrOCRProcessor, VisionEncoderDecoderModel
5
+ # from PIL import Image, ImageDraw
6
+ # import torch
7
+ # import logging
8
+ # from datetime import datetime
9
+ # import os
10
+ # import warnings
11
+ # import time
12
+
13
+ # # Suppress progress bar and unnecessary logs
14
+ # os.environ['HF_HUB_DISABLE_PROGRESS_BARS'] = '1'
15
+ # warnings.filterwarnings('ignore')
16
+ # logging.getLogger('transformers').setLevel(logging.ERROR)
17
+ # logging.getLogger('ultralytics').setLevel(logging.ERROR)
18
+
19
+ # # Setup logging
20
+ # logging.basicConfig(
21
+ # level=logging.INFO,
22
+ # format='%(asctime)s - %(levelname)s - %(message)s'
23
+ # )
24
+ # logger = logging.getLogger(__name__)
25
+
26
+ # logger.info("Starting model loading...")
27
+ # device = "cuda" if torch.cuda.is_available() else "cpu"
28
+ # logger.info(f"Using device: {device}")
29
+
30
+ # # --- ROBUST MODEL LOADING FUNCTION ---
31
+ # def load_model_with_retry(model_class, model_name, token=None, retries=5, delay=5):
32
+ # """Attempts to load a HF model with retries to handle network timeouts."""
33
+ # for attempt in range(retries):
34
+ # try:
35
+ # logger.info(f"Loading {model_name} (Attempt {attempt + 1}/{retries})...")
36
+ # if "Processor" in str(model_class):
37
+ # return model_class.from_pretrained(model_name, token=token)
38
+ # else:
39
+ # return model_class.from_pretrained(model_name, token=token).to(device)
40
+ # except Exception as e:
41
+ # logger.warning(f"Failed to load {model_name}: {e}")
42
+ # if attempt < retries - 1:
43
+ # logger.info(f"Retrying in {delay} seconds...")
44
+ # time.sleep(delay)
45
+ # else:
46
+ # logger.error(f"Given up on loading {model_name} after {retries} attempts.")
47
+ # raise e
48
+
49
+ # try:
50
+ # # 1. Load YOLO Models (Local Files)
51
+ # region_model_file = 'regions.pt'
52
+ # line_model_file = 'lines.pt'
53
+
54
+ # # Simple check for local files
55
+ # if not os.path.exists(region_model_file):
56
+ # # Check current directory listing just in case
57
+ # for file in os.listdir('.'):
58
+ # if 'region' in file.lower() and file.endswith('.pt'): region_model_file = file
59
+ # elif 'line' in file.lower() and file.endswith('.pt'): line_model_file = file
60
+
61
+ # if not os.path.exists(region_model_file) or not os.path.exists(line_model_file):
62
+ # raise FileNotFoundError("YOLO .pt files (regions.pt/lines.pt) not found.")
63
+
64
+ # logger.info("Loading YOLO models...")
65
+ # region_model = YOLO(region_model_file)
66
+ # line_model = YOLO(line_model_file)
67
+ # logger.info("βœ“ YOLO models loaded")
68
+
69
+ # # 2. Load TrOCR with Retries
70
+ # hf_token = os.getenv("HF_TOKEN")
71
+
72
+ # processor = load_model_with_retry(TrOCRProcessor, "microsoft/trocr-base-handwritten", token=hf_token)
73
+ # logger.info("βœ“ TrOCR processor loaded")
74
+
75
+ # trocr_model = load_model_with_retry(VisionEncoderDecoderModel, "microsoft/trocr-base-handwritten", token=hf_token)
76
+ # logger.info("βœ“ TrOCR model loaded")
77
+
78
+ # logger.info("All models loaded successfully!")
79
+
80
+ # except Exception as e:
81
+ # logger.error(f"CRITICAL ERROR loading models: {str(e)}")
82
+ # raise
83
+
84
+ # # --- OCR HELPER ---
85
+ # def run_trocr(image_slice, processor, model, device):
86
+ # """Runs TrOCR on a single cropped image slice."""
87
+ # pixel_values = processor(images=image_slice, return_tensors="pt").pixel_values.to(device)
88
+ # generated_ids = model.generate(pixel_values)
89
+ # return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
90
+
91
+ # def process_document(image):
92
+ # """Process uploaded document image and extract handwritten text with visualization."""
93
+ # timestamp = datetime.now().strftime("%H:%M:%S")
94
+ # log_output = []
95
+
96
+ # def add_log(message, level="INFO"):
97
+ # log_msg = f"[{timestamp}] {level}: {message}"
98
+ # log_output.append(log_msg)
99
+ # if level == "ERROR":
100
+ # logger.error(message)
101
+ # else:
102
+ # logger.info(message)
103
+
104
+ # add_log("Starting document processing")
105
+
106
+ # if image is None:
107
+ # add_log("No image provided", "ERROR")
108
+ # return None, "Please upload an image", "\n".join(log_output)
109
+
110
+ # try:
111
+ # # Prepare Image
112
+ # if not isinstance(image, Image.Image):
113
+ # img = Image.open(image).convert("RGB")
114
+ # else:
115
+ # img = image.convert("RGB")
116
+
117
+ # # Create a drawing context for the debug image
118
+ # debug_img = img.copy()
119
+ # draw = ImageDraw.Draw(debug_img)
120
+
121
+ # width, height = img.size
122
+ # add_log(f"Image size: {width}x{height} pixels")
123
+
124
+ # all_lines = []
125
+
126
+ # # --- STRATEGY 1: Region Detection ---
127
+ # add_log("Strategy 1: Running region detection...")
128
+ # region_results = region_model(img, conf=0.2, imgsz=1024, verbose=False)
129
+ # regions = region_results[0].boxes
130
+ # num_regions = len(regions)
131
+ # add_log(f"βœ“ Found {num_regions} potential text region(s)")
132
+
133
+ # found_lines_in_regions = False
134
+
135
+ # if num_regions > 0:
136
+ # for region_idx, region in enumerate(regions):
137
+ # add_log(f"Processing region {region_idx + 1}/{num_regions}")
138
+
139
+ # # Get coordinates
140
+ # rx1, ry1, rx2, ry2 = map(int, region.xyxy[0])
141
+
142
+ # # Filter small artifacts
143
+ # if (rx2 - rx1) < 50 or (ry2 - ry1) < 50:
144
+ # add_log(f" Skipping tiny artifact: {rx2-rx1}x{ry2-ry1} px")
145
+ # continue
146
+
147
+ # # Draw GREEN box for Region
148
+ # draw.rectangle([rx1, ry1, rx2, ry2], outline="green", width=5)
149
+
150
+ # # Crop Region
151
+ # region_crop = img.crop((rx1, ry1, rx2, ry2))
152
+
153
+ # # Detect lines in this region
154
+ # line_results = line_model(region_crop, conf=0.2, imgsz=1024, verbose=False)
155
+ # lines = line_results[0].boxes
156
+ # num_lines = len(lines)
157
+ # add_log(f" βœ“ Found {num_lines} line(s) in region")
158
+
159
+ # if num_lines > 0:
160
+ # found_lines_in_regions = True
161
+
162
+ # # Sort lines by Y position
163
+ # lines_sorted = sorted(lines, key=lambda b: b.xyxy[0][1])
164
+
165
+ # for line_idx, line in enumerate(lines_sorted):
166
+ # lx1, ly1, lx2, ly2 = map(int, line.xyxy[0])
167
+
168
+ # # Translate line coordinates back to original image space for drawing
169
+ # global_lx1 = rx1 + lx1
170
+ # global_ly1 = ry1 + ly1
171
+ # global_lx2 = rx1 + lx2
172
+ # global_ly2 = ry1 + ly2
173
+
174
+ # # Draw RED box for Line
175
+ # draw.rectangle([global_lx1, global_ly1, global_lx2, global_ly2], outline="red", width=3)
176
+
177
+ # # OCR
178
+ # line_crop = region_crop.crop((lx1, ly1, lx2, ly2))
179
+ # text = run_trocr(line_crop, processor, trocr_model, device)
180
+ # add_log(f" Line {line_idx + 1}: '{text}'")
181
+ # all_lines.append(text)
182
+
183
+ # # --- STRATEGY 2: Fallback to Full Page ---
184
+ # if not found_lines_in_regions:
185
+ # add_log("⚠️ Region detection yielded no lines. Switching to Fallback Strategy...", "WARNING")
186
+ # add_log("Strategy 2: Running line detection on full page")
187
+
188
+ # line_results = line_model(img, conf=0.2, imgsz=1024, verbose=False)
189
+ # lines = line_results[0].boxes
190
+ # num_lines = len(lines)
191
+ # add_log(f"βœ“ Fallback found {num_lines} line(s) on full page")
192
+
193
+ # if num_lines > 0:
194
+ # lines_sorted = sorted(lines, key=lambda b: b.xyxy[0][1])
195
+
196
+ # for line_idx, line in enumerate(lines_sorted):
197
+ # lx1, ly1, lx2, ly2 = map(int, line.xyxy[0])
198
+
199
+ # # Draw RED box for Line (on full image)
200
+ # draw.rectangle([lx1, ly1, lx2, ly2], outline="red", width=3)
201
+
202
+ # line_crop = img.crop((lx1, ly1, lx2, ly2))
203
+ # text = run_trocr(line_crop, processor, trocr_model, device)
204
+ # add_log(f" Line {line_idx + 1}: '{text}'")
205
+ # all_lines.append(text)
206
+
207
+ # if not all_lines:
208
+ # add_log("Failed to detect any text lines in both strategies", "ERROR")
209
+ # return debug_img, "No text could be extracted.", "\n".join(log_output)
210
+
211
+ # add_log(f"βœ“ Success! Extracted {len(all_lines)} total line(s)")
212
+ # final_text = '\n'.join(all_lines)
213
+
214
+ # return debug_img, final_text, "\n".join(log_output)
215
+
216
+ # except Exception as e:
217
+ # error_msg = f"Error processing image: {str(e)}"
218
+ # add_log(error_msg, "ERROR")
219
+ # logger.exception("Full error traceback:")
220
+ # # Return the original image if debug creation failed
221
+ # return image, f"Error: {str(e)}", "\n".join(log_output)
222
+
223
+ # # Create Gradio interface
224
+ # demo = gr.Interface(
225
+ # fn=process_document,
226
+ # inputs=gr.Image(type="pil", label="Upload Handwritten Document"),
227
+ # outputs=[
228
+ # gr.Image(type="pil", label="Debug Visualization (Green=Region, Red=Lines)"),
229
+ # gr.Textbox(label="Extracted Text", lines=10),
230
+ # gr.Textbox(label="Processing Logs", lines=15)
231
+ # ],
232
+ # title="πŸ“ Handwritten Text Recognition (HTR) with Debugging",
233
+ # description="""
234
+ # Upload an image of a handwritten document.
235
+
236
+ # **Visualization Key:**
237
+ # - 🟩 **Green Box:** The broad region identified as containing text.
238
+ # - πŸŸ₯ **Red Box:** The specific line of text sent to the OCR engine.
239
+ # """,
240
+ # flagging_mode="never",
241
+ # theme=gr.themes.Soft()
242
+ # )
243
+
244
+ # if __name__ == "__main__":
245
+ # logger.info("Launching Gradio interface...")
246
+ # demo.launch()
247
+
248
+
249
+
250
+
251
+
252
+
253
+
254
+
255
+
256
+
257
+
258
+
259
+
260
+
261
+
262
+
263
+ # import gradio as gr
264
+ # from ultralytics import YOLO
265
+ # from transformers import TrOCRProcessor, VisionEncoderDecoderModel
266
+ # from PIL import Image, ImageDraw, ImageFont
267
+ # import torch
268
+ # import logging
269
+ # from datetime import datetime
270
+ # import os
271
+ # import warnings
272
+ # import time
273
+
274
+ # # Suppress progress bar and unnecessary logs
275
+ # os.environ['HF_HUB_DISABLE_PROGRESS_BARS'] = '1'
276
+ # warnings.filterwarnings('ignore')
277
+ # logging.getLogger('transformers').setLevel(logging.ERROR)
278
+ # logging.getLogger('ultralytics').setLevel(logging.ERROR)
279
+
280
+ # # Setup logging
281
+ # logging.basicConfig(
282
+ # level=logging.INFO,
283
+ # format='%(asctime)s - %(levelname)s - %(message)s'
284
+ # )
285
+ # logger = logging.getLogger(__name__)
286
+
287
+ # logger.info("Starting model loading...")
288
+ # device = "cuda" if torch.cuda.is_available() else "cpu"
289
+ # logger.info(f"Using device: {device}")
290
+
291
+ # # --- ROBUST MODEL LOADING FUNCTION ---
292
+ # def load_model_with_retry(model_class, model_name, token=None, retries=5, delay=5):
293
+ # """Attempts to load a HF model with retries to handle network timeouts."""
294
+ # for attempt in range(retries):
295
+ # try:
296
+ # logger.info(f"Loading {model_name} (Attempt {attempt + 1}/{retries})...")
297
+ # if "Processor" in str(model_class):
298
+ # return model_class.from_pretrained(model_name, token=token)
299
+ # else:
300
+ # return model_class.from_pretrained(model_name, token=token).to(device)
301
+ # except Exception as e:
302
+ # logger.warning(f"Failed to load {model_name}: {e}")
303
+ # if attempt < retries - 1:
304
+ # logger.info(f"Retrying in {delay} seconds...")
305
+ # time.sleep(delay)
306
+ # else:
307
+ # logger.error(f"Given up on loading {model_name} after {retries} attempts.")
308
+ # raise e
309
+
310
+ # try:
311
+ # # 1. Load YOLO Models (Local Files)
312
+ # region_model_file = 'regions.pt'
313
+ # line_model_file = 'lines.pt'
314
+
315
+ # # Simple check for local files
316
+ # if not os.path.exists(region_model_file):
317
+ # for file in os.listdir('.'):
318
+ # if 'region' in file.lower() and file.endswith('.pt'): region_model_file = file
319
+ # elif 'line' in file.lower() and file.endswith('.pt'): line_model_file = file
320
+
321
+ # if not os.path.exists(region_model_file) or not os.path.exists(line_model_file):
322
+ # raise FileNotFoundError("YOLO .pt files (regions.pt/lines.pt) not found.")
323
+
324
+ # logger.info("Loading YOLO models...")
325
+ # region_model = YOLO(region_model_file)
326
+ # line_model = YOLO(line_model_file)
327
+ # logger.info("βœ“ YOLO models loaded")
328
+
329
+ # # 2. Load TrOCR with Retries
330
+ # hf_token = os.getenv("HF_TOKEN")
331
+
332
+ # processor = load_model_with_retry(TrOCRProcessor, "microsoft/trocr-base-handwritten", token=hf_token)
333
+ # logger.info("βœ“ TrOCR processor loaded")
334
+
335
+ # trocr_model = load_model_with_retry(VisionEncoderDecoderModel, "microsoft/trocr-base-handwritten", token=hf_token)
336
+ # logger.info("βœ“ TrOCR model loaded")
337
+
338
+ # logger.info("All models loaded successfully!")
339
+
340
+ # except Exception as e:
341
+ # logger.error(f"CRITICAL ERROR loading models: {str(e)}")
342
+ # raise
343
+
344
+ # # --- OCR HELPER ---
345
+ # def run_trocr(image_slice, processor, model, device):
346
+ # """Runs TrOCR on a single cropped image slice."""
347
+ # pixel_values = processor(images=image_slice, return_tensors="pt").pixel_values.to(device)
348
+ # generated_ids = model.generate(pixel_values)
349
+ # return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
350
+
351
+ # def process_document(image, enable_debug_crops=False):
352
+ # """Process uploaded document image and extract handwritten text with visualization."""
353
+ # timestamp = datetime.now().strftime("%H:%M:%S")
354
+ # log_output = []
355
+
356
+ # def add_log(message, level="INFO"):
357
+ # log_msg = f"[{timestamp}] {level}: {message}"
358
+ # log_output.append(log_msg)
359
+ # if level == "ERROR":
360
+ # logger.error(message)
361
+ # else:
362
+ # logger.info(message)
363
+
364
+ # add_log("Starting document processing")
365
+
366
+ # if image is None:
367
+ # add_log("No image provided", "ERROR")
368
+ # return None, "Please upload an image", "\n".join(log_output)
369
+
370
+ # try:
371
+ # # Prepare Image
372
+ # if not isinstance(image, Image.Image):
373
+ # img = Image.open(image).convert("RGB")
374
+ # else:
375
+ # img = image.convert("RGB")
376
+
377
+ # # Create a drawing context for the debug image
378
+ # debug_img = img.copy()
379
+ # draw = ImageDraw.Draw(debug_img)
380
+
381
+ # width, height = img.size
382
+ # add_log(f"Image size: {width}x{height} pixels")
383
+
384
+ # all_lines = []
385
+ # debug_crops_dir = "debug_crops"
386
+
387
+ # if enable_debug_crops:
388
+ # os.makedirs(debug_crops_dir, exist_ok=True)
389
+ # add_log(f"Debug crops will be saved to {debug_crops_dir}/")
390
+
391
+ # # --- STRATEGY 1: Region Detection ---
392
+ # add_log("Strategy 1: Running region detection...")
393
+ # region_results = region_model(img, conf=0.2, imgsz=1024, verbose=False)
394
+ # regions = region_results[0].boxes
395
+ # num_regions = len(regions)
396
+ # add_log(f"βœ“ Found {num_regions} potential text region(s)")
397
+
398
+ # found_lines_in_regions = False
399
+
400
+ # if num_regions > 0:
401
+ # for region_idx, region in enumerate(regions):
402
+ # add_log(f"Processing region {region_idx + 1}/{num_regions}")
403
+
404
+ # # FIX 1: Use round() instead of int() to minimize precision loss
405
+ # rx1, ry1, rx2, ry2 = map(round, region.xyxy[0].tolist())
406
+
407
+ # # Calculate region dimensions
408
+ # region_width = rx2 - rx1
409
+ # region_height = ry2 - ry1
410
+
411
+ # add_log(f" Region coords: ({rx1}, {ry1}) β†’ ({rx2}, {ry2}), size: {region_width}x{region_height}")
412
+
413
+ # # Filter small artifacts
414
+ # if region_width < 50 or region_height < 50:
415
+ # add_log(f" Skipping tiny artifact: {region_width}x{region_height} px")
416
+ # continue
417
+
418
+ # # FIX 2: Add padding to region crops to avoid edge effects
419
+ # padding = 10
420
+ # padded_rx1 = max(0, rx1 - padding)
421
+ # padded_ry1 = max(0, ry1 - padding)
422
+ # padded_rx2 = min(width, rx2 + padding)
423
+ # padded_ry2 = min(height, ry2 + padding)
424
+
425
+ # add_log(f" Padded coords: ({padded_rx1}, {padded_ry1}) β†’ ({padded_rx2}, {padded_ry2})")
426
+
427
+ # # Draw GREEN box for Region (original bounds, not padded)
428
+ # draw.rectangle([rx1, ry1, rx2, ry2], outline="green", width=5)
429
+
430
+ # # Crop Region with padding
431
+ # region_crop = img.crop((padded_rx1, padded_ry1, padded_rx2, padded_ry2))
432
+
433
+ # if enable_debug_crops:
434
+ # region_crop.save(f"{debug_crops_dir}/region_{region_idx:02d}.png")
435
+
436
+ # # Detect lines in this region
437
+ # add_log(f" Running line detection on region crop ({region_crop.size[0]}x{region_crop.size[1]})...")
438
+ # line_results = line_model(region_crop, conf=0.2, imgsz=1024, verbose=False)
439
+ # lines_data = line_results[0].boxes.xyxy.cpu().numpy()
440
+ # num_lines = len(lines_data)
441
+ # add_log(f" βœ“ Found {num_lines} line(s) in region")
442
+
443
+ # if num_lines > 0:
444
+ # found_lines_in_regions = True
445
+
446
+ # # Sort lines by Y position (index 1 of xyxy)
447
+ # sorted_indices = lines_data[:, 1].argsort()
448
+
449
+ # for line_idx, idx in enumerate(sorted_indices):
450
+ # # FIX 3: Use round() for line coordinates too
451
+ # lx1, ly1, lx2, ly2 = map(round, lines_data[idx].tolist())
452
+
453
+ # line_width = lx2 - lx1
454
+ # line_height = ly2 - ly1
455
+
456
+ # add_log(f" Line {line_idx + 1} (local coords): ({lx1}, {ly1}) β†’ ({lx2}, {ly2}), size: {line_width}x{line_height}")
457
+
458
+ # # FIX 4: Translate line coordinates back to original image space
459
+ # # Account for padding offset
460
+ # global_lx1 = padded_rx1 + lx1
461
+ # global_ly1 = padded_ry1 + ly1
462
+ # global_lx2 = padded_rx1 + lx2
463
+ # global_ly2 = padded_ry1 + ly2
464
+
465
+ # # FIX 5: Validate coordinates are within image bounds
466
+ # global_lx1 = max(0, min(width, global_lx1))
467
+ # global_ly1 = max(0, min(height, global_ly1))
468
+ # global_lx2 = max(0, min(width, global_lx2))
469
+ # global_ly2 = max(0, min(height, global_ly2))
470
+
471
+ # add_log(f" Line {line_idx + 1} (global coords): ({global_lx1}, {global_ly1}) β†’ ({global_lx2}, {global_ly2})")
472
+
473
+ # # Draw RED box for Line
474
+ # draw.rectangle([global_lx1, global_ly1, global_lx2, global_ly2], outline="red", width=3)
475
+
476
+ # # OCR on the line crop from region_crop
477
+ # line_crop = region_crop.crop((lx1, ly1, lx2, ly2))
478
+
479
+ # if enable_debug_crops:
480
+ # line_crop.save(f"{debug_crops_dir}/region_{region_idx:02d}_line_{line_idx:02d}.png")
481
+
482
+ # text = run_trocr(line_crop, processor, trocr_model, device)
483
+ # add_log(f" Line {line_idx + 1} OCR: '{text}'")
484
+ # all_lines.append(text)
485
+
486
+ # # --- STRATEGY 2: Fallback to Full Page ---
487
+ # if not found_lines_in_regions:
488
+ # add_log("⚠️ Region detection yielded no lines. Switching to Fallback Strategy...", "WARNING")
489
+ # add_log("Strategy 2: Running line detection on full page")
490
+
491
+ # line_results = line_model(img, conf=0.2, imgsz=1024, verbose=False)
492
+ # lines_data = line_results[0].boxes.xyxy.cpu().numpy()
493
+ # num_lines = len(lines_data)
494
+ # add_log(f"βœ“ Fallback found {num_lines} line(s) on full page")
495
+
496
+ # if num_lines > 0:
497
+ # sorted_indices = lines_data[:, 1].argsort()
498
+
499
+ # for line_idx, idx in enumerate(sorted_indices):
500
+ # # FIX 6: Use round() consistently
501
+ # lx1, ly1, lx2, ly2 = map(round, lines_data[idx].tolist())
502
+
503
+ # line_width = lx2 - lx1
504
+ # line_height = ly2 - ly1
505
+
506
+ # add_log(f" Fallback Line {line_idx + 1}: ({lx1}, {ly1}) β†’ ({lx2}, {ly2}), size: {line_width}x{line_height}")
507
+
508
+ # # FIX 7: Validate coordinates
509
+ # lx1 = max(0, min(width, lx1))
510
+ # ly1 = max(0, min(height, ly1))
511
+ # lx2 = max(0, min(width, lx2))
512
+ # ly2 = max(0, min(height, ly2))
513
+
514
+ # # Draw RED box for Line (on full image)
515
+ # draw.rectangle([lx1, ly1, lx2, ly2], outline="red", width=3)
516
+
517
+ # line_crop = img.crop((lx1, ly1, lx2, ly2))
518
+
519
+ # if enable_debug_crops:
520
+ # line_crop.save(f"{debug_crops_dir}/fullpage_line_{line_idx:02d}.png")
521
+
522
+ # text = run_trocr(line_crop, processor, trocr_model, device)
523
+ # add_log(f" Fallback Line {line_idx + 1} OCR: '{text}'")
524
+ # all_lines.append(text)
525
+
526
+ # if not all_lines:
527
+ # add_log("Failed to detect any text lines in both strategies", "ERROR")
528
+ # return debug_img, "No text could be extracted.", "\n".join(log_output)
529
+
530
+ # add_log(f"βœ“ Success! Extracted {len(all_lines)} total line(s)")
531
+
532
+ # if enable_debug_crops:
533
+ # add_log(f"βœ“ Debug crops saved to {debug_crops_dir}/")
534
+
535
+ # final_text = '\n'.join(all_lines)
536
+
537
+ # return debug_img, final_text, "\n".join(log_output)
538
+
539
+ # except Exception as e:
540
+ # error_msg = f"Error processing image: {str(e)}"
541
+ # add_log(error_msg, "ERROR")
542
+ # logger.exception("Full error traceback:")
543
+ # return image, f"Error: {str(e)}", "\n".join(log_output)
544
+
545
+ # # Create Gradio interface
546
+ # demo = gr.Interface(
547
+ # fn=process_document,
548
+ # inputs=[
549
+ # gr.Image(type="pil", label="Upload Handwritten Document"),
550
+ # gr.Checkbox(label="Save debug crops to disk", value=False)
551
+ # ],
552
+ # outputs=[
553
+ # gr.Image(type="pil", label="Debug Visualization (Green=Region, Red=Lines)"),
554
+ # gr.Textbox(label="Extracted Text", lines=10),
555
+ # gr.Textbox(label="Processing Logs", lines=15)
556
+ # ],
557
+ # title="πŸ“ Handwritten Text Recognition (HTR) with Enhanced Debugging",
558
+ # description="""
559
+ # Upload an image of a handwritten document.
560
+
561
+ # **Visualization Key:**
562
+ # - 🟩 **Green Box:** The broad region identified as containing text (original bounds).
563
+ # - πŸŸ₯ **Red Box:** The specific line of text sent to the OCR engine (with coordinate validation).
564
+
565
+ # **Improvements:**
566
+ # - Fixed coordinate rounding (eliminates truncation errors)
567
+ # - Added 10px padding to region crops (reduces edge effects)
568
+ # - Coordinate validation (ensures all boxes are within image bounds)
569
+ # - Enhanced logging with detailed coordinate tracking
570
+ # - Optional debug crop saving
571
+ # """,
572
+ # flagging_mode="never",
573
+ # theme=gr.themes.Soft()
574
+ # )
575
+
576
+ # if __name__ == "__main__":
577
+ # logger.info("Launching Gradio interface...")
578
+ # demo.launch()
579
+
580
+
581
+
582
+
583
+
584
+
585
+
586
+
587
+
588
+
589
+
590
+
591
+
592
+
593
+
594
+
595
+
596
+
597
+
598
+
599
+
600
+
601
+ import gradio as gr
602
+ from ultralytics import YOLO
603
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
604
+ from PIL import Image, ImageDraw
605
+ import torch
606
+ import logging
607
+ import os
608
+ import warnings
609
+ import time
610
+ from datetime import datetime
611
+
612
+ # Suppress noisy logs
613
+ os.environ['HF_HUB_DISABLE_PROGRESS_BARS'] = '1'
614
+ warnings.filterwarnings('ignore')
615
+ logging.getLogger('transformers').setLevel(logging.ERROR)
616
+ logging.getLogger('ultralytics').setLevel(logging.WARNING) # still allow important warnings
617
+
618
+ # Setup clean logging
619
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)-5s | %(message)s')
620
+ logger = logging.getLogger(__name__)
621
+
622
+ logger.info("Initializing models...")
623
+ device = "cuda" if torch.cuda.is_available() else "cpu"
624
+ logger.info(f"Device: {device}")
625
+
626
def load_with_retry(cls, name, token=None, retries=4, delay=6):
    """Load a Hugging Face model or processor, retrying on transient failures.

    Args:
        cls: Class exposing ``from_pretrained`` (e.g. ``TrOCRProcessor`` or
            ``VisionEncoderDecoderModel``).
        name: Hub repo id to download.
        token: Optional Hugging Face auth token.
        retries: Total number of attempts before giving up.
        delay: Seconds to sleep between attempts.

    Returns:
        The loaded object. Models are moved to the module-level ``device``;
        processors (no ``.to()`` method) are returned as-is.

    Raises:
        RuntimeError: if every attempt fails; chained to the last error so
            the underlying cause is preserved in the traceback.
    """
    last_err = None
    for attempt in range(1, retries + 1):
        try:
            logger.info(f"Loading {name} (attempt {attempt}/{retries})")
            # Processors have no .to(); detect them by class name.
            if "Processor" in str(cls):
                return cls.from_pretrained(name, token=token)
            return cls.from_pretrained(name, token=token).to(device)
        except Exception as e:
            last_err = e
            logger.warning(f"Load failed: {e}")
            if attempt < retries:
                time.sleep(delay)
    # Chain the last failure so debugging info is not lost.
    raise RuntimeError(f"Failed to load {name} after {retries} attempts") from last_err
638
+
639
try:
    # Locate local YOLO weights. Default filenames first; if EITHER is
    # missing, scan the working directory for similarly named .pt files.
    # (Previously the scan only ran when regions.pt was missing, so a
    # present regions.pt with a missing lines.pt failed outright.)
    region_pt = 'regions.pt'
    line_pt = 'lines.pt'

    if not (os.path.exists(region_pt) and os.path.exists(line_pt)):
        for f in os.listdir('.'):
            name = f.lower()
            if not name.endswith('.pt'):
                continue
            # elif so one file cannot claim both roles (e.g. "region_lines.pt")
            if 'region' in name:
                region_pt = f
            elif 'line' in name:
                line_pt = f

    if not all(os.path.exists(p) for p in [region_pt, line_pt]):
        raise FileNotFoundError("Could not find regions.pt and lines.pt (or similar)")

    logger.info("Loading YOLO models...")
    region_model = YOLO(region_pt)   # detects broad text regions
    line_model = YOLO(line_pt)       # detects individual text lines
    logger.info("YOLO models loaded")

    # TrOCR weights come from the Hub; retried because downloads can flake.
    hf_token = os.getenv("HF_TOKEN")
    processor = load_with_retry(TrOCRProcessor, "microsoft/trocr-base-handwritten", hf_token)
    trocr = load_with_retry(VisionEncoderDecoderModel, "microsoft/trocr-base-handwritten", hf_token)
    logger.info("TrOCR loaded β†’ ready")

except Exception as e:
    # A Space without models is useless β€” log with traceback and abort startup.
    logger.error(f"Model loading failed: {e}", exc_info=True)
    raise
666
+
667
+
668
+
669
+
670
+
671
def run_ocr(crop: Image.Image) -> str:
    """Transcribe one cropped line image with TrOCR.

    Crops narrower than 20 px or shorter than 12 px are treated as noise
    and yield an empty string without invoking the model.
    """
    too_small = crop.width < 20 or crop.height < 12
    if too_small:
        return ""
    # Preprocess β†’ generate token ids β†’ decode back to text.
    batch = processor(images=crop, return_tensors="pt")
    pixel_values = batch.pixel_values.to(device)
    token_ids = trocr.generate(pixel_values, max_new_tokens=128)
    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
    return decoded[0].strip()
677
+
678
+
679
def process_document(
    image,
    enable_debug_crops: bool = False,
    region_imgsz: int = 1024,
    line_imgsz_base: int = 768,
    conf_thresh: float = 0.25,
):
    """Detect and OCR handwritten text in a document image.

    Pipeline: (1) YOLO region detection, then line detection inside each
    region; (2) if no lines are found that way, fall back to line detection
    on the full page. Every detected line crop is sent to TrOCR.

    Args:
        image: PIL image or a path/file-like object accepted by Image.open.
        enable_debug_crops: When True, save every region/line crop to
            ``debug_crops/`` for offline inspection.
        region_imgsz: YOLO inference size for region detection.
        line_imgsz_base: Base YOLO inference size for line detection
            (adapted per crop size).
        conf_thresh: Confidence threshold for both detectors.

    Returns:
        (debug_image, extracted_text, log_text) β€” debug_image is an annotated
        copy of the input (green = regions, red = lines), or None if the
        failure happened before it could be built.
    """
    start_ts = datetime.now().strftime("%H:%M:%S")
    logs = []

    def log(msg: str, level: str = "INFO"):
        # Mirror every UI log line to the server logger.
        line = f"[{start_ts}] {level:5} {msg}"
        logs.append(line)
        if level == "ERROR":
            logger.error(msg)
        else:
            logger.info(msg)

    log("Start processing")

    if image is None:
        log("No image uploaded", "ERROR")
        return None, "Upload an image", "\n".join(logs)

    # Initialized before the try so the except handler below can always
    # reference it (previously it was unbound if e.g. Image.open raised).
    debug_img = None

    try:
        # --- Prepare ---------------------------------------------------------
        if not isinstance(image, Image.Image):
            img = Image.open(image).convert("RGB")
        else:
            img = image.convert("RGB")

        debug_img = img.copy()
        draw = ImageDraw.Draw(debug_img)
        w, h = img.size
        log(f"Input image: {w} Γ— {h} px")

        debug_dir = "debug_crops"
        if enable_debug_crops:
            os.makedirs(debug_dir, exist_ok=True)
            log(f"Debug crops β†’ {debug_dir}/")

        extracted = []

        # --- Strategy 1: Region β†’ Lines --------------------------------------
        log(f"Running region detection (imgsz={region_imgsz}) …")
        res_region = region_model(img, conf=conf_thresh, imgsz=region_imgsz, verbose=False)[0]
        boxes_region = res_region.boxes

        log(f"β†’ {len(boxes_region)} region candidate(s) (conf β‰₯ {conf_thresh})")

        found_any_line = False

        for i, box in enumerate(boxes_region, 1):
            conf = float(box.conf)
            xyxy = box.xyxy[0].cpu().tolist()
            # round() instead of int() to avoid systematic truncation bias
            rx1, ry1, rx2, ry2 = map(round, xyxy)

            rw, rh = rx2 - rx1, ry2 - ry1
            log(f"Region {i}/{len(boxes_region)} conf={conf:.3f} {rx1},{ry1} β†’ {rx2},{ry2} ({rw}Γ—{rh})")

            # Skip tiny detections that are almost certainly artifacts.
            if rw < 60 or rh < 40:
                log(f"  β†’ skipped (too small)")
                continue

            # Pad the crop so strokes at the region edge are not cut off.
            pad = 12
            px1 = max(0, rx1 - pad)
            py1 = max(0, ry1 - pad)
            px2 = min(w, rx2 + pad)
            py2 = min(h, ry2 + pad)

            log(f"  Padded crop: {px1},{py1} β†’ {px2},{py2}")

            # Green box marks the (unpadded) region on the debug image.
            draw.rectangle((rx1, ry1, rx2, ry2), outline="green", width=4)

            crop_region = img.crop((px1, py1, px2, py2))
            crop_w, crop_h = crop_region.size

            if enable_debug_crops:
                crop_region.save(f"{debug_dir}/region_{i:02d}.png")

            # Adaptive line imgsz: bigger crops β†’ bigger inference size.
            line_sz = line_imgsz_base
            if max(crop_w, crop_h) > 1400:
                line_sz = 1280
            elif max(crop_w, crop_h) < 400:
                line_sz = 640

            log(f"  β†’ line detection (imgsz={line_sz}) on {crop_w}Γ—{crop_h} crop …")
            res_line = line_model(crop_region, conf=conf_thresh, imgsz=line_sz, verbose=False)[0]
            line_boxes = res_line.boxes

            log(f"  β†’ {len(line_boxes)} line candidate(s)")

            if len(line_boxes) == 0:
                continue

            found_any_line = True

            # Sort lines top β†’ bottom so the transcript reads in order.
            ys = line_boxes.xyxy[:, 1].cpu().numpy()
            order = ys.argsort()

            for j, idx in enumerate(order, 1):
                conf_line = float(line_boxes.conf[idx])
                lx1, ly1, lx2, ly2 = map(round, line_boxes.xyxy[idx].cpu().tolist())

                lw, lh = lx2 - lx1, ly2 - ly1
                log(f"    Line {j} conf={conf_line:.3f} local {lx1},{ly1} β†’ {lx2},{ly2} ({lw}Γ—{lh})")

                # Translate crop-local coordinates back to full-image space
                # (the crop origin is the PADDED corner, not the region corner).
                gx1 = px1 + lx1
                gy1 = py1 + ly1
                gx2 = px1 + lx2
                gy2 = py1 + ly2

                # Safety clamp to image bounds.
                gx1, gy1 = max(0, gx1), max(0, gy1)
                gx2, gy2 = min(w, gx2), min(h, gy2)

                log(f"    β†’ global {gx1},{gy1} β†’ {gx2},{gy2}")

                # Red box marks the line actually fed to the OCR engine.
                draw.rectangle((gx1, gy1, gx2, gy2), outline="red", width=3)

                line_crop = crop_region.crop((lx1, ly1, lx2, ly2))

                if enable_debug_crops:
                    line_crop.save(f"{debug_dir}/reg{i:02d}_line{j:02d}_conf{conf_line:.2f}.png")

                text = run_ocr(line_crop)
                log(f"    OCR β†’ '{text}'")
                if text:
                    extracted.append(text)

        # --- Strategy 2: Fallback full-page line detection --------------------
        if not found_any_line:
            log("No lines found in regions β†’ fallback: full-page line detection")

            line_sz = 1024 if max(w, h) > 1800 else line_imgsz_base
            log(f"Full-page line detection (imgsz={line_sz}) …")

            res = line_model(img, conf=conf_thresh, imgsz=line_sz, verbose=False)[0]
            boxes = res.boxes

            log(f"β†’ {len(boxes)} line(s) on full page")

            if len(boxes) > 0:
                ys = boxes.xyxy[:, 1].cpu().numpy()
                order = ys.argsort()

                for j, idx in enumerate(order, 1):
                    conf = float(boxes.conf[idx])
                    x1, y1, x2, y2 = map(round, boxes.xyxy[idx].cpu().tolist())
                    log(f"  Line {j} conf={conf:.3f} {x1},{y1} β†’ {x2},{y2}")

                    draw.rectangle((x1,y1,x2,y2), outline="red", width=3)

                    crop = img.crop((x1,y1,x2,y2))

                    if enable_debug_crops:
                        crop.save(f"{debug_dir}/fallback_line{j:02d}_conf{conf:.2f}.png")

                    text = run_ocr(crop)
                    log(f"  OCR β†’ '{text}'")
                    if text:
                        extracted.append(text)

        # --- Finalize ---------------------------------------------------------
        if not extracted:
            msg = "No readable text lines detected in either strategy"
            log(msg, "WARNING")
            return debug_img, msg, "\n".join(logs)

        log(f"Success β€” extracted {len(extracted)} line(s)")
        if enable_debug_crops:
            log(f"Debug crops saved to {debug_dir}/")

        return debug_img, "\n".join(extracted), "\n".join(logs)

    except Exception as e:
        log(f"Processing failed: {e}", "ERROR")
        logger.exception("Traceback:")
        # debug_img is None if the failure happened before it was created.
        return debug_img, f"Error: {str(e)}", "\n".join(logs)
864
+
865
+
866
+
867
+
868
+
869
+
870
+
871
# Gradio UI wiring: the three sliders map to process_document's tuning
# parameters (region imgsz, base line imgsz, confidence threshold).
demo = gr.Interface(
    fn=process_document,
    inputs=[
        gr.Image(type="pil", label="Handwritten document"),
        gr.Checkbox(label="Save debug crops", value=False),
        gr.Slider(640, 1600, step=64, value=1024, label="Region detection size (imgsz)"),
        gr.Slider(512, 1280, step=64, value=768, label="Base line detection size"),
        gr.Slider(0.15, 0.5, step=0.05, value=0.25, label="Confidence threshold"),
    ],
    outputs=[
        gr.Image(label="Debug (green=region, red=line)"),
        gr.Textbox(label="Extracted Text", lines=10),
        gr.Textbox(label="Detailed Logs (copy these if boxes look wrong)", lines=18),
    ],
    title="Handwritten Text β†’ OCR + Debug",
    description=(
        "Green = detected text regions β€’ Red = individual text lines sent to TrOCR\n\n"
        "Copy the **Detailed Logs** if alignment still looks off β€” especially coords, sizes & confidences."
    ),
    theme=gr.themes.Soft(),
    flagging_mode="never",
)
893
+
894
if __name__ == "__main__":
    # Start the Gradio server (blocking call).
    logger.info("Launching interface…")
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # requirements.txt
2
+ #ultralytics
3
+ #transformers
4
+ #torch
5
+ #pillow
6
+ #numpy
7
+ #gradio
8
+
9
+ #pytz
10
+ #huggingface_hub
11
+
12
+
13
+ gradio
14
+ ultralytics
15
+ transformers
16
+ torch
17
+ torchvision
18
+ pillow
19
+ pytz