iammraat committed on
Commit
f9d61bf
·
verified ·
1 Parent(s): e82df7f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +362 -149
app.py CHANGED
@@ -1,212 +1,425 @@
1
- import gradio as gr
2
- from ultralytics import YOLO
3
- from PIL import Image, ImageDraw, ImageFont
4
- import torch
5
- import logging
6
- import os
7
- from datetime import datetime
 
 
 
 
 
 
 
 
 
 
8
 
9
- # # ── Quiet startup ───────────────────────────────────────────────────────
10
  # os.environ['HF_HUB_DISABLE_PROGRESS_BARS'] = '1'
11
  # logging.getLogger('ultralytics').setLevel(logging.WARNING)
12
 
 
13
  # logging.basicConfig(
14
  # level=logging.INFO,
15
- # format='%(asctime)s | %(level)-5s | %(message)s'
 
16
  # )
17
  # logger = logging.getLogger(__name__)
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  os.environ['HF_HUB_DISABLE_PROGRESS_BARS'] = '1'
 
 
20
  logging.getLogger('ultralytics').setLevel(logging.WARNING)
21
 
22
- # FIXED logging format: use levelname, not level
23
- logging.basicConfig(
24
- level=logging.INFO,
25
- format='%(asctime)s | %(levelname)-5s | %(message)s', # ← changed level → levelname
26
- datefmt='%Y-%m-%d %H:%M:%S'
27
- )
28
  logger = logging.getLogger(__name__)
29
 
30
- logger.info("Initializing region detector...")
31
-
32
  device = "cuda" if torch.cuda.is_available() else "cpu"
33
  logger.info(f"Device: {device}")
34
 
35
- # ── Load YOLO ───────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  try:
37
- region_pt = 'regions.pt'
38
- if not os.path.exists(region_pt):
 
 
39
  for f in os.listdir('.'):
40
  name = f.lower()
41
- if name.endswith('.pt') and 'region' in name:
42
- region_pt = f
43
  break
44
 
45
- if not os.path.exists(region_pt):
46
- raise FileNotFoundError("No regions.pt (or similar *.pt) found in current directory")
 
 
 
 
47
 
48
- logger.info(f"Loading model: {region_pt}")
49
- model = YOLO(region_pt)
50
- logger.info("Region detector loaded")
 
51
 
52
  except Exception as e:
53
- logger.error(f"Model loading failed → {e}", exc_info=True)
54
  raise
55
 
56
 
57
- def visualize_regions(
 
 
 
 
 
 
 
 
58
  image,
 
 
59
  conf_thresh: float = 0.25,
60
- min_size: int = 60,
61
- padding: int = 0,
62
- show_labels: bool = True,
63
- save_debug_crops: bool = False,
64
- imgsz: int = 1024,
65
  ):
66
- start = datetime.now().strftime("%H:%M:%S")
67
- logs = [f"[{start}] Processing started"]
 
 
 
 
 
 
 
 
 
 
68
 
69
  if image is None:
70
- logs.append("No image uploaded")
71
- return None, "\n".join(logs)
72
 
73
- # Load & convert
74
- if isinstance(image, str):
75
- img = Image.open(image).convert("RGB")
76
- else:
77
- img = image.convert("RGB")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
- w, h = img.size
80
- logs.append(f"Image size: {w} × {h}")
81
 
82
- debug_img = img.copy()
83
- draw = ImageDraw.Draw(debug_img)
84
 
85
- try:
86
- # Font for drawing labels (fallback to default)
87
- try:
88
- font = ImageFont.truetype("arial.ttf", 18)
89
- except:
90
- font = ImageFont.load_default()
91
 
92
- # ── Run detection ───────────────────────────────────────────────
93
- results = model(
94
- img,
95
- conf=conf_thresh,
96
- imgsz=imgsz,
97
- verbose=False
98
- )[0]
99
 
100
- boxes = results.boxes
101
- logs.append(f"Detected {len(boxes)} region candidate(s)")
 
102
 
103
- kept = 0
 
 
104
 
105
- # Sort top → bottom
106
- if len(boxes) > 0:
107
- ys = boxes.xyxy[:, 1].cpu().numpy()
108
- order = ys.argsort()
109
-
110
- for idx in order:
111
- box = boxes[idx]
112
- conf = float(box.conf)
113
- if conf < conf_thresh:
114
- continue
115
-
116
- x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
117
- bw, bh = x2 - x1, y2 - y1
118
-
119
- if bw < min_size or bh < min_size:
120
- continue
121
-
122
- # Optional padding (mostly for crop saving)
123
- px1 = max(0, x1 - padding)
124
- py1 = max(0, y1 - padding)
125
- px2 = min(w, x2 + padding)
126
- py2 = min(h, y2 + padding)
127
-
128
- # Draw box
129
- draw.rectangle((x1, y1, x2, y2), outline="lime", width=3)
130
-
131
- if show_labels:
132
- label = f"conf {conf:.2f} {bw}×{bh}"
133
- tw, th = draw.textbbox((0,0), label, font=font)[2:]
134
- draw.rectangle(
135
- (x1, y1 - th - 4, x1 + tw + 8, y1),
136
- fill=(0, 180, 0, 160)
137
- )
138
- draw.text((x1 + 4, y1 - th - 2), label, fill="white", font=font)
139
-
140
- kept += 1
141
-
142
- # Optional: save individual crops
143
- if save_debug_crops:
144
- os.makedirs("debug_regions", exist_ok=True)
145
- crop = img.crop((px1, py1, px2, py2))
146
- fname = f"debug_regions/r{kept:02d}_conf{conf:.2f}_{bw}x{bh}.png"
147
- crop.save(fname)
148
- logs.append(f"Saved crop → {fname}")
149
-
150
- if kept == 0:
151
- msg = f"No regions kept after filters (conf ≥ {conf_thresh}, size ≥ {min_size}px)"
152
- logs.append(msg)
153
- else:
154
- logs.append(f"Visualized {kept} region(s)")
155
 
156
- logs.append("Finished.")
 
 
 
157
 
158
- return debug_img, "\n".join(logs)
159
 
160
- except Exception as e:
161
- logs.append(f"Error during inference: {str(e)}")
162
- logger.exception("Inference failed")
163
- return debug_img, "\n".join(logs)
 
 
 
 
164
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
 
167
- # ── Gradio Interface ────────────────────────────────────────────────────
168
  demo = gr.Interface(
169
- fn=visualize_regions,
170
  inputs=[
171
- gr.Image(type="pil", label="Upload image (handwritten document)"),
172
- gr.Slider(0.10, 0.60, step=0.02, value=0.25, label="Confidence threshold"),
173
- gr.Slider(30, 300, step=10, value=60, label="Minimum region width/height (px)"),
174
- gr.Slider(0, 40, step=4, value=0, label="Padding around box (for crops only)"),
175
- gr.Checkbox(label="Draw confidence + size labels on boxes", value=True),
176
- gr.Checkbox(label="Save individual region crops to debug_regions/", value=False),
177
- gr.Slider(640, 1280, step=64, value=1024, label="Inference image size (imgsz)"),
178
  ],
179
  outputs=[
180
- gr.Image(label="Detected text regions (green boxes)"),
181
- gr.Textbox(label="Log / debug info", lines=14),
 
182
  ],
183
- title="Region Detector Debug View",
184
  description=(
185
- "Only shows what the region YOLO model sees.\n\n"
186
- "• Green boxes = detected text regions\n"
187
- "• Tune confidence and min size until boxes look reasonable\n"
188
- "• Use logs to see exact confidences and sizes\n"
189
- "• Save crops if you want to manually check what is being detected"
190
  ),
191
- # theme=gr.themes.Soft(), # ← comment out or remove (moved to launch)
192
- # allow_flagging="never", # ← remove this line completely
193
  )
194
 
195
  if __name__ == "__main__":
196
- logger.info("Launching debug interface...")
197
  demo.launch()
198
 
199
 
200
 
201
 
202
 
203
-
204
-
205
-
206
-
207
-
208
-
209
-
210
-
211
-
212
-
 
1
+ # import gradio as gr
2
+ # from ultralytics import YOLO
3
+ # from PIL import Image, ImageDraw, ImageFont
4
+ # import torch
5
+ # import logging
6
+ # import os
7
+ # from datetime import datetime
8
+
9
+ # # # ── Quiet startup ───────────────────────────────────────────────────────
10
+ # # os.environ['HF_HUB_DISABLE_PROGRESS_BARS'] = '1'
11
+ # # logging.getLogger('ultralytics').setLevel(logging.WARNING)
12
+
13
+ # # logging.basicConfig(
14
+ # # level=logging.INFO,
15
+ # # format='%(asctime)s | %(level)-5s | %(message)s'
16
+ # # )
17
+ # # logger = logging.getLogger(__name__)
18
 
 
19
  # os.environ['HF_HUB_DISABLE_PROGRESS_BARS'] = '1'
20
  # logging.getLogger('ultralytics').setLevel(logging.WARNING)
21
 
22
+ # # FIXED logging format: use levelname, not level
23
  # logging.basicConfig(
24
  # level=logging.INFO,
25
+ # format='%(asctime)s | %(levelname)-5s | %(message)s', # ← changed level → levelname
26
+ # datefmt='%Y-%m-%d %H:%M:%S'
27
  # )
28
  # logger = logging.getLogger(__name__)
29
 
30
+ # logger.info("Initializing region detector...")
31
+
32
+ # device = "cuda" if torch.cuda.is_available() else "cpu"
33
+ # logger.info(f"Device: {device}")
34
+
35
+ # # ── Load YOLO ───────────────────────────────────────────────────────────
36
+ # try:
37
+ # region_pt = 'regions.pt'
38
+ # if not os.path.exists(region_pt):
39
+ # for f in os.listdir('.'):
40
+ # name = f.lower()
41
+ # if name.endswith('.pt') and 'region' in name:
42
+ # region_pt = f
43
+ # break
44
+
45
+ # if not os.path.exists(region_pt):
46
+ # raise FileNotFoundError("No regions.pt (or similar *.pt) found in current directory")
47
+
48
+ # logger.info(f"Loading model: {region_pt}")
49
+ # model = YOLO(region_pt)
50
+ # logger.info("Region detector loaded")
51
+
52
+ # except Exception as e:
53
+ # logger.error(f"Model loading failed → {e}", exc_info=True)
54
+ # raise
55
+
56
+
57
+ # def visualize_regions(
58
+ # image,
59
+ # conf_thresh: float = 0.25,
60
+ # min_size: int = 60,
61
+ # padding: int = 0,
62
+ # show_labels: bool = True,
63
+ # save_debug_crops: bool = False,
64
+ # imgsz: int = 1024,
65
+ # ):
66
+ # start = datetime.now().strftime("%H:%M:%S")
67
+ # logs = [f"[{start}] Processing started"]
68
+
69
+ # if image is None:
70
+ # logs.append("No image uploaded")
71
+ # return None, "\n".join(logs)
72
+
73
+ # # Load & convert
74
+ # if isinstance(image, str):
75
+ # img = Image.open(image).convert("RGB")
76
+ # else:
77
+ # img = image.convert("RGB")
78
+
79
+ # w, h = img.size
80
+ # logs.append(f"Image size: {w} × {h}")
81
+
82
+ # debug_img = img.copy()
83
+ # draw = ImageDraw.Draw(debug_img)
84
+
85
+ # try:
86
+ # # Font for drawing labels (fallback to default)
87
+ # try:
88
+ # font = ImageFont.truetype("arial.ttf", 18)
89
+ # except:
90
+ # font = ImageFont.load_default()
91
+
92
+ # # ── Run detection ───────────────────────────────────────────────
93
+ # results = model(
94
+ # img,
95
+ # conf=conf_thresh,
96
+ # imgsz=imgsz,
97
+ # verbose=False
98
+ # )[0]
99
+
100
+ # boxes = results.boxes
101
+ # logs.append(f"Detected {len(boxes)} region candidate(s)")
102
+
103
+ # kept = 0
104
+
105
+ # # Sort top → bottom
106
+ # if len(boxes) > 0:
107
+ # ys = boxes.xyxy[:, 1].cpu().numpy()
108
+ # order = ys.argsort()
109
+
110
+ # for idx in order:
111
+ # box = boxes[idx]
112
+ # conf = float(box.conf)
113
+ # if conf < conf_thresh:
114
+ # continue
115
+
116
+ # x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
117
+ # bw, bh = x2 - x1, y2 - y1
118
+
119
+ # if bw < min_size or bh < min_size:
120
+ # continue
121
+
122
+ # # Optional padding (mostly for crop saving)
123
+ # px1 = max(0, x1 - padding)
124
+ # py1 = max(0, y1 - padding)
125
+ # px2 = min(w, x2 + padding)
126
+ # py2 = min(h, y2 + padding)
127
+
128
+ # # Draw box
129
+ # draw.rectangle((x1, y1, x2, y2), outline="lime", width=3)
130
+
131
+ # if show_labels:
132
+ # label = f"conf {conf:.2f} {bw}×{bh}"
133
+ # tw, th = draw.textbbox((0,0), label, font=font)[2:]
134
+ # draw.rectangle(
135
+ # (x1, y1 - th - 4, x1 + tw + 8, y1),
136
+ # fill=(0, 180, 0, 160)
137
+ # )
138
+ # draw.text((x1 + 4, y1 - th - 2), label, fill="white", font=font)
139
+
140
+ # kept += 1
141
+
142
+ # # Optional: save individual crops
143
+ # if save_debug_crops:
144
+ # os.makedirs("debug_regions", exist_ok=True)
145
+ # crop = img.crop((px1, py1, px2, py2))
146
+ # fname = f"debug_regions/r{kept:02d}_conf{conf:.2f}_{bw}x{bh}.png"
147
+ # crop.save(fname)
148
+ # logs.append(f"Saved crop → {fname}")
149
+
150
+ # if kept == 0:
151
+ # msg = f"No regions kept after filters (conf ≥ {conf_thresh}, size ≥ {min_size}px)"
152
+ # logs.append(msg)
153
+ # else:
154
+ # logs.append(f"Visualized {kept} region(s)")
155
+
156
+ # logs.append("Finished.")
157
+
158
+ # return debug_img, "\n".join(logs)
159
+
160
+ # except Exception as e:
161
+ # logs.append(f"Error during inference: {str(e)}")
162
+ # logger.exception("Inference failed")
163
+ # return debug_img, "\n".join(logs)
164
+
165
+
166
+
167
+ # # ── Gradio Interface ────────────────────────────────────────────────────
168
+ # demo = gr.Interface(
169
+ # fn=visualize_regions,
170
+ # inputs=[
171
+ # gr.Image(type="pil", label="Upload image (handwritten document)"),
172
+ # gr.Slider(0.10, 0.60, step=0.02, value=0.25, label="Confidence threshold"),
173
+ # gr.Slider(30, 300, step=10, value=60, label="Minimum region width/height (px)"),
174
+ # gr.Slider(0, 40, step=4, value=0, label="Padding around box (for crops only)"),
175
+ # gr.Checkbox(label="Draw confidence + size labels on boxes", value=True),
176
+ # gr.Checkbox(label="Save individual region crops to debug_regions/", value=False),
177
+ # gr.Slider(640, 1280, step=64, value=1024, label="Inference image size (imgsz)"),
178
+ # ],
179
+ # outputs=[
180
+ # gr.Image(label="Detected text regions (green boxes)"),
181
+ # gr.Textbox(label="Log / debug info", lines=14),
182
+ # ],
183
+ # title="Region Detector Debug View",
184
+ # description=(
185
+ # "Only shows what the region YOLO model sees.\n\n"
186
+ # "• Green boxes = detected text regions\n"
187
+ # "• Tune confidence and min size until boxes look reasonable\n"
188
+ # "• Use logs to see exact confidences and sizes\n"
189
+ # "• Save crops if you want to manually check what is being detected"
190
+ # ),
191
+ # # theme=gr.themes.Soft(), # ← comment out or remove (moved to launch)
192
+ # # allow_flagging="never", # ← remove this line completely
193
+ # )
194
+
195
+ # if __name__ == "__main__":
196
+ # logger.info("Launching debug interface...")
197
+ # demo.launch()
198
+
199
+
200
+
201
+
202
+
203
+
204
+
205
+
206
+
207
+ import gradio as gr
208
+ from ultralytics import YOLO
209
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
210
+ from PIL import Image, ImageDraw
211
+ import torch
212
+ import logging
213
+ import os
214
+ import warnings
215
+ import time
216
+ from datetime import datetime
217
+
218
+ # ── Suppress noisy logs ──────────────────────────────────────────────────────
219
  os.environ['HF_HUB_DISABLE_PROGRESS_BARS'] = '1'
220
+ warnings.filterwarnings('ignore')
221
+ logging.getLogger('transformers').setLevel(logging.ERROR)
222
  logging.getLogger('ultralytics').setLevel(logging.WARNING)
223
 
224
+ # Clean logging
225
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)-5s | %(message)s')
 
 
 
 
226
  logger = logging.getLogger(__name__)
227
 
228
+ logger.info("Initializing models...")
 
229
  device = "cuda" if torch.cuda.is_available() else "cpu"
230
  logger.info(f"Device: {device}")
231
 
232
+ def load_with_retry(cls, name, token=None, retries=4, delay=6):
233
+ for attempt in range(1, retries + 1):
234
+ try:
235
+ logger.info(f"Loading {name} (attempt {attempt}/{retries})")
236
+ if "Processor" in str(cls):
237
+ return cls.from_pretrained(name, token=token)
238
+ return cls.from_pretrained(name, token=token).to(device)
239
+ except Exception as e:
240
+ logger.warning(f"Load failed: {e}")
241
+ if attempt < retries:
242
+ time.sleep(delay)
243
+ raise RuntimeError(f"Failed to load {name} after {retries} attempts")
244
+
245
+
246
  try:
247
+ # Locate local YOLO line detection weights
248
+ line_pt = 'lines.pt'
249
+
250
+ if not os.path.exists(line_pt):
251
  for f in os.listdir('.'):
252
  name = f.lower()
253
+ if 'line' in name and name.endswith('.pt'):
254
+ line_pt = f
255
  break
256
 
257
+ if not os.path.exists(line_pt):
258
+ raise FileNotFoundError("Could not find lines.pt (or similar *.pt file containing 'line' in name)")
259
+
260
+ logger.info("Loading YOLO line model...")
261
+ line_model = YOLO(line_pt)
262
+ logger.info("YOLO line model loaded")
263
 
264
+ hf_token = os.getenv("HF_TOKEN")
265
+ processor = load_with_retry(TrOCRProcessor, "microsoft/trocr-base-handwritten", hf_token)
266
+ trocr = load_with_retry(VisionEncoderDecoderModel, "microsoft/trocr-base-handwritten", hf_token)
267
+ logger.info("TrOCR loaded → ready")
268
 
269
  except Exception as e:
270
+ logger.error(f"Model loading failed: {e}", exc_info=True)
271
  raise
272
 
273
 
274
+ def run_ocr(crop: Image.Image) -> str:
275
+ if crop.width < 20 or crop.height < 12:
276
+ return ""
277
+ pixels = processor(images=crop, return_tensors="pt").pixel_values.to(device)
278
+ ids = trocr.generate(pixels, max_new_tokens=128)
279
+ return processor.batch_decode(ids, skip_special_tokens=True)[0].strip()
280
+
281
+
282
+ def process_document(
283
  image,
284
+ enable_debug_crops: bool = False,
285
+ line_imgsz: int = 768,
286
  conf_thresh: float = 0.25,
 
 
 
 
 
287
  ):
288
+ start_ts = datetime.now().strftime("%H:%M:%S")
289
+ logs = []
290
+
291
+ def log(msg: str, level: str = "INFO"):
292
+ line = f"[{start_ts}] {level:5} {msg}"
293
+ logs.append(line)
294
+ if level == "ERROR":
295
+ logger.error(msg)
296
+ else:
297
+ logger.info(msg)
298
+
299
+ log("Start processing")
300
 
301
  if image is None:
302
+ log("No image uploaded", "ERROR")
303
+ return None, "Upload an image", "\n".join(logs)
304
 
305
+ try:
306
+ # ── Prepare ─────────────────────────────────────────────────────────────
307
+ if not isinstance(image, Image.Image):
308
+ img = Image.open(image).convert("RGB")
309
+ else:
310
+ img = image.convert("RGB")
311
+
312
+ debug_img = img.copy()
313
+ draw = ImageDraw.Draw(debug_img)
314
+ w, h = img.size
315
+ log(f"Input image: {w} × {h} px")
316
+
317
+ debug_dir = "debug_crops"
318
+ if enable_debug_crops:
319
+ os.makedirs(debug_dir, exist_ok=True)
320
+ log(f"Debug crops will be saved to {debug_dir}/")
321
+
322
+ extracted = []
323
+
324
+ # ── Line detection on full image ────────────────────────────────────────
325
+ # Adaptive size based on image dimensions
326
+ max_dim = max(w, h)
327
+ if max_dim > 2200:
328
+ used_sz = 1280
329
+ elif max_dim > 1400:
330
+ used_sz = 1024
331
+ elif max_dim < 600:
332
+ used_sz = 640
333
+ else:
334
+ used_sz = line_imgsz
335
 
336
+ log(f"Running line detection (imgsz={used_sz}, conf≥{conf_thresh}) …")
 
337
 
338
+ res = line_model(img, conf=conf_thresh, imgsz=used_sz, verbose=False)[0]
339
+ boxes = res.boxes
340
 
341
+ log(f"→ Detected {len(boxes)} line candidate(s)")
 
 
 
 
 
342
 
343
+ if len(boxes) == 0:
344
+ msg = "No text lines detected"
345
+ log(msg, "WARNING")
346
+ return debug_img, msg, "\n".join(logs)
 
 
 
347
 
348
+ # Sort top → bottom
349
+ ys = boxes.xyxy[:, 1].cpu().numpy() # y_min
350
+ order = ys.argsort()
351
 
352
+ for j, idx in enumerate(order, 1):
353
+ conf = float(boxes.conf[idx])
354
+ x1, y1, x2, y2 = map(round, boxes.xyxy[idx].cpu().tolist())
355
 
356
+ lw, lh = x2 - x1, y2 - y1
357
+ log(f" Line {j}/{len(boxes)} conf={conf:.3f} {x1},{y1} → {x2},{y2} ({lw}×{lh})")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
 
359
+ # Skip very small detections
360
+ if lw < 60 or lh < 20:
361
+ log(f" → skipped (too small)")
362
+ continue
363
 
364
+ draw.rectangle((x1, y1, x2, y2), outline="red", width=3)
365
 
366
+ line_crop = img.crop((x1, y1, x2, y2))
367
+
368
+ if enable_debug_crops:
369
+ fname = f"{debug_dir}/line_{j:02d}_conf{conf:.2f}.png"
370
+ line_crop.save(fname)
371
+
372
+ text = run_ocr(line_crop)
373
+ log(f" OCR → '{text}'")
374
 
375
+ if text.strip():
376
+ extracted.append(text)
377
+
378
+ # ── Finalize ────────────────────────────────────────────────────────────
379
+ if not extracted:
380
+ msg = "No readable text found after OCR"
381
+ log(msg, "WARNING")
382
+ return debug_img, msg, "\n".join(logs)
383
+
384
+ log(f"Success — extracted {len(extracted)} line(s)")
385
+ if enable_debug_crops:
386
+ log(f"Debug crops saved to {debug_dir}/")
387
+
388
+ return debug_img, "\n".join(extracted), "\n".join(logs)
389
+
390
+ except Exception as e:
391
+ log(f"Processing failed: {e}", "ERROR")
392
+ logger.exception("Traceback:")
393
+ return debug_img, f"Error: {str(e)}", "\n".join(logs)
394
 
395
 
 
396
  demo = gr.Interface(
397
+ fn=process_document,
398
  inputs=[
399
+ gr.Image(type="pil", label="Handwritten document"),
400
+ gr.Checkbox(label="Save debug crops", value=False),
401
+ gr.Slider(512, 1280, step=64, value=768, label="Line detection size (imgsz)"),
402
+ gr.Slider(0.15, 0.5, step=0.05, value=0.25, label="Confidence threshold"),
 
 
 
403
  ],
404
  outputs=[
405
+ gr.Image(label="Debug (red = detected text lines)"),
406
+ gr.Textbox(label="Extracted Text", lines=10),
407
+ gr.Textbox(label="Detailed Logs (copy if alignment is wrong)", lines=16),
408
  ],
409
+ title="Handwritten Line Detection + TrOCR",
410
  description=(
411
+ "Red boxes = text lines detected by YOLO → sent to TrOCR for recognition\n\n"
412
+ "Use **Detailed Logs** to check coordinates, sizes & confidence values if results look off."
 
 
 
413
  ),
414
+ theme=gr.themes.Soft(),
415
+ flagging_mode="never",
416
  )
417
 
418
  if __name__ == "__main__":
419
+ logger.info("Launching interface…")
420
  demo.launch()
421
 
422
 
423
 
424
 
425