Ankushbl6 committed on
Commit
e082ab0
·
verified ·
1 Parent(s): 73882ca

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +155 -181
src/streamlit_app.py CHANGED
@@ -211,11 +211,12 @@ def has_any_label(fname: str) -> bool:
211
  vals = st.session_state.field_values.get(fname, {})
212
  return any(str(v).strip() for v in vals.values())
213
 
214
- # --- Upload ---
215
  uploaded_files = st.file_uploader(
216
  "Upload remittance images",
217
  type=["png", "jpg", "jpeg"],
218
  accept_multiple_files=True,
 
219
  )
220
 
221
  if not uploaded_files:
@@ -233,7 +234,9 @@ for f in uploaded_files:
233
  images.append({"name": f.name, "image": img, "bytes": st.session_state.image_data[f.name]})
234
 
235
  file_names = [img["name"] for img in images]
236
- selected_name = st.selectbox("Select image", file_names)
 
 
237
  st.session_state.selected_image = selected_name
238
 
239
  selected_img_data = next(img for img in images if img["name"] == selected_name)
@@ -252,36 +255,36 @@ if selected_name not in st.session_state.rect_version:
252
  if selected_name not in st.session_state.zoom_values:
253
  st.session_state.zoom_values[selected_name] = get_default_zoom(pil_image)
254
 
255
- # ========== FIELD SELECTION ==========
256
- st.markdown("---")
257
-
258
- def add_line_item():
259
- img = st.session_state.selected_image
260
- if img:
261
- st.session_state.num_line_items[img] += 1
262
-
263
- def remove_line_item():
264
- img = st.session_state.selected_image
265
- if img and st.session_state.num_line_items[img] > 1:
266
- last_num = st.session_state.num_line_items[img]
267
- for lif in LINE_ITEM_FIELDS:
268
- key = f"Line {last_num}: {lif}"
269
- st.session_state.field_values[img].pop(key, None)
270
- st.session_state.field_rects_orig[img].pop(key, None)
271
- st.session_state.num_line_items[img] -= 1
272
- st.session_state.rect_version[img] += 1
273
 
274
  # Initialize field variables with defaults
275
  display_field_name = SINGLE_FIELDS[0]
276
  storage_field_name = SINGLE_FIELDS[0]
277
  base_field_for_color = SINGLE_FIELDS[0]
278
 
279
- sel_col1, sel_col2, sel_col3, sel_col4 = st.columns([1.5, 1.5, 2, 2])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
 
281
- with sel_col1:
282
  field_type = st.radio("Type", ["Single", "Line Item"], horizontal=True, label_visibility="collapsed")
283
 
284
- with sel_col2:
285
  if field_type == "Single":
286
  field_name = st.selectbox("Field", SINGLE_FIELDS, label_visibility="collapsed")
287
  display_field_name = field_name
@@ -289,46 +292,40 @@ with sel_col2:
289
  base_field_for_color = field_name
290
  else:
291
  num_items = st.session_state.num_line_items[selected_name]
292
- line_item_options = [f"Line {i+1}" for i in range(num_items)]
293
- selected_line_item = st.selectbox("Line", line_item_options, label_visibility="collapsed")
294
- line_item_num = int(selected_line_item.split()[1])
295
-
296
- with sel_col3:
297
- if field_type == "Line Item":
 
 
 
 
 
 
 
 
 
298
  base_field = st.selectbox("Field", LINE_ITEM_FIELDS, label_visibility="collapsed")
299
  display_field_name = f"{selected_line_item}: {base_field}"
300
  storage_field_name = f"Line {line_item_num}: {base_field}"
301
  base_field_for_color = base_field
302
 
303
- with sel_col4:
304
- if field_type == "Line Item":
305
- # Line items +/- buttons next to line item dropdown
306
- add_col, rem_col, info_col = st.columns([1, 1, 2])
307
- with add_col:
308
- st.button("➕", key=f"addli_{selected_name}", on_click=add_line_item, help="Add line item")
309
- with rem_col:
310
- if st.session_state.num_line_items[selected_name] > 1:
311
- st.button("➖", key=f"remli_{selected_name}", on_click=remove_line_item, help="Remove line item")
312
- with info_col:
313
- st.write(f"Lines: **{st.session_state.num_line_items[selected_name]}**")
314
-
315
- # Guard in case something weird happens
316
- if not storage_field_name:
317
- storage_field_name = display_field_name
318
 
319
- field_color = FIELD_COLORS.get(base_field_for_color or display_field_name, "#FF0000")
 
320
 
321
- st.markdown(f"**Current:** <span style='color:{field_color}'>●</span> {display_field_name}", unsafe_allow_html=True)
322
-
323
- # ========== MAIN COLUMNS ==========
324
- col1, col2 = st.columns([3, 2])
325
-
326
- with col1:
327
- # Zoom controls - selectbox + buttons
328
  current_zoom = st.session_state.zoom_values[selected_name]
329
  zoom_index = ZOOM_OPTIONS.index(current_zoom) if current_zoom in ZOOM_OPTIONS else 0
330
 
331
- # Zoom callbacks
332
  def do_zoom_out():
333
  img = st.session_state.selected_image
334
  curr = st.session_state.zoom_values[img]
@@ -350,11 +347,11 @@ with col1:
350
  pil_img = load_image(img_bytes)
351
  st.session_state.zoom_values[img] = get_default_zoom(pil_img)
352
 
353
- zoom_row1, zoom_row2, zoom_row3, zoom_row4 = st.columns([2, 1, 1, 1])
354
 
355
- with zoom_row1:
356
  zoom = st.selectbox(
357
- "🔍 Zoom",
358
  options=ZOOM_OPTIONS,
359
  index=zoom_index,
360
  format_func=lambda x: f"{x}%",
@@ -363,16 +360,113 @@ with col1:
363
  )
364
  st.session_state.zoom_values[selected_name] = zoom
365
 
366
- with zoom_row2:
367
  st.button("➖", key="zoom_out", help="Zoom out", on_click=do_zoom_out)
368
 
369
- with zoom_row3:
370
  st.button("➕", key="zoom_in", help="Zoom in", on_click=do_zoom_in)
371
 
372
- with zoom_row4:
373
  st.button("Fit", key="zoom_fit", help="Fit to screen", on_click=do_zoom_fit)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
 
375
- # Get current zoom value
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
  zoom = st.session_state.zoom_values[selected_name]
377
 
378
  scale = zoom / 100.0
@@ -381,14 +475,6 @@ with col1:
381
 
382
  # Get display image - fresh PIL object each time from stable bytes
383
  display_image = get_display_image_from_bytes(image_bytes, disp_w, disp_h)
384
-
385
- st.caption(f"Original: {pil_image.width}×{pil_image.height} | Display: {disp_w}×{disp_h}")
386
-
387
- has_rect = storage_field_name in st.session_state.field_rects_orig[selected_name]
388
- if has_rect:
389
- st.success(f"✅ Has rectangle. Draw again to replace.")
390
- else:
391
- st.warning(f"⬜ Draw rectangle for this field")
392
 
393
  def orig_to_display(rect_orig, s):
394
  return {
@@ -472,116 +558,4 @@ with col1:
472
  else:
473
  st.toast(f"✅ Rectangle saved (no text detected)")
474
  except Exception:
475
- st.toast(f"✅ Rectangle saved")
476
-
477
- with col2:
478
- # ========== ALL VALUES SECTION (MOVED UP) ==========
479
- st.markdown("---")
480
- single_rects = sum(1 for f in st.session_state.field_rects_orig[selected_name] if not f.startswith("Line "))
481
- num_items = st.session_state.num_line_items[selected_name]
482
- line_rects = sum(1 for f in st.session_state.field_rects_orig[selected_name] if f.startswith("Line "))
483
-
484
- st.write(f"**Single:** {single_rects}/{len(SINGLE_FIELDS)} | **Lines ({num_items}):** {line_rects}/{num_items * len(LINE_ITEM_FIELDS)}")
485
-
486
- with st.expander("📋 All Values"):
487
- for f in SINGLE_FIELDS:
488
- v = st.session_state.field_values[selected_name].get(f, "")
489
- if v.strip():
490
- st.write(f"**{f}:** {v}")
491
- for i in range(1, num_items + 1):
492
- vals = [(lif, st.session_state.field_values[selected_name].get(f"Line {i}: {lif}", ""))
493
- for lif in LINE_ITEM_FIELDS]
494
- vals = [(lif, v) for lif, v in vals if v.strip()]
495
- if vals:
496
- st.write(f"**Line {i}:**")
497
- for lif, v in vals:
498
- st.write(f" {lif}: {v}")
499
-
500
- # ========== OCR & VALUE SECTION (MOVED DOWN) ==========
501
- st.markdown("---")
502
- st.subheader("OCR & Value")
503
-
504
- current_rect_orig = st.session_state.field_rects_orig[selected_name].get(storage_field_name)
505
- current_val = st.session_state.field_values[selected_name].get(storage_field_name, "")
506
-
507
- if current_rect_orig:
508
- st.caption(f"📐 ({current_rect_orig['left']:.0f}, {current_rect_orig['top']:.0f}) - {current_rect_orig['width']:.0f}×{current_rect_orig['height']:.0f}")
509
-
510
- x1 = max(0, int(current_rect_orig["left"]))
511
- y1 = max(0, int(current_rect_orig["top"]))
512
- x2 = min(pil_image.width, int(current_rect_orig["left"] + current_rect_orig["width"]))
513
- y2 = min(pil_image.height, int(current_rect_orig["top"] + current_rect_orig["height"]))
514
- if x2 > x1 and y2 > y1:
515
- crop = pil_image.crop((x1, y1, x2, y2))
516
- st.image(crop, caption="Selected Region", width=200)
517
-
518
- new_val = st.text_area("Value (auto-filled by OCR)", value=current_val, height=80)
519
-
520
- col_btn1, col_btn2, col_btn3 = st.columns(3)
521
-
522
- with col_btn1:
523
- if st.button("💾 Save"):
524
- st.session_state.field_values[selected_name][storage_field_name] = new_val
525
- st.success("Saved!")
526
-
527
- with col_btn2:
528
- if current_rect_orig and st.button("🔄 Re-OCR"):
529
- x1 = max(0, int(current_rect_orig["left"]))
530
- y1 = max(0, int(current_rect_orig["top"]))
531
- x2 = min(pil_image.width, int(current_rect_orig["left"] + current_rect_orig["width"]))
532
- y2 = min(pil_image.height, int(current_rect_orig["top"] + current_rect_orig["height"]))
533
- if x2 > x1 and y2 > y1:
534
- crop = pil_image.crop((x1, y1, x2, y2))
535
- try:
536
- text = pytesseract.image_to_string(crop, config="--psm 6").strip()
537
- if text:
538
- st.session_state.field_values[selected_name][storage_field_name] = text
539
- st.success(f"OCR: {text}")
540
- else:
541
- st.warning("Empty result")
542
- except Exception as e:
543
- st.error(f"OCR failed: {e}")
544
-
545
- with col_btn3:
546
- def delete_rect():
547
- st.session_state.pending_delete = (selected_name, storage_field_name)
548
-
549
- if current_rect_orig:
550
- st.button("🗑️ Delete", on_click=delete_rect)
551
-
552
- # ========== EXPORT SECTION ==========
553
- st.markdown("---")
554
- st.subheader("📤 JSONL Export")
555
-
556
- # Export ALL labeled remittances
557
- records_all = [
558
- build_gt_record_for_file(img["name"])
559
- for img in images
560
- if has_any_label(img["name"])
561
- ]
562
-
563
- if records_all:
564
- all_jsonl_str = "\n".join(
565
- json.dumps(rec, ensure_ascii=False) for rec in records_all
566
- )
567
- st.download_button(
568
- "⬇️ Export ALL labeled remittances (JSONL)",
569
- data=all_jsonl_str.encode("utf-8"),
570
- file_name="remittances_ground_truth.jsonl",
571
- mime="application/json",
572
- )
573
- else:
574
- st.caption("No labeled remittances yet to export in bulk.")
575
-
576
- # Export CURRENT remittance
577
- current_record = build_gt_record_for_file(selected_name)
578
- with st.expander("Preview CURRENT remittance JSON"):
579
- st.json(current_record)
580
-
581
- current_jsonl_str = json.dumps(current_record, ensure_ascii=False) + "\n"
582
- st.download_button(
583
- "⬇️ Export CURRENT remittance (JSONL)",
584
- data=current_jsonl_str.encode("utf-8"),
585
- file_name=f"{os.path.splitext(selected_name)[0]}_remittance.jsonl",
586
- mime="application/json",
587
- )
 
211
  vals = st.session_state.field_values.get(fname, {})
212
  return any(str(v).strip() for v in vals.values())
213
 
214
+ # --- Upload (compact) ---
215
  uploaded_files = st.file_uploader(
216
  "Upload remittance images",
217
  type=["png", "jpg", "jpeg"],
218
  accept_multiple_files=True,
219
+ label_visibility="collapsed"
220
  )
221
 
222
  if not uploaded_files:
 
234
  images.append({"name": f.name, "image": img, "bytes": st.session_state.image_data[f.name]})
235
 
236
  file_names = [img["name"] for img in images]
237
+
238
+ # Image selector dropdown only (no duplicate list above)
239
+ selected_name = st.selectbox("Select image", file_names, label_visibility="collapsed")
240
  st.session_state.selected_image = selected_name
241
 
242
  selected_img_data = next(img for img in images if img["name"] == selected_name)
 
255
  if selected_name not in st.session_state.zoom_values:
256
  st.session_state.zoom_values[selected_name] = get_default_zoom(pil_image)
257
 
258
+ # ========== MAIN COLUMNS ==========
259
+ col1, col2 = st.columns([3, 2])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
 
261
  # Initialize field variables with defaults
262
  display_field_name = SINGLE_FIELDS[0]
263
  storage_field_name = SINGLE_FIELDS[0]
264
  base_field_for_color = SINGLE_FIELDS[0]
265
 
266
+ with col2:
267
+ # ========== FIELD SELECTION (NOW ON RHS) ==========
268
+ st.markdown("#### 🎯 Field Selection")
269
+
270
+ def add_line_item():
271
+ img = st.session_state.selected_image
272
+ if img:
273
+ st.session_state.num_line_items[img] += 1
274
+
275
+ def remove_line_item():
276
+ img = st.session_state.selected_image
277
+ if img and st.session_state.num_line_items[img] > 1:
278
+ last_num = st.session_state.num_line_items[img]
279
+ for lif in LINE_ITEM_FIELDS:
280
+ key = f"Line {last_num}: {lif}"
281
+ st.session_state.field_values[img].pop(key, None)
282
+ st.session_state.field_rects_orig[img].pop(key, None)
283
+ st.session_state.num_line_items[img] -= 1
284
+ st.session_state.rect_version[img] += 1
285
 
 
286
  field_type = st.radio("Type", ["Single", "Line Item"], horizontal=True, label_visibility="collapsed")
287
 
 
288
  if field_type == "Single":
289
  field_name = st.selectbox("Field", SINGLE_FIELDS, label_visibility="collapsed")
290
  display_field_name = field_name
 
292
  base_field_for_color = field_name
293
  else:
294
  num_items = st.session_state.num_line_items[selected_name]
295
+
296
+ line_col1, line_col2 = st.columns([2, 1])
297
+ with line_col1:
298
+ line_item_options = [f"Line {i+1}" for i in range(num_items)]
299
+ selected_line_item = st.selectbox("Line", line_item_options, label_visibility="collapsed")
300
+ line_item_num = int(selected_line_item.split()[1])
301
+
302
+ with line_col2:
303
+ add_col, rem_col = st.columns(2)
304
+ with add_col:
305
+ st.button("➕", key=f"addli_{selected_name}", on_click=add_line_item, help="Add line item")
306
+ with rem_col:
307
+ if st.session_state.num_line_items[selected_name] > 1:
308
+ st.button("➖", key=f"remli_{selected_name}", on_click=remove_line_item, help="Remove line item")
309
+
310
  base_field = st.selectbox("Field", LINE_ITEM_FIELDS, label_visibility="collapsed")
311
  display_field_name = f"{selected_line_item}: {base_field}"
312
  storage_field_name = f"Line {line_item_num}: {base_field}"
313
  base_field_for_color = base_field
314
 
315
+ # Guard in case something weird happens
316
+ if not storage_field_name:
317
+ storage_field_name = display_field_name
 
 
 
 
 
 
 
 
 
 
 
 
318
 
319
+ field_color = FIELD_COLORS.get(base_field_for_color or display_field_name, "#FF0000")
320
+ st.markdown(f"**Current:** <span style='color:{field_color}'>●</span> {display_field_name}", unsafe_allow_html=True)
321
 
322
+ # ========== ZOOM CONTROLS (NOW ON RHS) ==========
323
+ st.markdown("---")
324
+ st.markdown("#### 🔍 Zoom")
325
+
 
 
 
326
  current_zoom = st.session_state.zoom_values[selected_name]
327
  zoom_index = ZOOM_OPTIONS.index(current_zoom) if current_zoom in ZOOM_OPTIONS else 0
328
 
 
329
  def do_zoom_out():
330
  img = st.session_state.selected_image
331
  curr = st.session_state.zoom_values[img]
 
347
  pil_img = load_image(img_bytes)
348
  st.session_state.zoom_values[img] = get_default_zoom(pil_img)
349
 
350
+ zoom_col1, zoom_col2, zoom_col3, zoom_col4 = st.columns([2, 1, 1, 1])
351
 
352
+ with zoom_col1:
353
  zoom = st.selectbox(
354
+ "Zoom",
355
  options=ZOOM_OPTIONS,
356
  index=zoom_index,
357
  format_func=lambda x: f"{x}%",
 
360
  )
361
  st.session_state.zoom_values[selected_name] = zoom
362
 
363
+ with zoom_col2:
364
  st.button("➖", key="zoom_out", help="Zoom out", on_click=do_zoom_out)
365
 
366
+ with zoom_col3:
367
  st.button("➕", key="zoom_in", help="Zoom in", on_click=do_zoom_in)
368
 
369
+ with zoom_col4:
370
  st.button("Fit", key="zoom_fit", help="Fit to screen", on_click=do_zoom_fit)
371
+
372
+ st.caption(f"Original: {pil_image.width}×{pil_image.height}")
373
+
374
+ # ========== OCR & VALUE SECTION ==========
375
+ st.markdown("---")
376
+ st.markdown("#### ✏️ OCR & Value")
377
+
378
+ current_rect_orig = st.session_state.field_rects_orig[selected_name].get(storage_field_name)
379
+ current_val = st.session_state.field_values[selected_name].get(storage_field_name, "")
380
+
381
+ new_val = st.text_area("Value (auto-filled by OCR)", value=current_val, height=80, label_visibility="collapsed", placeholder="Value (auto-filled by OCR)")
382
+
383
+ col_btn1, col_btn2, col_btn3 = st.columns(3)
384
+
385
+ with col_btn1:
386
+ if st.button("💾 Save"):
387
+ st.session_state.field_values[selected_name][storage_field_name] = new_val
388
+ st.success("Saved!")
389
+
390
+ with col_btn2:
391
+ if current_rect_orig and st.button("🔄 Re-OCR"):
392
+ x1 = max(0, int(current_rect_orig["left"]))
393
+ y1 = max(0, int(current_rect_orig["top"]))
394
+ x2 = min(pil_image.width, int(current_rect_orig["left"] + current_rect_orig["width"]))
395
+ y2 = min(pil_image.height, int(current_rect_orig["top"] + current_rect_orig["height"]))
396
+ if x2 > x1 and y2 > y1:
397
+ crop = pil_image.crop((x1, y1, x2, y2))
398
+ try:
399
+ text = pytesseract.image_to_string(crop, config="--psm 6").strip()
400
+ if text:
401
+ st.session_state.field_values[selected_name][storage_field_name] = text
402
+ st.success(f"OCR: {text}")
403
+ else:
404
+ st.warning("Empty result")
405
+ except Exception as e:
406
+ st.error(f"OCR failed: {e}")
407
 
408
+ with col_btn3:
409
+ def delete_rect():
410
+ st.session_state.pending_delete = (selected_name, storage_field_name)
411
+
412
+ if current_rect_orig:
413
+ st.button("🗑️ Delete", on_click=delete_rect)
414
+
415
+ # ========== ALL VALUES SECTION ==========
416
+ with st.expander("📋 All Values"):
417
+ for f in SINGLE_FIELDS:
418
+ v = st.session_state.field_values[selected_name].get(f, "")
419
+ if v.strip():
420
+ st.write(f"**{f}:** {v}")
421
+ num_items = st.session_state.num_line_items[selected_name]
422
+ for i in range(1, num_items + 1):
423
+ vals = [(lif, st.session_state.field_values[selected_name].get(f"Line {i}: {lif}", ""))
424
+ for lif in LINE_ITEM_FIELDS]
425
+ vals = [(lif, v) for lif, v in vals if v.strip()]
426
+ if vals:
427
+ st.write(f"**Line {i}:**")
428
+ for lif, v in vals:
429
+ st.write(f" {lif}: {v}")
430
+
431
+ # ========== EXPORT SECTION ==========
432
+ st.markdown("---")
433
+ st.markdown("#### 📤 JSONL Export")
434
+
435
+ # Export ALL labeled remittances
436
+ records_all = [
437
+ build_gt_record_for_file(img["name"])
438
+ for img in images
439
+ if has_any_label(img["name"])
440
+ ]
441
+
442
+ if records_all:
443
+ all_jsonl_str = "\n".join(
444
+ json.dumps(rec, ensure_ascii=False) for rec in records_all
445
+ )
446
+ st.download_button(
447
+ "⬇️ Export ALL labeled (JSONL)",
448
+ data=all_jsonl_str.encode("utf-8"),
449
+ file_name="remittances_ground_truth.jsonl",
450
+ mime="application/json",
451
+ )
452
+ else:
453
+ st.caption("No labeled remittances yet.")
454
+
455
+ # Export CURRENT remittance
456
+ current_record = build_gt_record_for_file(selected_name)
457
+ with st.expander("Preview CURRENT JSON"):
458
+ st.json(current_record)
459
+
460
+ current_jsonl_str = json.dumps(current_record, ensure_ascii=False) + "\n"
461
+ st.download_button(
462
+ "⬇️ Export CURRENT (JSONL)",
463
+ data=current_jsonl_str.encode("utf-8"),
464
+ file_name=f"{os.path.splitext(selected_name)[0]}_remittance.jsonl",
465
+ mime="application/json",
466
+ )
467
+
468
+ with col1:
469
+ # ========== CANVAS / IMAGE (LEFT SIDE) ==========
470
  zoom = st.session_state.zoom_values[selected_name]
471
 
472
  scale = zoom / 100.0
 
475
 
476
  # Get display image - fresh PIL object each time from stable bytes
477
  display_image = get_display_image_from_bytes(image_bytes, disp_w, disp_h)
 
 
 
 
 
 
 
 
478
 
479
  def orig_to_display(rect_orig, s):
480
  return {
 
558
  else:
559
  st.toast(f"✅ Rectangle saved (no text detected)")
560
  except Exception:
561
+ st.toast(f"✅ Rectangle saved")