Ankushbl6 committed on
Commit
a3bb747
·
verified ·
1 Parent(s): 71cd379

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +585 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,587 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
 
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
1
+ import os
2
+ from io import BytesIO
3
+ import json
4
+
5
  import streamlit as st
6
+ from PIL import Image, ImageEnhance
7
+ from streamlit_drawable_canvas import st_canvas
8
+ import pytesseract
9
+
10
+ # Tesseract is installed via packages.txt on HuggingFace Spaces (Linux)
11
+ # No need to set path - it's in system PATH
12
+
13
+ st.set_page_config(
14
+ page_title="Remittance GT Annotator - Interactive OCR",
15
+ layout="wide"
16
+ )
17
+
18
+ st.title("Remittance GT Annotator - Interactive OCR")
19
+
20
+ # ---- Define fields ----
21
+ SINGLE_FIELDS = [
22
+ "Remittance Advice Number",
23
+ "Remittance Advice Date",
24
+ "Payment Method",
25
+ "FCY",
26
+ "Total Payment Amount in FCY",
27
+ "Payment Date",
28
+ "Payment Reference Number/Check Number",
29
+ "Customer Name",
30
+ "Customer Address",
31
+ "Customer Contact Information",
32
+ "Supplier Name",
33
+ "Supplier Address",
34
+ "Supplier Contact Information",
35
+ "Bank Name",
36
+ "Bank Account Number",
37
+ "Bank Routing Number",
38
+ "SWIFT/BIC Code",
39
+ ]
40
+
41
+ LINE_ITEM_FIELDS = [
42
+ "PO number",
43
+ "Invoice number",
44
+ "Other document reference number",
45
+ "Invoice Date",
46
+ "Invoice Amount in FCY",
47
+ "Amount Paid for Each Invoice in FCY",
48
+ "Outstanding Balance in FCY",
49
+ "Discounts Taken in FCY",
50
+ "Adjustments(Withholding Tax) in FCY",
51
+ "Description",
52
+ ]
53
+
54
+ COLOR_PALETTE = [
55
+ "#e6194b", "#3cb44b", "#ffe119", "#4363d8", "#f58231",
56
+ "#911eb4", "#46f0f0", "#f032e6", "#bcf60c", "#fabebe",
57
+ "#008080", "#e6beff", "#9a6324", "#fffac8", "#800000",
58
+ "#aaffc3", "#808000", "#ffd8b1", "#000075", "#808080",
59
+ "#ffe4e1", "#40e0d0", "#ff1493", "#7fffd4", "#b0e0e6",
60
+ "#ffb6c1", "#add8e6",
61
+ ]
62
+
63
+ ALL_BASE_FIELDS = SINGLE_FIELDS + LINE_ITEM_FIELDS
64
+ FIELD_COLORS = {field: COLOR_PALETTE[i % len(COLOR_PALETTE)] for i, field in enumerate(ALL_BASE_FIELDS)}
65
+
66
+ # ----- JSONL schema helper mappings -----
67
+ HEADER_GROUPS = {
68
+ "remittance_advice_details": {
69
+ "Remittance Advice Number": "remittance_advice_number",
70
+ "Remittance Advice Date": "remittance_advice_date",
71
+ "Payment Method": "payment_method",
72
+ "FCY": "fcy",
73
+ "Total Payment Amount in FCY": "total_payment_amount_in_fcy",
74
+ "Payment Date": "payment_date",
75
+ "Payment Reference Number/Check Number": "payment_reference_number_check_number",
76
+ },
77
+ "customer_supplier_details": {
78
+ "Customer Name": "customer_name",
79
+ "Customer Address": "customer_address",
80
+ "Customer Contact Information": "customer_contact_information",
81
+ "Supplier Name": "supplier_name",
82
+ "Supplier Address": "supplier_address",
83
+ "Supplier Contact Information": "supplier_contact_information",
84
+ },
85
+ "bank_details": {
86
+ "Bank Name": "bank_name",
87
+ "Bank Account Number": "bank_account_number",
88
+ "Bank Routing Number": "bank_routing_number",
89
+ "SWIFT/BIC Code": "swift_bic_code",
90
+ },
91
+ }
92
+
93
+ LINE_ITEM_FIELD_KEY_MAP = {
94
+ "PO number": "po_number",
95
+ "Invoice number": "invoice_number",
96
+ "Other document reference number": "other_document_reference_number",
97
+ "Invoice Date": "invoice_date",
98
+ "Invoice Amount in FCY": "invoice_amount_in_fcy",
99
+ "Amount Paid for Each Invoice in FCY": "amount_paid_for_each_invoice_in_fcy",
100
+ "Outstanding Balance in FCY": "outstanding_balance_in_fcy",
101
+ "Discounts Taken in FCY": "discounts_taken_in_fcy",
102
+ "Adjustments(Withholding Tax) in FCY": "adjustments_withholding_tax_in_fcy",
103
+ "Description": "description",
104
+ }
105
+
106
+ # Fixed zoom options
107
+ ZOOM_OPTIONS = [25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 110, 120, 130, 140, 150]
108
+
109
+ # ---- Session state ----
110
+ if "field_values" not in st.session_state:
111
+ st.session_state.field_values = {}
112
+ if "field_rects_orig" not in st.session_state:
113
+ st.session_state.field_rects_orig = {}
114
+ if "num_line_items" not in st.session_state:
115
+ st.session_state.num_line_items = {}
116
+ if "selected_image" not in st.session_state:
117
+ st.session_state.selected_image = None
118
+ if "zoom_values" not in st.session_state:
119
+ st.session_state.zoom_values = {}
120
+ if "rect_version" not in st.session_state:
121
+ st.session_state.rect_version = {}
122
+ if "image_data" not in st.session_state:
123
+ st.session_state.image_data = {}
124
+
125
+ # Pending delete - process at start before UI
126
+ if "pending_delete" not in st.session_state:
127
+ st.session_state.pending_delete = None
128
+
129
+ if st.session_state.pending_delete is not None:
130
+ img_name, field_key = st.session_state.pending_delete
131
+ if img_name in st.session_state.field_rects_orig:
132
+ st.session_state.field_rects_orig[img_name].pop(field_key, None)
133
+ if img_name in st.session_state.rect_version:
134
+ st.session_state.rect_version[img_name] += 1
135
+ st.session_state.pending_delete = None
136
+
137
+ # --- Helper functions ---
138
@st.cache_data
def load_image(file_content: bytes):
    """Decode raw image bytes into an RGB PIL image.

    The function is wrapped in ``st.cache_data``, so decoding is delegated to
    Streamlit's cache rather than repeated by this code on every call.

    Args:
        file_content: Encoded image bytes (as read from an uploaded file).

    Returns:
        The decoded image converted to ``"RGB"`` mode.
    """
    return Image.open(BytesIO(file_content)).convert("RGB")
141
+
142
def get_display_image_from_bytes(image_bytes, width, height):
    """Build a fresh, resized and lightly enhanced display image from bytes.

    Deliberately not cached: a new PIL object is decoded on every call so the
    caller never receives a stale PIL reference.

    Args:
        image_bytes: Encoded image bytes.
        width: Target display width in pixels.
        height: Target display height in pixels.

    Returns:
        An RGB PIL image resized with LANCZOS resampling, then sharpened
        (factor 1.2) and contrast-boosted (factor 1.1).
    """
    # A very small image at a low zoom can round down to 0 px on one axis,
    # which PIL's resize() rejects; clamp each side to at least one pixel.
    target_size = (max(1, int(width)), max(1, int(height)))

    pil_image = Image.open(BytesIO(image_bytes)).convert("RGB")
    resized = pil_image.resize(target_size, Image.LANCZOS)
    resized = ImageEnhance.Sharpness(resized).enhance(1.2)
    resized = ImageEnhance.Contrast(resized).enhance(1.1)
    return resized
149
+
150
def get_default_zoom(pil_image, max_width=850, max_height=900, zoom_options=None):
    """Pick the zoom option closest to a best-fit scale for an image.

    The best-fit scale is the largest factor (never above 1.0) that keeps the
    image within ``max_width`` x ``max_height``. It is converted to a
    truncated integer percentage and snapped to the nearest allowed option.

    Args:
        pil_image: Object exposing ``width`` and ``height`` in pixels.
        max_width: Maximum display width to fit into.
        max_height: Maximum display height to fit into.
        zoom_options: Allowed zoom percentages; defaults to the module-level
            ``ZOOM_OPTIONS`` when omitted.

    Returns:
        The entry of the zoom options nearest to the best-fit percentage
        (the earliest entry wins on ties).
    """
    options = ZOOM_OPTIONS if zoom_options is None else zoom_options

    if pil_image.width <= 0 or pil_image.height <= 0:
        # Degenerate image: nothing to fit, and dividing would fail.
        fit_scale = 1.0
    else:
        fit_scale = min(
            max_width / pil_image.width,
            max_height / pil_image.height,
            1.0,
        )

    fit_zoom = int(fit_scale * 100)
    return min(options, key=lambda option: abs(option - fit_zoom))
159
+
160
def build_gt_record_for_file(file_name: str) -> dict:
    """Assemble the ground-truth JSONL record for one remittance image.

    The record has this shape::

        {
          "file_name": "<image>",
          "gt_parse": {
            "remittance_advice_details": {...},
            "customer_supplier_details": {...},
            "bank_details": {...},
            "line_items": [...]
          }
        }

    Values are read from ``st.session_state.field_values[file_name]`` and
    the line-item count from ``st.session_state.num_line_items[file_name]``
    (defaulting to 1). Every value is stringified and stripped. Header
    sections always contain all their keys (empty string when unlabeled);
    a line item is emitted only if at least one of its cells is non-empty.

    Args:
        file_name: Name of the image whose annotations should be exported.

    Returns:
        A dict with ``"file_name"`` and ``"gt_parse"`` keys as shown above.
    """
    values = st.session_state.field_values.get(file_name, {})
    num_items = st.session_state.num_line_items.get(file_name, 1)

    def v(label: str) -> str:
        # Normalise any stored value to a stripped string; missing -> "".
        return str(values.get(label, "")).strip()

    # Header sections: one nested dict per group, keyed by the JSON name.
    gt_parse: dict = {
        section_name: {
            json_key: v(ui_label) for ui_label, json_key in mapping.items()
        }
        for section_name, mapping in HEADER_GROUPS.items()
    }

    # Line items are stored under "Line <n>: <field label>" keys (1-based).
    line_items = []
    for idx in range(1, num_items + 1):
        row = {
            json_key: v(f"Line {idx}: {ui_label}")
            for ui_label, json_key in LINE_ITEM_FIELD_KEY_MAP.items()
        }
        # Skip rows where every cell is empty.
        if any(row.values()):
            line_items.append(row)

    gt_parse["line_items"] = line_items

    return {
        "file_name": file_name,
        "gt_parse": gt_parse,
    }
208
+
209
def has_any_label(fname: str) -> bool:
    """Return True if the file has at least one non-blank labeled value.

    Looks up ``st.session_state.field_values[fname]`` (an empty mapping when
    the file has no entry) and checks whether any stored value is non-empty
    after stringifying and stripping whitespace.
    """
    vals = st.session_state.field_values.get(fname, {})
    return any(str(value).strip() for value in vals.values())
213
+
214
+ # --- Upload ---
215
+ uploaded_files = st.file_uploader(
216
+ "Upload remittance images",
217
+ type=["png", "jpg", "jpeg"],
218
+ accept_multiple_files=True,
219
+ )
220
+
221
+ if not uploaded_files:
222
+ st.info("Upload at least one image to begin.")
223
+ st.stop()
224
+
225
+ images = []
226
+ for f in uploaded_files:
227
+ f.seek(0)
228
+ content = f.read()
229
+ # Store image bytes in session state for stability across reruns
230
+ if f.name not in st.session_state.image_data:
231
+ st.session_state.image_data[f.name] = content
232
+ img = load_image(st.session_state.image_data[f.name])
233
+ images.append({"name": f.name, "image": img, "bytes": st.session_state.image_data[f.name]})
234
+
235
+ file_names = [img["name"] for img in images]
236
+ selected_name = st.selectbox("Select image", file_names)
237
+ st.session_state.selected_image = selected_name
238
+
239
+ selected_img_data = next(img for img in images if img["name"] == selected_name)
240
+ pil_image = selected_img_data["image"]
241
+ image_bytes = selected_img_data["bytes"]
242
+
243
+ # Init for this image
244
+ if selected_name not in st.session_state.field_values:
245
+ st.session_state.field_values[selected_name] = {}
246
+ if selected_name not in st.session_state.field_rects_orig:
247
+ st.session_state.field_rects_orig[selected_name] = {}
248
+ if selected_name not in st.session_state.num_line_items:
249
+ st.session_state.num_line_items[selected_name] = 1
250
+ if selected_name not in st.session_state.rect_version:
251
+ st.session_state.rect_version[selected_name] = 0
252
+ if selected_name not in st.session_state.zoom_values:
253
+ st.session_state.zoom_values[selected_name] = get_default_zoom(pil_image)
254
+
255
+ # ========== FIELD SELECTION ==========
256
+ st.markdown("---")
257
+
258
def add_line_item():
    """Button callback: add one line item to the currently selected image.

    Reads the image name from ``st.session_state.selected_image`` and
    increments its counter in ``st.session_state.num_line_items``. Does
    nothing when no image is selected.
    """
    img = st.session_state.selected_image
    if img:
        st.session_state.num_line_items[img] += 1
262
+
263
def remove_line_item():
    """Button callback: drop the last line item of the selected image.

    The last line item's stored values and rectangles (keys of the form
    ``"Line <n>: <field>"``) are discarded, the line-item counter is
    decremented, and the image's ``rect_version`` is bumped. Nothing happens
    when no image is selected or only one line item remains.
    """
    img = st.session_state.selected_image
    if not img or st.session_state.num_line_items[img] <= 1:
        return

    last_num = st.session_state.num_line_items[img]
    for lif in LINE_ITEM_FIELDS:
        key = f"Line {last_num}: {lif}"
        st.session_state.field_values[img].pop(key, None)
        st.session_state.field_rects_orig[img].pop(key, None)

    st.session_state.num_line_items[img] -= 1
    # rect_version is part of the canvas key, so bumping it forces a redraw.
    st.session_state.rect_version[img] += 1
273
+
274
+ # Initialize field variables with defaults
275
+ display_field_name = SINGLE_FIELDS[0]
276
+ storage_field_name = SINGLE_FIELDS[0]
277
+ base_field_for_color = SINGLE_FIELDS[0]
278
+
279
+ sel_col1, sel_col2, sel_col3, sel_col4 = st.columns([1.5, 1.5, 2, 2])
280
+
281
+ with sel_col1:
282
+ field_type = st.radio("Type", ["Single", "Line Item"], horizontal=True, label_visibility="collapsed")
283
+
284
+ with sel_col2:
285
+ if field_type == "Single":
286
+ field_name = st.selectbox("Field", SINGLE_FIELDS, label_visibility="collapsed")
287
+ display_field_name = field_name
288
+ storage_field_name = field_name
289
+ base_field_for_color = field_name
290
+ else:
291
+ num_items = st.session_state.num_line_items[selected_name]
292
+ line_item_options = [f"Line {i+1}" for i in range(num_items)]
293
+ selected_line_item = st.selectbox("Line", line_item_options, label_visibility="collapsed")
294
+ line_item_num = int(selected_line_item.split()[1])
295
+
296
+ with sel_col3:
297
+ if field_type == "Line Item":
298
+ base_field = st.selectbox("Field", LINE_ITEM_FIELDS, label_visibility="collapsed")
299
+ display_field_name = f"{selected_line_item}: {base_field}"
300
+ storage_field_name = f"Line {line_item_num}: {base_field}"
301
+ base_field_for_color = base_field
302
+
303
+ with sel_col4:
304
+ if field_type == "Line Item":
305
+ # Line items +/- buttons next to line item dropdown
306
+ add_col, rem_col, info_col = st.columns([1, 1, 2])
307
+ with add_col:
308
+ st.button("➕", key=f"addli_{selected_name}", on_click=add_line_item, help="Add line item")
309
+ with rem_col:
310
+ if st.session_state.num_line_items[selected_name] > 1:
311
+ st.button("➖", key=f"remli_{selected_name}", on_click=remove_line_item, help="Remove line item")
312
+ with info_col:
313
+ st.write(f"Lines: **{st.session_state.num_line_items[selected_name]}**")
314
+
315
+ # Guard in case something weird happens
316
+ if not storage_field_name:
317
+ storage_field_name = display_field_name
318
+
319
+ field_color = FIELD_COLORS.get(base_field_for_color or display_field_name, "#FF0000")
320
+
321
+ st.markdown(f"**Current:** <span style='color:{field_color}'>●</span> {display_field_name}", unsafe_allow_html=True)
322
+
323
+ # ========== MAIN COLUMNS ==========
324
+ col1, col2 = st.columns([3, 2])
325
+
326
+ with col1:
327
+ # Zoom controls - selectbox + buttons
328
+ current_zoom = st.session_state.zoom_values[selected_name]
329
+ zoom_index = ZOOM_OPTIONS.index(current_zoom) if current_zoom in ZOOM_OPTIONS else 0
330
+
331
+ # Zoom callbacks
332
def do_zoom_out():
    """Button callback: step the selected image's zoom down one option.

    The current zoom is located in ``ZOOM_OPTIONS`` (treated as index 0 if
    it is not one of the options); if a smaller option exists it becomes the
    new value in ``st.session_state.zoom_values``.
    """
    img = st.session_state.selected_image
    curr = st.session_state.zoom_values[img]
    idx = ZOOM_OPTIONS.index(curr) if curr in ZOOM_OPTIONS else 0
    if idx > 0:
        st.session_state.zoom_values[img] = ZOOM_OPTIONS[idx - 1]
338
+
339
def do_zoom_in():
    """Button callback: step the selected image's zoom up one option.

    The current zoom is located in ``ZOOM_OPTIONS`` (treated as index 0 if
    it is not one of the options); if a larger option exists it becomes the
    new value in ``st.session_state.zoom_values``.
    """
    img = st.session_state.selected_image
    curr = st.session_state.zoom_values[img]
    idx = ZOOM_OPTIONS.index(curr) if curr in ZOOM_OPTIONS else 0
    if idx < len(ZOOM_OPTIONS) - 1:
        st.session_state.zoom_values[img] = ZOOM_OPTIONS[idx + 1]
345
+
346
def do_zoom_fit():
    """Button callback: reset the selected image's zoom to its best fit.

    Looks up the image bytes stored in ``st.session_state.image_data``; when
    present, decodes them with ``load_image`` and stores the result of
    ``get_default_zoom`` as the image's zoom. Does nothing when no bytes are
    stored for the selected image.
    """
    img = st.session_state.selected_image
    img_bytes = st.session_state.image_data.get(img)
    if img_bytes:
        pil_img = load_image(img_bytes)
        st.session_state.zoom_values[img] = get_default_zoom(pil_img)
352
+
353
+ zoom_row1, zoom_row2, zoom_row3, zoom_row4 = st.columns([2, 1, 1, 1])
354
+
355
+ with zoom_row1:
356
+ zoom = st.selectbox(
357
+ "🔍 Zoom",
358
+ options=ZOOM_OPTIONS,
359
+ index=zoom_index,
360
+ format_func=lambda x: f"{x}%",
361
+ key=f"zoom_select_{selected_name}",
362
+ label_visibility="collapsed"
363
+ )
364
+ st.session_state.zoom_values[selected_name] = zoom
365
+
366
+ with zoom_row2:
367
+ st.button("➖", key="zoom_out", help="Zoom out", on_click=do_zoom_out)
368
+
369
+ with zoom_row3:
370
+ st.button("➕", key="zoom_in", help="Zoom in", on_click=do_zoom_in)
371
+
372
+ with zoom_row4:
373
+ st.button("Fit", key="zoom_fit", help="Fit to screen", on_click=do_zoom_fit)
374
+
375
+ # Get current zoom value
376
+ zoom = st.session_state.zoom_values[selected_name]
377
+
378
+ scale = zoom / 100.0
379
+ disp_w = int(pil_image.width * scale)
380
+ disp_h = int(pil_image.height * scale)
381
+
382
+ # Get display image - fresh PIL object each time from stable bytes
383
+ display_image = get_display_image_from_bytes(image_bytes, disp_w, disp_h)
384
+
385
+ st.caption(f"Original: {pil_image.width}×{pil_image.height} | Display: {disp_w}×{disp_h}")
386
+
387
+ has_rect = storage_field_name in st.session_state.field_rects_orig[selected_name]
388
+ if has_rect:
389
+ st.success(f"✅ Has rectangle. Draw again to replace.")
390
+ else:
391
+ st.warning(f"⬜ Draw rectangle for this field")
392
+
393
def orig_to_display(rect_orig, s):
    """Convert a rectangle from original-image pixels to canvas pixels.

    Args:
        rect_orig: Mapping with ``left``, ``top``, ``width`` and ``height``
            in original-image coordinates; ``stroke`` and ``strokeWidth``
            are optional.
        s: Display scale factor (display size / original size).

    Returns:
        A canvas ``rect`` object dict: geometry multiplied by ``s``, a
        transparent fill, stroke settings carried over (defaulting to
        ``"#FF0000"`` / ``2``) and unit ``scaleX`` / ``scaleY``.
    """
    return {
        "type": "rect",
        "left": rect_orig["left"] * s,
        "top": rect_orig["top"] * s,
        "width": rect_orig["width"] * s,
        "height": rect_orig["height"] * s,
        "fill": "rgba(0,0,0,0)",
        "stroke": rect_orig.get("stroke", "#FF0000"),
        "strokeWidth": rect_orig.get("strokeWidth", 2),
        "scaleX": 1,
        "scaleY": 1,
    }
406
+
407
def display_to_orig(rect_display, s):
    """Convert a canvas rectangle back to original-image pixels.

    The object's ``width`` / ``height`` are first multiplied by its own
    ``scaleX`` / ``scaleY`` (default 1), then all geometry is divided by the
    display scale ``s``.

    Args:
        rect_display: Canvas object dict; missing geometry keys count as 0.
        s: Display scale factor (display size / original size); must be
            non-zero.

    Returns:
        Dict with ``left``, ``top``, ``width``, ``height`` in original-image
        coordinates plus ``stroke`` and ``strokeWidth`` (defaulting to
        ``"#FF0000"`` / ``2``).
    """
    scaled_w = rect_display.get("width", 0) * rect_display.get("scaleX", 1)
    scaled_h = rect_display.get("height", 0) * rect_display.get("scaleY", 1)
    return {
        "left": rect_display.get("left", 0) / s,
        "top": rect_display.get("top", 0) / s,
        "width": scaled_w / s,
        "height": scaled_h / s,
        "stroke": rect_display.get("stroke", "#FF0000"),
        "strokeWidth": rect_display.get("strokeWidth", 2),
    }
418
+
419
+ # Build display objects from stored rectangles
420
+ all_display_objects = []
421
+ for fld, rect_orig in st.session_state.field_rects_orig[selected_name].items():
422
+ disp_rect = orig_to_display(rect_orig, scale)
423
+ base = fld.split(": ", 1)[1] if ": " in fld else fld
424
+ disp_rect["stroke"] = FIELD_COLORS.get(base, "#FF0000")
425
+ disp_rect["strokeWidth"] = 3 if fld == storage_field_name else 2
426
+ all_display_objects.append(disp_rect)
427
+
428
+ initial_drawing = {"version": "4.4.0", "objects": all_display_objects}
429
+ expected_count = len(all_display_objects)
430
+
431
+ # Canvas key: includes rect count to force refresh when rectangles change
432
+ rect_ver = st.session_state.rect_version[selected_name]
433
+ num_rects = len(st.session_state.field_rects_orig[selected_name])
434
+ canvas_key = f"canvas_{selected_name}_z{zoom}_rv{rect_ver}_n{num_rects}"
435
+
436
+ # Render canvas
437
+ canvas_result = st_canvas(
438
+ background_image=display_image,
439
+ height=disp_h,
440
+ width=disp_w,
441
+ drawing_mode="rect",
442
+ stroke_width=3,
443
+ stroke_color=field_color,
444
+ fill_color="rgba(255,0,0,0.1)",
445
+ update_streamlit=True,
446
+ initial_drawing=initial_drawing,
447
+ key=canvas_key,
448
+ )
449
+
450
# Detect a newly drawn rectangle: the canvas was seeded with
# `expected_count` stored rectangles, so any extra object is treated as the
# user's new drawing for the currently selected field.
if canvas_result.json_data is not None:
    objs = canvas_result.json_data.get("objects", [])
    if len(objs) > expected_count:
        new_rect_orig = display_to_orig(objs[-1], scale)
        new_rect_orig["stroke"] = field_color
        st.session_state.field_rects_orig[selected_name][storage_field_name] = new_rect_orig

        # Always bump the version so the canvas key changes on the next run.
        # When a field's rectangle is *replaced*, the rectangle count stays
        # the same, so without this the key would be unchanged and the extra
        # object could be picked up again as a "new" rectangle on a later
        # rerun (for whichever field is selected then).
        st.session_state.rect_version[selected_name] += 1

        # Auto-run OCR on the region, clamped to the image bounds.
        x1 = max(0, int(new_rect_orig["left"]))
        y1 = max(0, int(new_rect_orig["top"]))
        x2 = min(pil_image.width, int(new_rect_orig["left"] + new_rect_orig["width"]))
        y2 = min(pil_image.height, int(new_rect_orig["top"] + new_rect_orig["height"]))

        if x2 > x1 and y2 > y1:
            crop = pil_image.crop((x1, y1, x2, y2))
            try:
                text = pytesseract.image_to_string(crop, config="--psm 6").strip()
            except Exception as exc:
                # Best effort: the rectangle is kept, but the failure is
                # reported instead of being shown as a plain success.
                st.toast(f"⚠️ Rectangle saved, but OCR failed: {exc}")
            else:
                if text:
                    st.session_state.field_values[selected_name][storage_field_name] = text
                    st.toast(f"✅ OCR: {text[:50]}{'...' if len(text) > 50 else ''}")
                else:
                    st.toast(f"✅ Rectangle saved (no text detected)")
476
+
477
+ with col2:
478
+ # ========== ALL VALUES SECTION (MOVED UP) ==========
479
+ st.markdown("---")
480
+ single_rects = sum(1 for f in st.session_state.field_rects_orig[selected_name] if not f.startswith("Line "))
481
+ num_items = st.session_state.num_line_items[selected_name]
482
+ line_rects = sum(1 for f in st.session_state.field_rects_orig[selected_name] if f.startswith("Line "))
483
+
484
+ st.write(f"**Single:** {single_rects}/{len(SINGLE_FIELDS)} | **Lines ({num_items}):** {line_rects}/{num_items * len(LINE_ITEM_FIELDS)}")
485
+
486
+ with st.expander("📋 All Values"):
487
+ for f in SINGLE_FIELDS:
488
+ v = st.session_state.field_values[selected_name].get(f, "")
489
+ if v.strip():
490
+ st.write(f"**{f}:** {v}")
491
+ for i in range(1, num_items + 1):
492
+ vals = [(lif, st.session_state.field_values[selected_name].get(f"Line {i}: {lif}", ""))
493
+ for lif in LINE_ITEM_FIELDS]
494
+ vals = [(lif, v) for lif, v in vals if v.strip()]
495
+ if vals:
496
+ st.write(f"**Line {i}:**")
497
+ for lif, v in vals:
498
+ st.write(f" {lif}: {v}")
499
+
500
+ # ========== OCR & VALUE SECTION (MOVED DOWN) ==========
501
+ st.markdown("---")
502
+ st.subheader("OCR & Value")
503
+
504
+ current_rect_orig = st.session_state.field_rects_orig[selected_name].get(storage_field_name)
505
+ current_val = st.session_state.field_values[selected_name].get(storage_field_name, "")
506
+
507
+ if current_rect_orig:
508
+ st.caption(f"📐 ({current_rect_orig['left']:.0f}, {current_rect_orig['top']:.0f}) - {current_rect_orig['width']:.0f}×{current_rect_orig['height']:.0f}")
509
+
510
+ x1 = max(0, int(current_rect_orig["left"]))
511
+ y1 = max(0, int(current_rect_orig["top"]))
512
+ x2 = min(pil_image.width, int(current_rect_orig["left"] + current_rect_orig["width"]))
513
+ y2 = min(pil_image.height, int(current_rect_orig["top"] + current_rect_orig["height"]))
514
+ if x2 > x1 and y2 > y1:
515
+ crop = pil_image.crop((x1, y1, x2, y2))
516
+ st.image(crop, caption="Selected Region", width=200)
517
+
518
+ new_val = st.text_area("Value (auto-filled by OCR)", value=current_val, height=80)
519
+
520
+ col_btn1, col_btn2, col_btn3 = st.columns(3)
521
+
522
+ with col_btn1:
523
+ if st.button("💾 Save"):
524
+ st.session_state.field_values[selected_name][storage_field_name] = new_val
525
+ st.success("Saved!")
526
+
527
+ with col_btn2:
528
+ if current_rect_orig and st.button("🔄 Re-OCR"):
529
+ x1 = max(0, int(current_rect_orig["left"]))
530
+ y1 = max(0, int(current_rect_orig["top"]))
531
+ x2 = min(pil_image.width, int(current_rect_orig["left"] + current_rect_orig["width"]))
532
+ y2 = min(pil_image.height, int(current_rect_orig["top"] + current_rect_orig["height"]))
533
+ if x2 > x1 and y2 > y1:
534
+ crop = pil_image.crop((x1, y1, x2, y2))
535
+ try:
536
+ text = pytesseract.image_to_string(crop, config="--psm 6").strip()
537
+ if text:
538
+ st.session_state.field_values[selected_name][storage_field_name] = text
539
+ st.success(f"OCR: {text}")
540
+ else:
541
+ st.warning("Empty result")
542
+ except Exception as e:
543
+ st.error(f"OCR failed: {e}")
544
+
545
+ with col_btn3:
546
def delete_rect():
    """Button callback: queue deletion of the current field's rectangle.

    Only records ``(selected_name, storage_field_name)`` in
    ``st.session_state.pending_delete``; this callback does not remove the
    rectangle itself.
    """
    st.session_state.pending_delete = (selected_name, storage_field_name)
548
+
549
+ if current_rect_orig:
550
+ st.button("🗑️ Delete", on_click=delete_rect)
551
+
552
+ # ========== EXPORT SECTION ==========
553
+ st.markdown("---")
554
+ st.subheader("📤 JSONL Export")
555
+
556
+ # Export ALL labeled remittances
557
+ records_all = [
558
+ build_gt_record_for_file(img["name"])
559
+ for img in images
560
+ if has_any_label(img["name"])
561
+ ]
562
+
563
+ if records_all:
564
+ all_jsonl_str = "\n".join(
565
+ json.dumps(rec, ensure_ascii=False) for rec in records_all
566
+ )
567
+ st.download_button(
568
+ "⬇️ Export ALL labeled remittances (JSONL)",
569
+ data=all_jsonl_str.encode("utf-8"),
570
+ file_name="remittances_ground_truth.jsonl",
571
+ mime="application/json",
572
+ )
573
+ else:
574
+ st.caption("No labeled remittances yet to export in bulk.")
575
+
576
+ # Export CURRENT remittance
577
+ current_record = build_gt_record_for_file(selected_name)
578
+ with st.expander("Preview CURRENT remittance JSON"):
579
+ st.json(current_record)
580
 
581
+ current_jsonl_str = json.dumps(current_record, ensure_ascii=False) + "\n"
582
+ st.download_button(
583
+ "⬇️ Export CURRENT remittance (JSONL)",
584
+ data=current_jsonl_str.encode("utf-8"),
585
+ file_name=f"{os.path.splitext(selected_name)[0]}_remittance.jsonl",
586
+ mime="application/json",
587
+ )