rasmodev committed on
Commit
f4bc948
·
verified ·
1 Parent(s): cfbc812

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +344 -508
app.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- Valuation Sheet OCR — Production Streamlit App
3
  Model: rasmodev/Handwriting_trocr_model
4
  """
5
  import io, time, logging
@@ -8,387 +8,282 @@ import pandas as pd
8
  from PIL import Image
9
 
10
  st.set_page_config(
11
- page_title="ValuationAI — OCR",
12
  page_icon="📋",
13
  layout="wide",
14
  initial_sidebar_state="collapsed",
15
  )
16
  logging.basicConfig(level=logging.INFO)
17
 
18
- # ── Design System ─────────────────────────────────────────────────────────────
19
  st.markdown("""
20
  <style>
21
- @import url('https://fonts.googleapis.com/css2?family=Playfair+Display:wght@400;600;700&family=Outfit:wght@300;400;500;600&display=swap');
22
 
23
- *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
24
-
25
- html, body, [class*="css"], .stApp {
26
  font-family: 'Outfit', sans-serif;
27
- background: #0A0F1E;
28
- color: #E8EAF0;
29
  }
30
 
31
- /* ── Hide default Streamlit chrome ── */
32
- #MainMenu, footer, header { visibility: hidden; }
33
- .block-container { padding: 0 !important; max-width: 100% !important; }
34
 
35
- /* ── Page shell ── */
36
- .page-shell {
37
- min-height: 100vh;
38
- background:
39
- radial-gradient(ellipse 80% 50% at 10% 0%, rgba(99,102,241,0.18) 0%, transparent 60%),
40
- radial-gradient(ellipse 60% 40% at 90% 100%, rgba(16,185,129,0.12) 0%, transparent 55%),
41
- #0A0F1E;
42
- padding: 2.5rem 3rem 4rem;
43
  }
44
 
45
- /* ── Nav bar ── */
46
- .nav {
 
 
 
47
  display: flex;
48
  align-items: center;
49
  justify-content: space-between;
50
- margin-bottom: 3.5rem;
51
  padding-bottom: 1.5rem;
52
  border-bottom: 1px solid rgba(255,255,255,0.06);
 
53
  }
54
- .nav-brand {
55
- display: flex; align-items: center; gap: 0.75rem;
56
- }
57
- .nav-dot {
58
  width: 10px; height: 10px; border-radius: 50%;
59
  background: #6366F1;
60
- box-shadow: 0 0 12px rgba(99,102,241,0.8);
61
- animation: pulse 2s infinite;
62
  }
63
- @keyframes pulse {
64
- 0%, 100% { opacity: 1; transform: scale(1); }
65
- 50% { opacity: 0.6; transform: scale(0.85); }
66
  }
67
- .nav-title {
68
  font-family: 'Playfair Display', serif;
69
- font-size: 1.35rem;
70
- font-weight: 700;
71
- letter-spacing: -0.02em;
72
- color: #fff;
73
- }
74
- .nav-subtitle {
75
- font-size: 0.72rem;
76
- font-weight: 500;
77
- letter-spacing: 0.12em;
78
- text-transform: uppercase;
79
- color: #6B7280;
80
- }
81
- .nav-badge {
82
- font-size: 0.72rem;
83
- background: rgba(99,102,241,0.15);
84
- border: 1px solid rgba(99,102,241,0.3);
85
- color: #A5B4FC;
86
- padding: 0.3rem 0.8rem;
87
- border-radius: 20px;
88
- font-weight: 500;
89
- letter-spacing: 0.04em;
90
  }
91
 
92
  /* ── Hero ── */
93
- .hero-section {
94
- margin-bottom: 3rem;
95
- }
96
  .hero-eyebrow {
97
- font-size: 0.72rem;
98
- font-weight: 600;
99
- letter-spacing: 0.18em;
100
- text-transform: uppercase;
101
- color: #6366F1;
102
- margin-bottom: 0.9rem;
103
- }
104
- .hero-headline {
105
  font-family: 'Playfair Display', serif;
106
- font-size: clamp(2.4rem, 4vw, 3.6rem);
107
- font-weight: 700;
108
- line-height: 1.1;
109
- letter-spacing: -0.03em;
110
- color: #fff;
111
- margin-bottom: 1rem;
112
- }
113
- .hero-headline span {
114
- background: linear-gradient(135deg, #6366F1 0%, #10B981 100%);
115
- -webkit-background-clip: text;
116
- -webkit-text-fill-color: transparent;
117
- background-clip: text;
118
  }
119
- .hero-desc {
120
- font-size: 1.05rem;
121
- font-weight: 300;
122
- color: #9CA3AF;
123
- line-height: 1.7;
124
- max-width: 520px;
125
  }
126
-
127
- /* ── Two-col layout ── */
128
- .two-col {
129
- display: grid;
130
- grid-template-columns: 1fr 1fr;
131
- gap: 1.5rem;
132
- margin-bottom: 1.5rem;
133
  }
134
 
135
  /* ── Cards ── */
136
- .glass-card {
137
- background: rgba(255,255,255,0.03);
138
- border: 1px solid rgba(255,255,255,0.07);
139
- border-radius: 20px;
140
- padding: 2rem;
141
- backdrop-filter: blur(12px);
142
  transition: border-color 0.2s;
143
  }
144
- .glass-card:hover { border-color: rgba(99,102,241,0.3); }
145
-
146
  .card-label {
147
- font-size: 0.68rem;
148
- font-weight: 600;
149
- letter-spacing: 0.14em;
150
- text-transform: uppercase;
151
- color: #6B7280;
152
- margin-bottom: 1.2rem;
153
- display: flex; align-items: center; gap: 0.5rem;
154
  }
155
  .card-label::before {
156
- content: '';
157
- display: inline-block;
158
- width: 18px; height: 2px;
159
- background: #6366F1;
160
- border-radius: 2px;
161
- }
162
-
163
- /* ── Upload zone ── */
164
- .upload-zone {
165
- border: 2px dashed rgba(99,102,241,0.35);
166
- border-radius: 16px;
167
- padding: 3rem 2rem;
168
- text-align: center;
169
- background: rgba(99,102,241,0.04);
170
- transition: all 0.25s;
171
- cursor: pointer;
172
- margin-bottom: 1rem;
173
- }
174
- .upload-zone:hover {
175
- border-color: rgba(99,102,241,0.7);
176
- background: rgba(99,102,241,0.08);
177
- }
178
- .upload-icon { font-size: 2.5rem; margin-bottom: 0.75rem; }
179
- .upload-main {
180
- font-size: 1rem; font-weight: 500; color: #E8EAF0; margin-bottom: 0.4rem;
181
- }
182
- .upload-sub { font-size: 0.8rem; color: #6B7280; }
183
- .file-chip {
184
- display: inline-flex; align-items: center; gap: 0.4rem;
185
  background: rgba(16,185,129,0.1);
186
- border: 1px solid rgba(16,185,129,0.25);
187
- color: #6EE7B7;
188
- padding: 0.3rem 0.75rem;
189
- border-radius: 20px;
190
- font-size: 0.75rem;
191
- font-weight: 500;
192
- margin: 0.2rem;
193
- }
194
-
195
- /* ── Engine pills ── */
196
- .engine-row { display: flex; gap: 0.6rem; margin-top: 0.5rem; flex-wrap: wrap; }
197
- .engine-pill {
198
- font-size: 0.75rem; font-weight: 500;
199
- padding: 0.35rem 0.9rem; border-radius: 20px;
200
- border: 1px solid rgba(255,255,255,0.1);
201
- color: #9CA3AF; background: transparent; cursor: pointer;
202
- transition: all 0.2s;
203
- }
204
- .engine-pill.active {
205
- background: rgba(99,102,241,0.2);
206
- border-color: rgba(99,102,241,0.5);
207
- color: #A5B4FC;
208
- }
209
-
210
- /* ── Run button ── */
211
- .stButton > button {
212
- width: 100% !important;
213
- background: linear-gradient(135deg, #6366F1 0%, #4F46E5 100%) !important;
214
- color: white !important;
215
- border: none !important;
216
- border-radius: 14px !important;
217
- padding: 1rem 2rem !important;
218
- font-family: 'Outfit', sans-serif !important;
219
- font-size: 0.95rem !important;
220
- font-weight: 600 !important;
221
- letter-spacing: 0.02em !important;
222
- cursor: pointer !important;
223
- transition: all 0.2s !important;
224
- box-shadow: 0 4px 24px rgba(99,102,241,0.35) !important;
225
- margin-top: 1rem !important;
226
  }
227
- .stButton > button:hover {
228
- transform: translateY(-1px) !important;
229
- box-shadow: 0 8px 32px rgba(99,102,241,0.5) !important;
 
 
230
  }
231
- .stButton > button:disabled {
232
- background: rgba(255,255,255,0.05) !important;
233
- color: #4B5563 !important;
234
- box-shadow: none !important;
235
- transform: none !important;
236
- cursor: not-allowed !important;
237
  }
238
-
239
- /* ── Progress ── */
240
- .stProgress > div > div > div {
241
- background: linear-gradient(90deg, #6366F1, #10B981) !important;
242
- border-radius: 4px !important;
243
  }
244
- .stProgress > div > div {
245
- background: rgba(255,255,255,0.06) !important;
246
- border-radius: 4px !important;
247
- height: 6px !important;
248
- }
249
-
250
- /* ── Stat cards ── */
251
- .stat-row {
252
- display: grid;
253
- grid-template-columns: repeat(4, 1fr);
254
- gap: 1rem;
255
- margin-bottom: 1.5rem;
256
- }
257
- .stat-card {
258
- background: rgba(255,255,255,0.03);
259
- border: 1px solid rgba(255,255,255,0.07);
260
- border-radius: 16px;
261
- padding: 1.4rem 1.6rem;
262
- position: relative;
263
- overflow: hidden;
264
- }
265
- .stat-card::before {
266
- content: '';
267
- position: absolute;
268
- top: 0; left: 0; right: 0;
269
- height: 2px;
270
- background: linear-gradient(90deg, #6366F1, #10B981);
271
- }
272
- .stat-value {
273
  font-family: 'Playfair Display', serif;
274
- font-size: 2.2rem;
275
- font-weight: 700;
276
- color: #fff;
277
- line-height: 1;
278
- margin-bottom: 0.3rem;
279
- }
280
- .stat-label {
281
- font-size: 0.72rem;
282
- font-weight: 500;
283
- letter-spacing: 0.1em;
284
- text-transform: uppercase;
285
- color: #6B7280;
286
- }
287
-
288
- /* ── Dataframe ── */
289
- .stDataFrame {
290
- border-radius: 16px !important;
291
- overflow: hidden !important;
292
- border: 1px solid rgba(255,255,255,0.07) !important;
293
  }
294
- .stDataFrame iframe { border-radius: 16px !important; }
295
 
296
- /* ── Download button ── */
297
- div[data-testid="stDownloadButton"] button {
298
- background: rgba(16,185,129,0.12) !important;
299
- border: 1px solid rgba(16,185,129,0.3) !important;
300
- color: #6EE7B7 !important;
301
- border-radius: 12px !important;
302
- padding: 0.75rem 1.5rem !important;
303
- font-family: 'Outfit', sans-serif !important;
304
- font-weight: 600 !important;
305
- font-size: 0.9rem !important;
306
- box-shadow: none !important;
307
- width: auto !important;
308
- transition: all 0.2s !important;
309
  }
310
- div[data-testid="stDownloadButton"] button:hover {
311
- background: rgba(16,185,129,0.22) !important;
312
- border-color: rgba(16,185,129,0.5) !important;
313
- transform: translateY(-1px) !important;
 
 
 
 
 
 
314
  }
315
 
316
- /* ── File uploader override ── */
317
- [data-testid="stFileUploader"] {
318
- background: transparent !important;
 
319
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
320
  [data-testid="stFileUploader"] section {
321
  background: rgba(99,102,241,0.04) !important;
322
- border: 2px dashed rgba(99,102,241,0.35) !important;
323
- border-radius: 16px !important;
324
- padding: 2rem !important;
325
- transition: all 0.25s !important;
326
  }
327
  [data-testid="stFileUploader"] section:hover {
328
- border-color: rgba(99,102,241,0.7) !important;
329
  background: rgba(99,102,241,0.08) !important;
330
  }
331
- [data-testid="stFileUploader"] label { color: #9CA3AF !important; font-size: 0.9rem !important; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
 
333
- /* ── Selectbox ── */
334
  [data-testid="stSelectbox"] > div > div {
335
  background: rgba(255,255,255,0.04) !important;
336
- border: 1px solid rgba(255,255,255,0.1) !important;
337
- border-radius: 12px !important;
338
- color: #E8EAF0 !important;
339
  }
340
 
341
- /* ── Divider ── */
342
- .section-divider {
343
- height: 1px;
344
- background: linear-gradient(90deg, transparent, rgba(255,255,255,0.08), transparent);
345
- margin: 2.5rem 0;
346
  }
347
 
348
- /* ── Result header ── */
349
- .result-header {
350
- display: flex; align-items: center; justify-content: space-between;
351
- margin-bottom: 1.5rem;
352
  }
353
- .result-title {
354
- font-family: 'Playfair Display', serif;
355
- font-size: 1.5rem;
356
- font-weight: 600;
357
- color: #fff;
358
- }
359
- .result-tag {
360
- font-size: 0.72rem;
361
- font-weight: 600;
362
- letter-spacing: 0.1em;
363
- text-transform: uppercase;
364
- color: #10B981;
365
- background: rgba(16,185,129,0.1);
366
- border: 1px solid rgba(16,185,129,0.2);
367
- padding: 0.3rem 0.8rem;
368
- border-radius: 20px;
369
  }
370
 
371
- /* ── Toast-like success ── */
372
- .success-banner {
373
- background: rgba(16,185,129,0.1);
374
- border: 1px solid rgba(16,185,129,0.25);
375
- border-radius: 12px;
376
- padding: 1rem 1.5rem;
377
- display: flex; align-items: center; gap: 0.75rem;
378
- margin-bottom: 1.5rem;
379
- font-size: 0.9rem;
380
- color: #6EE7B7;
381
- font-weight: 500;
382
- }
383
-
384
- /* Spinner color */
385
- .stSpinner > div { border-top-color: #6366F1 !important; }
 
 
 
 
 
 
 
 
 
 
 
386
  </style>
387
  """, unsafe_allow_html=True)
388
 
389
 
390
- # ── Model loading ─────────────────────────────────────────────────────────────
391
- @st.cache_resource(show_spinner="Initialising model…")
 
 
392
  def load_model():
393
  import torch
394
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
@@ -405,7 +300,9 @@ def load_easyocr():
405
  return easyocr.Reader(["en"], gpu=False)
406
 
407
 
408
- # ── OCR ───────────────────────────────────────────────────────────────────────
 
 
409
  def run_finetuned(img):
410
  import torch
411
  processor, model, device = load_model()
@@ -422,12 +319,10 @@ def run_easyocr(img):
422
  def ocr_image(img, engine):
423
  try:
424
  return run_finetuned(img) if engine == "finetuned" else run_easyocr(img)
425
- except Exception as e:
426
  try: return run_easyocr(img)
427
  except: return ""
428
 
429
-
430
- # ── PDF handler ───────────────────────────────────────────────────────────────
431
  def pdf_to_images(file_bytes, dpi=200, max_pages=None):
432
  import fitz
433
  doc = fitz.open(stream=file_bytes, filetype="pdf")
@@ -440,8 +335,6 @@ def pdf_to_images(file_bytes, dpi=200, max_pages=None):
440
  doc.close()
441
  return pages
442
 
443
-
444
- # ── Field extraction ──────────────────────────────────────────────────────────
445
  def extract_fields(text, filename):
446
  import re
447
  def find(label):
@@ -450,179 +343,151 @@ def extract_fields(text, filename):
450
  amounts = re.findall(r'AMT:\s*([\d,]+)', text)
451
  dates = re.search(r'DATE:\s*(\d{4}-\d{2}-\d{2})', text)
452
  return {
453
- "File": filename,
454
- "Plot Number": find("PLOT"),
455
- "Location": find("LOC"),
456
- "Area": find("AREA"),
457
- "Amount (KES)": int(amounts[0].replace(",","")) if amounts else None,
458
- "Date": dates.group(1) if dates else None,
459
- "VOS": find("VOS"),
460
  }
461
 
462
-
463
- # ── Excel export ──────────────────────────────────────────────────────────────
464
  def to_excel(records):
465
  from openpyxl import load_workbook
466
- from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
467
  from openpyxl.utils import get_column_letter
468
  buf = io.BytesIO()
469
- df = pd.DataFrame(records)
470
- df.to_excel(buf, index=False, sheet_name="Valuation Data")
471
  buf.seek(0)
472
- wb = load_workbook(buf)
473
- ws = wb.active
474
- thin = Side(style="thin", color="E5E7EB")
475
- for col_idx, cell in enumerate(ws[1], 1):
476
  cell.font = Font(name="Calibri", bold=True, color="FFFFFF", size=11)
477
  cell.fill = PatternFill("solid", start_color="1E1B4B")
478
  cell.alignment = Alignment(horizontal="center", vertical="center")
479
- cell.border = Border(bottom=Side(style="medium", color="6366F1"))
480
- ws.column_dimensions[get_column_letter(col_idx)].width = 22
481
- ws.row_dimensions[1].height = 32
482
- for row in ws.iter_rows(min_row=2, max_row=ws.max_row):
483
- for i, cell in enumerate(row):
484
  cell.alignment = Alignment(vertical="center", wrap_text=True)
485
- cell.border = Border(
486
- left=Side(style="thin", color="F3F4F6"),
487
- right=Side(style="thin", color="F3F4F6"),
488
- top=Side(style="thin", color="F3F4F6"),
489
- bottom=Side(style="thin", color="F3F4F6"),
490
- )
491
  if cell.row % 2 == 0:
492
- cell.fill = PatternFill("solid", start_color="F8F7FF")
493
  ws.freeze_panes = "A2"
494
- out = io.BytesIO()
495
- wb.save(out)
496
  return out.getvalue()
497
 
498
 
499
- # ═════════════════════════════════════════════════════════════════════════════
500
  # SESSION STATE
501
- # ═════════════════════════════════════════════════════════════════════════════
502
- for k, v in [("records", []), ("errors", []), ("excel_bytes", None), ("processed", False)]:
503
  if k not in st.session_state:
504
  st.session_state[k] = v
505
 
506
 
507
- # ═════════════════════════════════════════════════════════════════════════════
508
- # PAGE SHELL OPEN
509
- # ═════════════════════════════════════════════════════════════════════════════
510
- st.markdown('<div class="page-shell">', unsafe_allow_html=True)
511
-
512
- # ── Nav ───────────────────────────────────────────────────────────────────────
513
  st.markdown("""
514
- <div class="nav">
515
- <div class="nav-brand">
516
- <div class="nav-dot"></div>
517
  <div>
518
- <div class="nav-title">ValuationAI</div>
519
- <div class="nav-subtitle">Handwriting Recognition System</div>
520
  </div>
521
  </div>
522
- <div class="nav-badge">rasmodev/Handwriting_trocr_model</div>
523
  </div>
524
  """, unsafe_allow_html=True)
525
 
526
- # ── Hero ──────────────────────────────────────────────────────────────────────
 
 
 
527
  st.markdown("""
528
- <div class="hero-section">
529
- <div class="hero-eyebrow">AI-Powered Document Intelligence</div>
530
- <div class="hero-headline">
531
- Extract data from<br><span>handwritten valuations</span>
532
- </div>
533
- <div class="hero-desc">
534
- Upload scanned valuation sheet PDFs and get a clean, structured Excel export
535
- in seconds — powered by a fine-tuned TrOCR model.
536
- </div>
537
- </div>
538
  """, unsafe_allow_html=True)
539
 
540
- # ═════════════════════════════════════════════════════════════════════════════
541
- # UPLOAD + SETTINGS
542
- # ═════════════════════════════════════════════════════════════════════════════
543
- col_upload, col_settings = st.columns([3, 2], gap="large")
544
 
545
- with col_upload:
546
- st.markdown('<div class="glass-card">', unsafe_allow_html=True)
547
- st.markdown('<div class="card-label">Upload Documents</div>', unsafe_allow_html=True)
 
 
 
 
548
  uploaded_files = st.file_uploader(
549
- "Drop PDF files here",
550
- type=["pdf", "png", "jpg", "jpeg", "tiff", "tif", "bmp"],
551
  accept_multiple_files=True,
552
  label_visibility="collapsed",
553
  )
554
  if uploaded_files:
555
  chips = "".join(
556
- f'<span class="file-chip">📄 {f.name[:28]}{"…" if len(f.name)>28 else ""}</span>'
557
  for f in uploaded_files
558
  )
559
- st.markdown(f'<div style="margin-top:1rem">{chips}</div>', unsafe_allow_html=True)
560
  st.markdown('</div>', unsafe_allow_html=True)
561
 
562
- with col_settings:
563
- st.markdown('<div class="glass-card">', unsafe_allow_html=True)
564
- st.markdown('<div class="card-label">Processing Settings</div>', unsafe_allow_html=True)
565
 
566
- engine_choice = st.selectbox(
567
  "OCR Engine",
568
  ["🟢 Fine-tuned Model (Recommended)", "🔵 EasyOCR (Fallback)"],
569
  label_visibility="visible",
570
  )
571
- engine = "finetuned" if "Fine-tuned" in engine_choice else "easyocr"
572
 
573
- dpi = st.select_slider(
574
- "Scan Resolution (DPI)",
575
- options=[150, 200, 250, 300],
576
- value=200,
577
- )
578
 
579
- max_p = st.number_input("Max pages per PDF (0 = all)", min_value=0, max_value=100, value=0)
580
  max_pages = None if max_p == 0 else int(max_p)
581
 
582
- run_btn = st.button(
583
- "⚡ Run OCR & Extract",
 
 
 
 
 
 
584
  disabled=not uploaded_files,
585
  use_container_width=True,
586
  )
587
- st.markdown('</div>', unsafe_allow_html=True)
588
 
589
 
590
- # ═════════════════════════════════════════════════════════════════════════════
591
  # PROCESSING
592
- # ═════════════════════════════════════════════════════════════════════════════
593
- if run_btn and uploaded_files:
594
- st.session_state.records = []
595
- st.session_state.errors = []
596
- st.session_state.excel_bytes = None
597
- st.session_state.processed = False
598
-
599
- st.markdown('<div class="section-divider"></div>', unsafe_allow_html=True)
600
 
 
601
  bar = st.progress(0.0)
602
  status = st.empty()
603
  t0 = time.time()
604
- total = len(uploaded_files)
605
 
606
  for fi, uf in enumerate(uploaded_files):
607
  fname = uf.name
608
  file_bytes = uf.read()
609
- bar.progress((fi) / total, text=f"Reading {fname}…")
610
 
611
  try:
612
  ext = fname.lower().rsplit(".", 1)[-1]
613
- if ext == "pdf":
614
- pages = pdf_to_images(file_bytes, dpi=dpi, max_pages=max_pages)
615
- else:
616
- img = Image.open(io.BytesIO(file_bytes)).convert("RGB")
617
- pages = [(1, img)]
618
 
619
  for page_no, img in pages:
620
- status.markdown(
621
- f'<div style="font-size:0.82rem;color:#9CA3AF;margin-bottom:0.5rem">'
622
- f'Processing <strong style="color:#A5B4FC">{fname}</strong>'
623
- f' — page {page_no} of {len(pages)}</div>',
624
- unsafe_allow_html=True,
625
- )
626
  text = ocr_image(img, engine)
627
  record = extract_fields(text, fname)
628
  st.session_state.records.append(record)
@@ -630,125 +495,96 @@ if run_btn and uploaded_files:
630
  except Exception as e:
631
  st.session_state.errors.append(f"{fname}: {e}")
632
 
633
- bar.progress((fi + 1) / total)
634
 
635
- elapsed = time.time() - t0
636
- bar.empty()
637
- status.empty()
638
 
639
- if st.session_state.records:
640
- st.session_state.excel_bytes = to_excel(st.session_state.records)
641
- st.session_state.processed = True
642
-
643
- st.success(f"✅ Processed {len(st.session_state.records)} page(s) from {total} file(s) in {elapsed:.1f}s")
644
 
645
 
646
- # ═════════════════════════════════════════════════════════════════════════════
647
  # RESULTS
648
- # ═════════════════════════════════════════════════════════════════════════════
649
- if st.session_state.processed and st.session_state.records:
650
  records = st.session_state.records
651
  df = pd.DataFrame(records)
652
 
653
- st.markdown('<div class="section-divider"></div>', unsafe_allow_html=True)
654
 
655
- # ── Stats ──────────────────────────────────────────────────────────────
656
- n_plots = df["Plot Number"].notna().sum() if "Plot Number" in df else 0
657
- n_amounts = df["Amount (KES)"].notna().sum() if "Amount (KES)" in df else 0
658
- n_dates = df["Date"].notna().sum() if "Date" in df else 0
659
 
660
  st.markdown(f"""
661
- <div class="stat-row">
662
- <div class="stat-card">
663
- <div class="stat-value">{len(records)}</div>
664
- <div class="stat-label">Pages Processed</div>
665
  </div>
666
- <div class="stat-card">
667
- <div class="stat-value">{n_plots}</div>
668
- <div class="stat-label">Plot Numbers</div>
669
  </div>
670
- <div class="stat-card">
671
- <div class="stat-value">{n_amounts}</div>
672
- <div class="stat-label">Amounts Extracted</div>
673
  </div>
674
- <div class="stat-card">
675
- <div class="stat-value">{n_dates}</div>
676
- <div class="stat-label">Dates Captured</div>
677
  </div>
678
  </div>
679
  """, unsafe_allow_html=True)
680
 
681
- # ── Results header + download ──────────────────────────────────────────
682
- r_col1, r_col2 = st.columns([3, 1])
683
- with r_col1:
684
- st.markdown("""
685
- <div class="result-header">
686
- <div class="result-title">Extracted Data</div>
687
- <div class="result-tag">✓ Ready to export</div>
688
- </div>
689
- """, unsafe_allow_html=True)
690
- with r_col2:
691
  if st.session_state.excel_bytes:
692
  st.download_button(
693
- label="⬇ Download Excel",
694
  data=st.session_state.excel_bytes,
695
  file_name="valuation_extracted.xlsx",
696
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
697
  )
698
 
699
- # ── Dataframe ──────────────────────────────────────────────────────────
700
- st.dataframe(
701
- df,
702
- use_container_width=True,
703
- height=min(80 + len(df) * 38, 520),
704
- hide_index=True,
705
- )
706
 
 
707
  if st.session_state.errors:
708
- st.markdown('<div class="section-divider"></div>', unsafe_allow_html=True)
709
  with st.expander(f"⚠️ {len(st.session_state.errors)} file(s) had errors"):
710
  for e in st.session_state.errors:
711
- st.markdown(
712
- f'<div style="font-size:0.85rem;color:#F87171;padding:0.3rem 0">{e}</div>',
713
- unsafe_allow_html=True,
714
- )
715
 
716
 
717
- # ═════════════════════════════════════════════════════════════════════════════
718
  # EMPTY STATE
719
- # ═════════════════════════════════════════════════════════════════════════════
720
- if not st.session_state.processed and not uploaded_files:
721
- st.markdown('<div class="section-divider"></div>', unsafe_allow_html=True)
722
- st.markdown("""
723
- <div style="display:grid;grid-template-columns:repeat(3,1fr);gap:1.5rem;margin-top:1rem">
724
-
725
- <div class="glass-card" style="text-align:center;padding:2.5rem 1.5rem">
726
- <div style="font-size:2rem;margin-bottom:1rem">📄</div>
727
- <div style="font-weight:600;color:#E8EAF0;margin-bottom:0.5rem">Upload PDFs</div>
728
- <div style="font-size:0.82rem;color:#6B7280;line-height:1.6">
729
- Drag in one or multiple scanned valuation sheet PDFs
730
- </div>
731
- </div>
732
-
733
- <div class="glass-card" style="text-align:center;padding:2.5rem 1.5rem">
734
- <div style="font-size:2rem;margin-bottom:1rem">🤖</div>
735
- <div style="font-weight:600;color:#E8EAF0;margin-bottom:0.5rem">AI Reads Fields</div>
736
- <div style="font-size:0.82rem;color:#6B7280;line-height:1.6">
737
- Fine-tuned TrOCR extracts plot numbers, amounts, dates and locations
738
- </div>
739
- </div>
740
-
741
- <div class="glass-card" style="text-align:center;padding:2.5rem 1.5rem">
742
- <div style="font-size:2rem;margin-bottom:1rem">📊</div>
743
- <div style="font-weight:600;color:#E8EAF0;margin-bottom:0.5rem">Download Excel</div>
744
- <div style="font-size:0.82rem;color:#6B7280;line-height:1.6">
745
- Get a clean, formatted spreadsheet ready for data entry or analysis
746
- </div>
747
- </div>
748
-
749
- </div>
750
- """, unsafe_allow_html=True)
751
-
752
-
753
- # PAGE SHELL CLOSE
754
- st.markdown('</div>', unsafe_allow_html=True)
 
1
  """
2
+ ValuationAI OCR — Streamlit App
3
  Model: rasmodev/Handwriting_trocr_model
4
  """
5
  import io, time, logging
 
8
  from PIL import Image
9
 
10
  st.set_page_config(
11
+ page_title="ValuationAI",
12
  page_icon="📋",
13
  layout="wide",
14
  initial_sidebar_state="collapsed",
15
  )
16
  logging.basicConfig(level=logging.INFO)
17
 
 
18
  st.markdown("""
19
  <style>
20
+ @import url('https://fonts.googleapis.com/css2?family=Playfair+Display:wght@600;700&family=Outfit:wght@300;400;500;600&display=swap');
21
 
22
+ html, body, [class*="css"] {
 
 
23
  font-family: 'Outfit', sans-serif;
24
+ background-color: #080D1A;
25
+ color: #E2E8F0;
26
  }
27
 
28
+ .stApp {
29
+ background: #080D1A;
30
+ }
31
 
32
+ /* Remove default padding */
33
+ .block-container {
34
+ padding-top: 2rem !important;
35
+ padding-bottom: 3rem !important;
36
+ max-width: 1200px !important;
 
 
 
37
  }
38
 
39
+ /* Hide Streamlit chrome */
40
+ #MainMenu, footer { visibility: hidden; }
41
+
42
+ /* ── Brand bar ── */
43
+ .brand {
44
  display: flex;
45
  align-items: center;
46
  justify-content: space-between;
 
47
  padding-bottom: 1.5rem;
48
  border-bottom: 1px solid rgba(255,255,255,0.06);
49
+ margin-bottom: 2.5rem;
50
  }
51
+ .brand-left { display: flex; align-items: center; gap: 10px; }
52
+ .brand-dot {
 
 
53
  width: 10px; height: 10px; border-radius: 50%;
54
  background: #6366F1;
55
+ box-shadow: 0 0 14px rgba(99,102,241,0.9);
56
+ animation: glow 2s ease-in-out infinite;
57
  }
58
+ @keyframes glow {
59
+ 0%,100% { opacity:1; transform:scale(1); }
60
+ 50% { opacity:0.5; transform:scale(0.8); }
61
  }
62
+ .brand-name {
63
  font-family: 'Playfair Display', serif;
64
+ font-size: 1.3rem; font-weight: 700; color: #fff;
65
+ }
66
+ .brand-tag {
67
+ font-size: 0.68rem; color: #4B5563; letter-spacing: 0.1em;
68
+ text-transform: uppercase; font-weight: 500;
69
+ }
70
+ .model-pill {
71
+ font-size: 0.7rem; font-weight: 500;
72
+ background: rgba(99,102,241,0.12);
73
+ border: 1px solid rgba(99,102,241,0.28);
74
+ color: #A5B4FC; padding: 0.3rem 0.9rem;
75
+ border-radius: 20px; letter-spacing: 0.04em;
 
 
 
 
 
 
 
 
 
76
  }
77
 
78
  /* ── Hero ── */
 
 
 
79
  .hero-eyebrow {
80
+ font-size: 0.68rem; font-weight: 600; letter-spacing: 0.2em;
81
+ text-transform: uppercase; color: #6366F1; margin-bottom: 0.7rem;
82
+ }
83
+ .hero-h1 {
 
 
 
 
84
  font-family: 'Playfair Display', serif;
85
+ font-size: 2.8rem; font-weight: 700; line-height: 1.12;
86
+ letter-spacing: -0.025em; color: #fff; margin-bottom: 0.8rem;
 
 
 
 
 
 
 
 
 
 
87
  }
88
+ .hero-h1 span {
89
+ background: linear-gradient(130deg, #6366F1, #10B981);
90
+ -webkit-background-clip: text; -webkit-text-fill-color: transparent;
91
+ background-clip: text;
 
 
92
  }
93
+ .hero-sub {
94
+ font-size: 1rem; font-weight: 300; color: #6B7280;
95
+ line-height: 1.7; max-width: 460px; margin-bottom: 2.5rem;
 
 
 
 
96
  }
97
 
98
  /* ── Cards ── */
99
+ .card {
100
+ background: rgba(255,255,255,0.025);
101
+ border: 1px solid rgba(255,255,255,0.065);
102
+ border-radius: 18px; padding: 1.8rem;
 
 
103
  transition: border-color 0.2s;
104
  }
105
+ .card:hover { border-color: rgba(99,102,241,0.28); }
 
106
  .card-label {
107
+ font-size: 0.65rem; font-weight: 600; letter-spacing: 0.15em;
108
+ text-transform: uppercase; color: #4B5563; margin-bottom: 1.2rem;
109
+ display: flex; align-items: center; gap: 8px;
 
 
 
 
110
  }
111
  .card-label::before {
112
+ content:''; display:inline-block; width:16px; height:2px;
113
+ background:#6366F1; border-radius:2px;
114
+ }
115
+
116
+ /* ── File chips ── */
117
+ .chip {
118
+ display: inline-flex; align-items: center; gap: 5px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  background: rgba(16,185,129,0.1);
120
+ border: 1px solid rgba(16,185,129,0.22);
121
+ color: #6EE7B7; padding: 0.25rem 0.7rem;
122
+ border-radius: 20px; font-size: 0.72rem; font-weight: 500; margin: 2px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  }
124
+
125
+ /* ── Stats ── */
126
+ .stat-grid {
127
+ display: grid; grid-template-columns: repeat(4,1fr); gap: 1rem;
128
+ margin: 1.8rem 0;
129
  }
130
+ .stat {
131
+ background: rgba(255,255,255,0.025);
132
+ border: 1px solid rgba(255,255,255,0.06);
133
+ border-radius: 14px; padding: 1.2rem 1.4rem;
134
+ position: relative; overflow: hidden;
 
135
  }
136
+ .stat::after {
137
+ content:''; position:absolute; top:0; left:0; right:0; height:2px;
138
+ background: linear-gradient(90deg,#6366F1,#10B981);
 
 
139
  }
140
+ .stat-n {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  font-family: 'Playfair Display', serif;
142
+ font-size: 2rem; font-weight: 700; color: #fff; line-height: 1;
143
+ }
144
+ .stat-l {
145
+ font-size: 0.68rem; font-weight: 500; letter-spacing: 0.1em;
146
+ text-transform: uppercase; color: #4B5563; margin-top: 4px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
 
148
 
149
+ /* ── Result header ── */
150
+ .result-row {
151
+ display: flex; align-items: center;
152
+ justify-content: space-between; margin-bottom: 1.2rem;
 
 
 
 
 
 
 
 
 
153
  }
154
+ .result-title {
155
+ font-family: 'Playfair Display', serif;
156
+ font-size: 1.4rem; font-weight: 600; color: #fff;
157
+ }
158
+ .result-badge {
159
+ font-size: 0.68rem; font-weight: 600; letter-spacing: 0.1em;
160
+ text-transform: uppercase; color: #10B981;
161
+ background: rgba(16,185,129,0.1);
162
+ border: 1px solid rgba(16,185,129,0.2);
163
+ padding: 0.28rem 0.8rem; border-radius: 20px;
164
  }
165
 
166
+ /* ── Divider ── */
167
+ .divider {
168
+ height: 1px; margin: 2rem 0;
169
+ background: linear-gradient(90deg,transparent,rgba(255,255,255,0.07),transparent);
170
  }
171
+
172
+ /* ── Empty state boxes ── */
173
+ .empty-card {
174
+ background: rgba(255,255,255,0.02);
175
+ border: 1px solid rgba(255,255,255,0.055);
176
+ border-radius: 14px; padding: 2rem 1.5rem; text-align: center;
177
+ }
178
+ .empty-icon { font-size: 1.8rem; margin-bottom: 0.7rem; }
179
+ .empty-title { font-weight: 600; color: #E2E8F0; margin-bottom: 0.4rem; font-size: 0.95rem; }
180
+ .empty-desc { font-size: 0.78rem; color: #4B5563; line-height: 1.6; }
181
+
182
+ /* ── Streamlit widget overrides ── */
183
+ /* File uploader */
184
  [data-testid="stFileUploader"] section {
185
  background: rgba(99,102,241,0.04) !important;
186
+ border: 2px dashed rgba(99,102,241,0.3) !important;
187
+ border-radius: 14px !important;
188
+ transition: all 0.2s !important;
 
189
  }
190
  [data-testid="stFileUploader"] section:hover {
191
+ border-color: rgba(99,102,241,0.6) !important;
192
  background: rgba(99,102,241,0.08) !important;
193
  }
194
+ [data-testid="stFileUploader"] label { color: #6B7280 !important; }
195
+
196
+ /* Primary button */
197
+ .stButton > button[kind="primary"] {
198
+ background: linear-gradient(135deg, #6366F1, #4F46E5) !important;
199
+ color: white !important; border: none !important;
200
+ border-radius: 12px !important; padding: 0.75rem 2rem !important;
201
+ font-family: 'Outfit', sans-serif !important;
202
+ font-size: 0.9rem !important; font-weight: 600 !important;
203
+ box-shadow: 0 4px 20px rgba(99,102,241,0.4) !important;
204
+ transition: all 0.2s !important; width: 100% !important;
205
+ }
206
+ .stButton > button[kind="primary"]:hover {
207
+ transform: translateY(-2px) !important;
208
+ box-shadow: 0 8px 28px rgba(99,102,241,0.55) !important;
209
+ }
210
+ .stButton > button:disabled {
211
+ background: rgba(255,255,255,0.05) !important;
212
+ color: #374151 !important; box-shadow: none !important;
213
+ transform: none !important; border: 1px solid rgba(255,255,255,0.06) !important;
214
+ }
215
+
216
+ /* Download button */
217
+ div[data-testid="stDownloadButton"] > button {
218
+ background: rgba(16,185,129,0.1) !important;
219
+ border: 1px solid rgba(16,185,129,0.28) !important;
220
+ color: #6EE7B7 !important; border-radius: 10px !important;
221
+ padding: 0.6rem 1.4rem !important;
222
+ font-family: 'Outfit', sans-serif !important;
223
+ font-weight: 600 !important; font-size: 0.85rem !important;
224
+ transition: all 0.2s !important;
225
+ }
226
+ div[data-testid="stDownloadButton"] > button:hover {
227
+ background: rgba(16,185,129,0.18) !important;
228
+ transform: translateY(-1px) !important;
229
+ }
230
 
231
+ /* Select box */
232
  [data-testid="stSelectbox"] > div > div {
233
  background: rgba(255,255,255,0.04) !important;
234
+ border: 1px solid rgba(255,255,255,0.09) !important;
235
+ border-radius: 10px !important; color: #E2E8F0 !important;
 
236
  }
237
 
238
+ /* Slider */
239
+ [data-testid="stSlider"] > div > div > div > div {
240
+ background: #6366F1 !important;
 
 
241
  }
242
 
243
+ /* Progress bar */
244
+ .stProgress > div > div > div {
245
+ background: linear-gradient(90deg,#6366F1,#10B981) !important;
246
+ border-radius: 4px !important;
247
  }
248
+ .stProgress > div > div {
249
+ background: rgba(255,255,255,0.06) !important;
250
+ border-radius: 4px !important; height: 5px !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  }
252
 
253
+ /* Dataframe */
254
+ [data-testid="stDataFrame"] {
255
+ border-radius: 14px !important;
256
+ border: 1px solid rgba(255,255,255,0.07) !important;
257
+ overflow: hidden !important;
258
+ }
259
+
260
+ /* Number input */
261
+ [data-testid="stNumberInput"] input {
262
+ background: rgba(255,255,255,0.04) !important;
263
+ border: 1px solid rgba(255,255,255,0.09) !important;
264
+ border-radius: 10px !important; color: #E2E8F0 !important;
265
+ }
266
+
267
+ /* Expander */
268
+ [data-testid="stExpander"] {
269
+ background: rgba(255,255,255,0.02) !important;
270
+ border: 1px solid rgba(255,255,255,0.06) !important;
271
+ border-radius: 12px !important;
272
+ }
273
+
274
+ /* Alert/success */
275
+ [data-testid="stAlert"] {
276
+ border-radius: 12px !important;
277
+ border: none !important;
278
+ }
279
  </style>
280
  """, unsafe_allow_html=True)
281
 
282
 
283
+ # ═══════════════════════════════════════════════════════════
284
+ # MODEL LOADING
285
+ # ═══════════════════════════════════════════════════════════
286
+ @st.cache_resource(show_spinner="Loading model from HuggingFace Hub…")
287
  def load_model():
288
  import torch
289
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 
300
  return easyocr.Reader(["en"], gpu=False)
301
 
302
 
303
+ # ═══════════════════════════════════════════════════════════
304
+ # HELPERS
305
+ # ═══════════════════════════════════════════════════════════
306
  def run_finetuned(img):
307
  import torch
308
  processor, model, device = load_model()
 
319
  def ocr_image(img, engine):
320
  try:
321
  return run_finetuned(img) if engine == "finetuned" else run_easyocr(img)
322
+ except:
323
  try: return run_easyocr(img)
324
  except: return ""
325
 
 
 
326
  def pdf_to_images(file_bytes, dpi=200, max_pages=None):
327
  import fitz
328
  doc = fitz.open(stream=file_bytes, filetype="pdf")
 
335
  doc.close()
336
  return pages
337
 
 
 
338
  def extract_fields(text, filename):
339
  import re
340
  def find(label):
 
343
  amounts = re.findall(r'AMT:\s*([\d,]+)', text)
344
  dates = re.search(r'DATE:\s*(\d{4}-\d{2}-\d{2})', text)
345
  return {
346
+ "File": filename,
347
+ "Plot Number": find("PLOT"),
348
+ "Location": find("LOC"),
349
+ "Area": find("AREA"),
350
+ "Amount (KES)": int(amounts[0].replace(",","")) if amounts else None,
351
+ "Date": dates.group(1) if dates else None,
352
+ "VOS": find("VOS"),
353
  }
354
 
 
 
355
  def to_excel(records):
356
  from openpyxl import load_workbook
357
+ from openpyxl.styles import Font, PatternFill, Alignment
358
  from openpyxl.utils import get_column_letter
359
  buf = io.BytesIO()
360
+ pd.DataFrame(records).to_excel(buf, index=False, sheet_name="Valuation Data")
 
361
  buf.seek(0)
362
+ wb = load_workbook(buf)
363
+ ws = wb.active
364
+ for ci, cell in enumerate(ws[1], 1):
 
365
  cell.font = Font(name="Calibri", bold=True, color="FFFFFF", size=11)
366
  cell.fill = PatternFill("solid", start_color="1E1B4B")
367
  cell.alignment = Alignment(horizontal="center", vertical="center")
368
+ ws.column_dimensions[get_column_letter(ci)].width = 22
369
+ ws.row_dimensions[1].height = 30
370
+ for row in ws.iter_rows(min_row=2):
371
+ for cell in row:
 
372
  cell.alignment = Alignment(vertical="center", wrap_text=True)
 
 
 
 
 
 
373
  if cell.row % 2 == 0:
374
+ cell.fill = PatternFill("solid", start_color="F5F4FF")
375
  ws.freeze_panes = "A2"
376
+ out = io.BytesIO(); wb.save(out)
 
377
  return out.getvalue()
378
 
379
 
380
+ # ═══════════════════════════════════════════════════════════
381
  # SESSION STATE
382
+ # ═══════════════════════════════════════════════════════════
383
+ for k, v in [("records",[]),("errors",[]),("excel_bytes",None),("done",False)]:
384
  if k not in st.session_state:
385
  st.session_state[k] = v
386
 
387
 
388
+ # ═══════════════════════════════════════════════════════════
389
+ # BRAND BAR
390
+ # ═══════════════════════════════════════════════════════════
 
 
 
391
  st.markdown("""
392
+ <div class="brand">
393
+ <div class="brand-left">
394
+ <div class="brand-dot"></div>
395
  <div>
396
+ <div class="brand-name">ValuationAI</div>
397
+ <div class="brand-tag">Handwriting Recognition System</div>
398
  </div>
399
  </div>
400
+ <div class="model-pill">rasmodev/Handwriting_trocr_model</div>
401
  </div>
402
  """, unsafe_allow_html=True)
403
 
404
+
405
+ # ═══════════════════════════════════════════════════════════
406
+ # HERO
407
+ # ═══════════════════════════════════════════════════════════
408
  st.markdown("""
409
+ <div class="hero-eyebrow">AI-Powered Document Intelligence</div>
410
+ <div class="hero-h1">Extract data from<br><span>handwritten valuations</span></div>
411
+ <div class="hero-sub">Upload scanned valuation sheet PDFs and get a clean,
412
+ structured Excel export in seconds — powered by a fine-tuned TrOCR model.</div>
 
 
 
 
 
 
413
  """, unsafe_allow_html=True)
414
 
 
 
 
 
415
 
416
+ # ═══════════════════════════════════════════════════════════
417
+ # UPLOAD + SETTINGS (side by side)
418
+ # ═══════════════════════════════════════════════════════════
419
+ col_up, col_cfg = st.columns([3, 2], gap="large")
420
+
421
+ with col_up:
422
+ st.markdown('<div class="card"><div class="card-label">Upload Documents</div>', unsafe_allow_html=True)
423
  uploaded_files = st.file_uploader(
424
+ "Drag & drop PDF files here",
425
+ type=["pdf","png","jpg","jpeg","tiff","tif","bmp"],
426
  accept_multiple_files=True,
427
  label_visibility="collapsed",
428
  )
429
  if uploaded_files:
430
  chips = "".join(
431
+ f'<span class="chip">📄 {f.name[:30]}{"…" if len(f.name)>30 else ""}</span>'
432
  for f in uploaded_files
433
  )
434
+ st.markdown(f'<div style="margin-top:0.8rem">{chips}</div>', unsafe_allow_html=True)
435
  st.markdown('</div>', unsafe_allow_html=True)
436
 
437
+ with col_cfg:
438
+ st.markdown('<div class="card"><div class="card-label">Settings</div>', unsafe_allow_html=True)
 
439
 
440
+ engine_sel = st.selectbox(
441
  "OCR Engine",
442
  ["🟒 Fine-tuned Model (Recommended)", "πŸ”΅ EasyOCR (Fallback)"],
443
  label_visibility="visible",
444
  )
445
+ engine = "finetuned" if "Fine-tuned" in engine_sel else "easyocr"
446
 
447
+ dpi = st.select_slider("Resolution (DPI)", options=[150, 200, 250, 300], value=200)
 
 
 
 
448
 
449
+ max_p = st.number_input("Max pages per PDF (0 = all)", 0, 100, 0)
450
  max_pages = None if max_p == 0 else int(max_p)
451
 
452
+ st.markdown("</div>", unsafe_allow_html=True)
453
+
454
+ # Run button below settings
455
+ _, btn_col, _ = st.columns([3, 2, 0.01])
456
+ with btn_col:
457
+ run = st.button(
458
+ "⚑ Run OCR & Extract Fields",
459
+ type="primary",
460
  disabled=not uploaded_files,
461
  use_container_width=True,
462
  )
 
463
 
464
 
465
+ # ═══════════════════════════════════════════════════════════
466
  # PROCESSING
467
+ # ═══════════════════════════════════════════════════════════
468
+ if run and uploaded_files:
469
+ st.session_state.records = []
470
+ st.session_state.errors = []
471
+ st.session_state.done = False
 
 
 
472
 
473
+ st.markdown('<div class="divider"></div>', unsafe_allow_html=True)
474
  bar = st.progress(0.0)
475
  status = st.empty()
476
  t0 = time.time()
 
477
 
478
  for fi, uf in enumerate(uploaded_files):
479
  fname = uf.name
480
  file_bytes = uf.read()
481
+ bar.progress(fi / len(uploaded_files), text=f"Reading {fname}…")
482
 
483
  try:
484
  ext = fname.lower().rsplit(".", 1)[-1]
485
+ pages = pdf_to_images(file_bytes, dpi=dpi, max_pages=max_pages) \
486
+ if ext == "pdf" \
487
+ else [(1, Image.open(io.BytesIO(file_bytes)).convert("RGB"))]
 
 
488
 
489
  for page_no, img in pages:
490
+ status.caption(f"Processing **{fname}** — page {page_no} of {len(pages)}")
 
 
 
 
 
491
  text = ocr_image(img, engine)
492
  record = extract_fields(text, fname)
493
  st.session_state.records.append(record)
 
495
  except Exception as e:
496
  st.session_state.errors.append(f"{fname}: {e}")
497
 
498
+ bar.progress((fi+1)/len(uploaded_files))
499
 
500
+ bar.empty(); status.empty()
501
+ st.session_state.excel_bytes = to_excel(st.session_state.records) if st.session_state.records else None
502
+ st.session_state.done = True
503
 
504
+ elapsed = time.time() - t0
505
+ st.success(f"✅ Done — {len(st.session_state.records)} page(s) from {len(uploaded_files)} file(s) in {elapsed:.1f}s")
 
 
 
506
 
507
 
508
+ # ═══════════════════════════════════════════════════════════
509
  # RESULTS
510
+ # ═══════════════════════════════════════════════════════════
511
+ if st.session_state.done and st.session_state.records:
512
  records = st.session_state.records
513
  df = pd.DataFrame(records)
514
 
515
+ st.markdown('<div class="divider"></div>', unsafe_allow_html=True)
516
 
517
+ # Stats
518
+ n_plots = df["Plot Number"].astype(bool).sum() if "Plot Number" in df else 0
519
+ n_amounts = df["Amount (KES)"].notna().sum() if "Amount (KES)" in df else 0
520
+ n_dates = df["Date"].astype(bool).sum() if "Date" in df else 0
521
 
522
  st.markdown(f"""
523
+ <div class="stat-grid">
524
+ <div class="stat">
525
+ <div class="stat-n">{len(records)}</div>
526
+ <div class="stat-l">Pages Processed</div>
527
  </div>
528
+ <div class="stat">
529
+ <div class="stat-n">{n_plots}</div>
530
+ <div class="stat-l">Plot Numbers</div>
531
  </div>
532
+ <div class="stat">
533
+ <div class="stat-n">{n_amounts}</div>
534
+ <div class="stat-l">Amounts Found</div>
535
  </div>
536
+ <div class="stat">
537
+ <div class="stat-n">{n_dates}</div>
538
+ <div class="stat-l">Dates Captured</div>
539
  </div>
540
  </div>
541
  """, unsafe_allow_html=True)
542
 
543
+ # Title + download
544
+ hdr_l, hdr_r = st.columns([4, 1])
545
+ with hdr_l:
546
+ st.markdown('<div class="result-row"><div class="result-title">Extracted Data</div><div class="result-badge">✓ Ready to export</div></div>', unsafe_allow_html=True)
547
+ with hdr_r:
 
 
 
 
 
548
  if st.session_state.excel_bytes:
549
  st.download_button(
550
+ "⬇ Download Excel",
551
  data=st.session_state.excel_bytes,
552
  file_name="valuation_extracted.xlsx",
553
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
554
  )
555
 
556
+ # Dataframe
557
+ st.dataframe(df, use_container_width=True,
558
+ height=min(80 + len(df)*38, 520), hide_index=True)
 
 
 
 
559
 
560
+ # Errors
561
  if st.session_state.errors:
 
562
  with st.expander(f"οΏ½οΏ½ {len(st.session_state.errors)} file(s) had errors"):
563
  for e in st.session_state.errors:
564
+ st.markdown(f'<span style="color:#F87171;font-size:0.85rem">{e}</span>', unsafe_allow_html=True)
 
 
 
565
 
566
 
567
+ # ═══════════════════════════════════════════════════════════
568
  # EMPTY STATE
569
+ # ═══════════════════════════════════════════════════════════
570
+ if not st.session_state.done and not uploaded_files:
571
+ st.markdown('<div class="divider"></div>', unsafe_allow_html=True)
572
+ c1, c2, c3 = st.columns(3, gap="medium")
573
+ with c1:
574
+ st.markdown("""<div class="empty-card">
575
+ <div class="empty-icon">📄</div>
576
+ <div class="empty-title">Upload PDFs</div>
577
+ <div class="empty-desc">Drag in one or multiple scanned valuation sheet PDFs</div>
578
+ </div>""", unsafe_allow_html=True)
579
+ with c2:
580
+ st.markdown("""<div class="empty-card">
581
+ <div class="empty-icon">🤖</div>
582
+ <div class="empty-title">AI Reads Fields</div>
583
+ <div class="empty-desc">Fine-tuned TrOCR extracts plot numbers, amounts, dates and locations</div>
584
+ </div>""", unsafe_allow_html=True)
585
+ with c3:
586
+ st.markdown("""<div class="empty-card">
587
+ <div class="empty-icon">📊</div>
588
+ <div class="empty-title">Download Excel</div>
589
+ <div class="empty-desc">Get a clean formatted spreadsheet ready for data entry</div>
590
+ </div>""", unsafe_allow_html=True)