"""
ValuationAI — Nairobi Valuation Sheet OCR
Model: rasmodev/Handwriting_trocr_model
"""
import io
import logging
import os
import tempfile
import time

import pandas as pd
import streamlit as st
from PIL import Image

# Page configuration is issued before any other Streamlit call in this script.
st.set_page_config(
    page_title="ValuationAI",
    # NOTE(review): this glyph looks like a mis-decoded emoji — confirm the
    # intended icon before shipping.
    page_icon="π",
    layout="wide",
    initial_sidebar_state="collapsed",
)
logging.basicConfig(level=logging.INFO)

st.markdown(""" """, unsafe_allow_html=True)


# ───────────────────────────────────────────────────────────
# MODEL
# ───────────────────────────────────────────────────────────
@st.cache_resource(show_spinner="Loading recognition model…")
def load_model():
    """Load the TrOCR processor and the fine-tuned recognition model.

    The processor is taken from the base checkpoint (it has all required
    config files) while the weights come from the fine-tuned checkpoint
    (it contains the trained parameters).  The model is moved to CUDA when
    available, otherwise to the CPU, and switched to eval mode.

    The result is wrapped in ``st.cache_resource`` so repeated calls reuse
    the same objects instead of reloading the checkpoints.

    Returns:
        A ``(processor, model, device)`` tuple.
    """
    # Heavy libraries are imported lazily so the page can start rendering
    # before they are loaded.
    import torch
    from transformers import TrOCRProcessor, VisionEncoderDecoderModel

    MODEL_ID = "rasmodev/Handwriting_trocr_model"
    BASE_ID = "microsoft/trocr-base-handwritten"

    processor = TrOCRProcessor.from_pretrained(BASE_ID)
    model = VisionEncoderDecoderModel.from_pretrained(MODEL_ID)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device).eval()
    return processor, model, device


# ───────────────────────────────────────────────────────────
# OCR
# ───────────────────────────────────────────────────────────
def ocr_page(img: Image.Image, *, max_new_tokens: int = 64, num_beams: int = 1) -> str:
    """Run handwriting recognition on one image and return the decoded text.

    Args:
        img: Image to recognise; it is converted to RGB before preprocessing.
        max_new_tokens: Upper bound on the number of tokens generated.
        num_beams: Beam width passed to ``generate`` (1 disables beam search).

    Returns:
        The first decoded sequence with special tokens removed and
        surrounding whitespace stripped.
    """
    import torch

    processor, model, device = load_model()
    encoded = processor(images=img.convert("RGB"), return_tensors="pt")
    pixel_values = encoded.pixel_values.to(device)

    # Inference only: gradients are never needed here.
    with torch.no_grad():
        generated = model.generate(
            pixel_values=pixel_values,
            max_new_tokens=max_new_tokens,
            num_beams=num_beams,
        )
    decoded = processor.batch_decode(generated, skip_special_tokens=True)
    return decoded[0].strip()


# ───────────────────────────────────────────────────────────
# PARSE LABEL
# Format: PLOT: ... | LOC: ... | AREA: ... | AMT: ... | DATE: ... | VOS: ...
# ───────────────────────────────────────────────────────────
# Maps the short keys found in the model output to record column names.
# "AMT" is handled separately because its value is converted to an int.
_LABEL_FIELDS = {
    "PLOT": "Plot Number",
    "LOC": "Location",
    "AREA": "Area",
    "DATE": "Date",
    "VOS": "VOS",
}


def _parse_amount(text: str):
    """Convert an amount string to ``int``; fall back to the text itself.

    Commas and spaces are treated as digit-group separators.  An empty
    string yields ``None`` so a blank amount looks the same as a missing one.
    """
    if not text:
        return None
    try:
        return int(text.replace(",", "").replace(" ", ""))
    except ValueError:
        # Keep whatever was recognised so the value can be reviewed by hand.
        return text


def parse_label(raw_text: str, filename: str) -> dict:
    """Split a ``KEY: value | KEY: value`` label string into a record dict.

    Args:
        raw_text: Text in the form
            ``PLOT: ... | LOC: ... | AREA: ... | AMT: ... | DATE: ... | VOS: ...``.
            Keys are matched case-insensitively; segments without a colon
            and unknown keys are ignored.
        filename: Stored verbatim under the ``"File"`` key.

    Returns:
        A dict with the keys ``File``, ``Plot Number``, ``Location``,
        ``Area``, ``Amount (KES)``, ``Date``, ``VOS`` and ``Raw Output``.
        Text fields default to ``""``.  ``Amount (KES)`` is an ``int`` when
        the value is numeric, the raw string when it is not, and ``None``
        when it is absent or empty.
    """
    record = {
        "File": filename,
        "Plot Number": "",
        "Location": "",
        "Area": "",
        "Amount (KES)": None,
        "Date": "",
        "VOS": "",
        "Raw Output": raw_text,
    }
    for segment in raw_text.split("|"):
        # Only the first colon separates key from value, so values such as
        # times ("10:30") survive intact.
        key, sep, value = segment.partition(":")
        if not sep:
            continue
        key = key.strip().upper()
        value = value.strip()
        if key == "AMT":
            record["Amount (KES)"] = _parse_amount(value)
        elif key in _LABEL_FIELDS:
            record[_LABEL_FIELDS[key]] = value
    return record


# ───────────────────────────────────────────────────────────
# EXCEL EXPORT
# ───────────────────────────────────────────────────────────
def make_excel(records: list) -> bytes:
    """Render *records* as a styled single-sheet ``.xlsx`` workbook.

    Args:
        records: Record dicts as produced by ``parse_label``.  The
            ``"Raw Output"`` key is left out of the export.

    Returns:
        The workbook file contents as bytes.
    """
    from openpyxl import load_workbook
    from openpyxl.styles import Font, PatternFill, Alignment
    from openpyxl.utils import get_column_letter

    rows = [
        {key: value for key, value in rec.items() if key != "Raw Output"}
        for rec in records
    ]

    # Let pandas write the plain sheet, then reopen it with openpyxl to style it.
    plain = io.BytesIO()
    pd.DataFrame(rows).to_excel(plain, index=False, sheet_name="Valuation Data")
    plain.seek(0)
    wb = load_workbook(plain)
    ws = wb.active

    # Style objects are built once and shared by every cell that uses them.
    header_font = Font(name="Calibri", bold=True, color="FFFFFF", size=11)
    header_fill = PatternFill("solid", start_color="1A1A2E")
    header_align = Alignment(horizontal="center", vertical="center")
    body_align = Alignment(vertical="center", wrap_text=True)
    stripe_fill = PatternFill("solid", start_color="F0F4FF")

    for col_idx, cell in enumerate(ws[1], 1):
        cell.font = header_font
        cell.fill = header_fill
        cell.alignment = header_align
        ws.column_dimensions[get_column_letter(col_idx)].width = 26
    ws.row_dimensions[1].height = 30

    for row in ws.iter_rows(min_row=2):
        for cell in row:
            cell.alignment = body_align
            # Shade even-numbered sheet rows to get a zebra pattern.
            if cell.row % 2 == 0:
                cell.fill = stripe_fill

    # Keep the header row visible while scrolling.
    ws.freeze_panes = "A2"

    out = io.BytesIO()
    wb.save(out)
    return out.getvalue()


# ───────────────────────────────────────────────────────────
# SESSION
STATE # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ for k, v in [("records",[]),("excel",None),("done",False),("errors",[])]: if k not in st.session_state: st.session_state[k] = v # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ # UI # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ st.markdown("""
""", unsafe_allow_html=True) st.markdown("""