"""Streamlit demo: end-to-end Paddle OCR (text detection + recognition) via DeGirum PySDK.

The app uploads an image, runs a detection model whose crops are fed into a
recognition model (compound model from ``degirum_tools``), and displays the
annotated overlay plus the recognized text.
"""

import streamlit as st
import degirum as dg
import degirum_tools
from PIL import Image

# -----------------------
# Page config (must be the first Streamlit command)
# -----------------------
st.set_page_config(
    page_title="Paddle OCR with DeGirum",
    page_icon="📝",
)

# -----------------------
# Hardcoded configuration
# -----------------------
INFERENCE_HOST_ADDRESS = "@cloud"  # run inference on DeGirum Cloud
ZOO_URL = "degirum/franklin_sandbox"
DEVICE_TYPE = ["OPENVINO/CPU"]
PADDLE_OCR_DET_MODEL_NAME = "paddle_ocr_detection--544x960_float_openvino_cpu_1"
PADDLE_OCR_REC_MODEL_NAME = "paddle_ocr_recognition--48x320_float_openvino_cpu_1"

# -----------------------
# Sidebar content
# -----------------------
# Model names are interpolated from the config constants above so the sidebar
# can never drift out of sync with what the app actually loads.  (Previously
# the sidebar hard-coded hailo8 model names that did not match the OpenVINO
# models configured below.)
st.sidebar.markdown(
    f"""
**About this demo**

**Inference location:** `DeGirum Cloud`

**Models used:**
- **Text detection:** `{PADDLE_OCR_DET_MODEL_NAME}`
- **Text recognition:** `{PADDLE_OCR_REC_MODEL_NAME}`

**Libraries:**
- `degirum`
- `degirum_tools`
- `streamlit`
"""
)

# -----------------------
# Main page content
# -----------------------
st.title("Paddle OCR Inference with DeGirum PySDK")
st.write(
    "This app demonstrates an end-to-end text detection and recognition pipeline "
    "using PaddleOCR. It takes an image as the input, detects text regions, and "
    "decodes the recognized text using OCR models. The combined results are then displayed."
)
st.text("Upload an image. Then click on the Run OCR button.")


def _load_ocr_model(model_name: str, token: str):
    """Load a single DeGirum model with the shared connection settings.

    ``image_backend="pil"`` is required because this app passes a
    ``PIL.Image`` directly into the model.
    """
    return dg.load_model(
        model_name=model_name,
        inference_host_address=INFERENCE_HOST_ADDRESS,
        zoo_url=ZOO_URL,
        device_type=DEVICE_TYPE,
        token=token,
        image_backend="pil",
    )


@st.cache_resource
def load_crop_model():
    """Build the detection -> crops -> recognition compound OCR model.

    Loads the Paddle OCR detection and recognition models and wraps them in a
    ``CroppingAndClassifyingCompoundModel`` so each detected text region is
    cropped and fed into the recognizer.  Cached across reruns via
    ``st.cache_resource``.
    """
    token = st.secrets.get("DG_TOKEN", "")
    paddle_ocr_text_det_model = _load_ocr_model(PADDLE_OCR_DET_MODEL_NAME, token)
    paddle_ocr_text_rec_model = _load_ocr_model(PADDLE_OCR_REC_MODEL_NAME, token)
    return degirum_tools.CroppingAndClassifyingCompoundModel(
        paddle_ocr_text_det_model,
        paddle_ocr_text_rec_model,
    )


crop_model = load_crop_model()

# -----------------------
# Form: upload + run OCR
# -----------------------
with st.form("ocr_form"):
    uploaded_file = st.file_uploader(
        "Input image",
        type=["jpg", "jpeg", "png", "bmp", "tiff"],
    )
    submitted = st.form_submit_button("Run OCR")

if submitted:
    if uploaded_file is None:
        st.warning("Please upload an image first.")
    else:
        # Use the uploaded image instead of a fixed path.
        image = Image.open(uploaded_file).convert("RGB")

        # Shrink huge images in place to keep inference fast.
        image.thumbnail((960, 960), Image.Resampling.LANCZOS)

        # Run AI inference on the image.
        inference_result = crop_model(image)

        # Display image with boxes (if overlay is available).
        if hasattr(inference_result, "image_overlay"):
            st.image(
                inference_result.image_overlay,
                caption="OCR Output (detected text regions)",
            )
        else:
            st.image(
                image,
                caption="Input image (no overlay available)",
            )

        st.subheader("OCR Results")
        # Keep only the fallible call inside ``try``; the display of a
        # successful conversion goes in ``else`` so the fallback path is
        # reached only when ``to_pandas`` is missing or fails.
        try:
            df = inference_result.to_pandas()
        except Exception:  # UI boundary: fall back to the raw result object
            st.write("Raw result object:")
            st.write(inference_result)
        else:
            st.dataframe(df)

            # Heuristic: columns whose name mentions "text" or "label" hold
            # the recognized strings.
            text_cols = [
                col
                for col in df.columns
                if "text" in col.lower() or "label" in col.lower()
            ]
            if text_cols:
                st.subheader("Recognized Text")
                all_texts = [
                    str(x)
                    for col in text_cols
                    for x in df[col].dropna().tolist()
                ]
                if all_texts:
                    st.write("\n".join(f"- {t}" for t in all_texts))