# paddle_ocr / app.py
# Author: FranklinMoses — "Update app.py" (commit 9dbbfc3, verified)
import streamlit as st
import degirum as dg
import degirum_tools
from PIL import Image
# -----------------------
# Hardcoded configuration (notebook parameters)
# -----------------------
INFERENCE_HOST_ADDRESS = "@cloud"  # run inference on the DeGirum cloud farm
ZOO_URL = "degirum/franklin_sandbox"
DEVICE_TYPE = ["OPENVINO/CPU"]
PADDLE_OCR_DET_MODEL_NAME = "paddle_ocr_detection--544x960_float_openvino_cpu_1"
PADDLE_OCR_REC_MODEL_NAME = "paddle_ocr_recognition--48x320_float_openvino_cpu_1"

# -----------------------
# Page config (must be the first Streamlit command executed)
# -----------------------
st.set_page_config(
    page_title="Paddle OCR with DeGirum",
    page_icon="📝",
)

# -----------------------
# Sidebar content
# -----------------------
# Fix: the sidebar previously listed the Hailo-8 quantized model names, but the
# app actually loads the OpenVINO/CPU float models configured above. Interpolate
# the configured names so the UI can never drift out of sync with the code.
st.sidebar.markdown(
    f"""
**About this demo**
**Inference location:** `DeGirum Cloud`
**Models used:**
- **Text detection:** `{PADDLE_OCR_DET_MODEL_NAME}`
- **Text recognition:** `{PADDLE_OCR_REC_MODEL_NAME}`
**Libraries:**
- `degirum`
- `degirum_tools`
- `streamlit`
"""
)

# -----------------------
# Main page content
# -----------------------
st.title("Paddle OCR Inference with DeGirum PySDK")
st.write(
    "This app demonstrates an end-to-end text detection and recognition pipeline "
    "using PaddleOCR. It takes an image as the input, detects text regions, and "
    "decodes the recognized text using OCR models. The combined results are then displayed."
)
st.text("Upload an image. Then click on the Run OCR button.")
@st.cache_resource
def load_crop_model():
    """
    Build and cache the detection → recognition compound OCR model.

    Loads the PaddleOCR text-detection and text-recognition models from the
    DeGirum model zoo, then chains them with a
    CroppingAndClassifyingCompoundModel so every detected text region is
    cropped and handed to the recognizer.

    IMPORTANT: image_backend="pil" because the app passes a PIL.Image.
    """
    token = st.secrets.get("DG_TOKEN", "")

    def _load(model_name):
        # Both models share identical connection settings; load via one helper.
        return dg.load_model(
            model_name=model_name,
            inference_host_address=INFERENCE_HOST_ADDRESS,
            zoo_url=ZOO_URL,
            device_type=DEVICE_TYPE,
            token=token,
            image_backend="pil",  # app feeds PIL images, not file paths/arrays
        )

    detector = _load(PADDLE_OCR_DET_MODEL_NAME)
    recognizer = _load(PADDLE_OCR_REC_MODEL_NAME)

    # det → crops → rec
    return degirum_tools.CroppingAndClassifyingCompoundModel(detector, recognizer)
def _collect_recognized_texts(df):
    """Gather recognized-text strings from any 'text'/'label'-like columns."""
    text_cols = [c for c in df.columns if "text" in c.lower() or "label" in c.lower()]
    texts = []
    for col in text_cols:
        texts.extend(str(x) for x in df[col].dropna().tolist())
    return texts


crop_model = load_crop_model()

# -----------------------
# Form: upload + run OCR
# -----------------------
with st.form("ocr_form"):
    uploaded_file = st.file_uploader(
        "Input image",
        type=["jpg", "jpeg", "png", "bmp", "tiff"],
    )
    submitted = st.form_submit_button("Run OCR")

if submitted:
    if uploaded_file is None:
        st.warning("Please upload an image first.")
    else:
        # Use the uploaded image instead of a fixed path.
        image = Image.open(uploaded_file).convert("RGB")
        # Shrink huge images in place to keep cloud inference fast; thumbnail
        # preserves aspect ratio and never upscales.
        image.thumbnail((960, 960), Image.Resampling.LANCZOS)

        # Run AI inference on the image.
        inference_result = crop_model(image)

        # Display the annotated image when an overlay is available.
        if hasattr(inference_result, "image_overlay"):
            st.image(
                inference_result.image_overlay,
                caption="OCR Output (detected text regions)",
            )
        else:
            st.image(
                image,
                caption="Input image (no overlay available)",
            )

        st.subheader("OCR Results")
        # Fix: the original wrapped the whole results section in a bare
        # `except Exception:` that silently discarded the error. Narrow the
        # try to the one call that can reasonably fail (to_pandas) and surface
        # the failure reason in the fallback path.
        try:
            df = inference_result.to_pandas()
        except Exception as exc:
            st.info(f"Structured output unavailable ({exc}); showing raw result.")
            st.write("Raw result object:")
            st.write(inference_result)
        else:
            st.dataframe(df)
            recognized = _collect_recognized_texts(df)
            if recognized:
                st.subheader("Recognized Text")
                st.write("\n".join(f"- {t}" for t in recognized))