Spaces:
Sleeping
Sleeping
File size: 4,883 Bytes
3215154 e5bf98c c37482a 3215154 59a5c1c 2f1bd27 69dfe0d c37482a 2f1bd27 59a5c1c 69dfe0d 62750f4 59a5c1c 69dfe0d c37482a 69dfe0d 2f1bd27 e5bf98c 69dfe0d d441980 bbbba05 d441980 9dbbfc3 581bf1c 8a2ff37 d441980 ea3a700 d441980 2f1bd27 c37482a d441980 bbbba05 c37482a 69dfe0d c37482a 69dfe0d d441980 69dfe0d c37482a bbbba05 2f1bd27 69dfe0d d441980 69dfe0d c37482a bbbba05 2f1bd27 69dfe0d 2f1bd27 69dfe0d e5bf98c 2f1bd27 c37482a 2f1bd27 59a5c1c c37482a 69dfe0d c37482a 69dfe0d d441980 c37482a 69dfe0d c37482a bbbba05 69dfe0d c37482a d441980 c37482a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
import streamlit as st
import degirum as dg
import degirum_tools
from PIL import Image
# -----------------------
# Page config
# -----------------------
st.set_page_config(
    page_title="Paddle OCR with DeGirum",
    page_icon="📝",
)

# -----------------------
# Sidebar content
# -----------------------
# Keep the model names in this text in sync with the PADDLE_OCR_*_MODEL_NAME
# constants used for inference below. (The sidebar previously advertised the
# Hailo-8 quantized models while the app actually loads the OpenVINO/CPU
# float variants.)
st.sidebar.markdown(
    """
**About this demo**
**Inference location:** `DeGirum Cloud`
**Models used:**
- **Text detection:** `paddle_ocr_detection--544x960_float_openvino_cpu_1`
- **Text recognition:** `paddle_ocr_recognition--48x320_float_openvino_cpu_1`
**Libraries:**
- `degirum`
- `degirum_tools`
- `streamlit`
"""
)

# -----------------------
# Main page content
# -----------------------
st.title("Paddle OCR Inference with DeGirum PySDK")
st.write(
    "This app demonstrates an end-to-end text detection and recognition pipeline "
    "using PaddleOCR. It takes an image as the input, detects text regions, and "
    "decodes the recognized text using OCR models. The combined results are then displayed."
)
st.text("Upload an image. Then click on the Run OCR button.")
# -----------------------
# Hardcoded configuration
# -----------------------
INFERENCE_HOST_ADDRESS = "@cloud"  # run inference on DeGirum Cloud rather than a local device
ZOO_URL = "degirum/franklin_sandbox"  # cloud model zoo the two OCR models are pulled from
DEVICE_TYPE = ["OPENVINO/CPU"]  # runtime/device pair requested for both models
PADDLE_OCR_DET_MODEL_NAME = "paddle_ocr_detection--544x960_float_openvino_cpu_1"  # text-region detector
PADDLE_OCR_REC_MODEL_NAME = "paddle_ocr_recognition--48x320_float_openvino_cpu_1"  # text recognizer
def _load_ocr_model(model_name: str, token: str):
    """Load one DeGirum model from the cloud zoo configured above.

    image_backend="pil" is required because the app feeds PIL.Image objects
    (not numpy arrays) into the pipeline.

    Args:
        model_name: Name of the model in the zoo at ``ZOO_URL``.
        token: DeGirum cloud access token (may be empty).

    Returns:
        The loaded ``degirum`` model object.
    """
    return dg.load_model(
        model_name=model_name,
        inference_host_address=INFERENCE_HOST_ADDRESS,
        zoo_url=ZOO_URL,
        device_type=DEVICE_TYPE,
        token=token,
        image_backend="pil",  # key fix: app passes PIL images
    )


@st.cache_resource
def load_crop_model():
    """Build the detection → crop → recognition compound OCR model.

    Loads the Paddle OCR detection and recognition models and wraps them in a
    ``CroppingAndClassifyingCompoundModel`` so each detected text region is
    cropped and fed into the recognizer.

    Cached with ``st.cache_resource`` so the models are loaded once per
    Streamlit server process, not on every rerun.

    Returns:
        A ``degirum_tools.CroppingAndClassifyingCompoundModel`` instance.
    """
    # Token is read from Streamlit secrets; empty string works for public zoos.
    token = st.secrets.get("DG_TOKEN", "")
    detection_model = _load_ocr_model(PADDLE_OCR_DET_MODEL_NAME, token)
    recognition_model = _load_ocr_model(PADDLE_OCR_REC_MODEL_NAME, token)
    return degirum_tools.CroppingAndClassifyingCompoundModel(
        detection_model,
        recognition_model,
    )
crop_model = load_crop_model()

# -----------------------
# Form: upload + run OCR
# -----------------------
with st.form("ocr_form"):
    uploaded_file = st.file_uploader(
        "Input image",
        type=["jpg", "jpeg", "png", "bmp", "tiff"],
    )
    submitted = st.form_submit_button("Run OCR")

if submitted:
    if uploaded_file is None:
        st.warning("Please upload an image first.")
    else:
        # Normalize to RGB so the models get a consistent 3-channel image.
        image = Image.open(uploaded_file).convert("RGB")
        # Shrink huge images in place to keep the cloud round-trip fast.
        image.thumbnail((960, 960), Image.Resampling.LANCZOS)
        # Run the detection → crop → recognition compound model.
        inference_result = crop_model(image)
        # Show the annotated image when the result provides an overlay;
        # otherwise fall back to the plain input image.
        if hasattr(inference_result, "image_overlay"):
            st.image(
                inference_result.image_overlay,
                caption="OCR Output (detected text regions)",
            )
        else:
            st.image(
                image,
                caption="Input image (no overlay available)",
            )
        st.subheader("OCR Results")
        # Best-effort structured display: not every result type implements
        # to_pandas(), so fall back to printing the raw result object.
        try:
            df = inference_result.to_pandas()
            st.dataframe(df)
            # Columns whose names suggest they carry recognized text.
            text_cols = [
                col
                for col in df.columns
                if "text" in col.lower() or "label" in col.lower()
            ]
            if text_cols:
                st.subheader("Recognized Text")
                all_texts = []
                for col in text_cols:
                    all_texts.extend(str(x) for x in df[col].dropna().tolist())
                if all_texts:
                    st.write("\n".join(f"- {t}" for t in all_texts))
        except Exception:
            st.write("Raw result object:")
            st.write(inference_result)