File size: 4,883 Bytes
3215154
 
e5bf98c
c37482a
3215154
59a5c1c
 
 
2f1bd27
69dfe0d
c37482a
2f1bd27
 
59a5c1c
 
 
 
 
69dfe0d
 
 
 
 
 
 
 
 
 
 
 
62750f4
59a5c1c
 
 
 
 
69dfe0d
c37482a
69dfe0d
 
 
2f1bd27
e5bf98c
69dfe0d
 
d441980
bbbba05
d441980
9dbbfc3
581bf1c
8a2ff37
d441980
ea3a700
 
d441980
2f1bd27
c37482a
 
 
d441980
 
bbbba05
 
c37482a
69dfe0d
c37482a
69dfe0d
 
d441980
 
 
69dfe0d
c37482a
bbbba05
2f1bd27
 
69dfe0d
 
d441980
 
 
69dfe0d
c37482a
bbbba05
2f1bd27
 
69dfe0d
2f1bd27
69dfe0d
 
e5bf98c
 
2f1bd27
 
 
c37482a
2f1bd27
59a5c1c
 
 
c37482a
 
 
 
 
 
 
 
 
 
 
69dfe0d
c37482a
69dfe0d
d441980
c37482a
69dfe0d
c37482a
 
bbbba05
69dfe0d
 
 
 
 
 
 
 
 
 
 
 
c37482a
 
 
 
 
d441980
c37482a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import streamlit as st
import degirum as dg
import degirum_tools
from PIL import Image

# -----------------------
# Page config
# -----------------------
st.set_page_config(
    page_title="Paddle OCR with DeGirum",
    page_icon="📝",
)

# -----------------------
# Sidebar content
# -----------------------
# NOTE: model names listed here must stay in sync with
# PADDLE_OCR_DET_MODEL_NAME / PADDLE_OCR_REC_MODEL_NAME below.
# The previous text advertised stale Hailo-8 quantized models while the
# app actually loads the float OpenVINO/CPU variants.
st.sidebar.markdown(
    """
**About this demo**

**Inference location:** `DeGirum Cloud`

**Models used:**
- **Text detection:** `paddle_ocr_detection--544x960_float_openvino_cpu_1`
- **Text recognition:** `paddle_ocr_recognition--48x320_float_openvino_cpu_1`

**Libraries:**
- `degirum`
- `degirum_tools`
- `streamlit`
"""
)

# -----------------------
# Main page content
# -----------------------
# App description shown under the title (kept verbatim).
_APP_DESCRIPTION = (
    "This app demonstrates an end-to-end text detection and recognition pipeline "
    "using PaddleOCR. It takes an image as the input, detects text regions, and "
    "decodes the recognized text using OCR models. The combined results are then displayed."
)

st.title("Paddle OCR Inference with DeGirum PySDK")
st.write(_APP_DESCRIPTION)
st.text("Upload an image. Then click on the Run OCR button.")

# -----------------------
# Hardcoded configuration (your notebook params)
# -----------------------
# "@cloud" routes inference requests to the DeGirum cloud service.
INFERENCE_HOST_ADDRESS = "@cloud"         # <-- changed from @local to @cloud
# Model zoo the model names below are resolved against.
ZOO_URL = "degirum/franklin_sandbox"
# Device the models run on at the inference host.
DEVICE_TYPE = ["OPENVINO/CPU"]

# Detection finds text boxes; recognition decodes the text inside each box.
# NOTE(review): the sidebar text above advertises Hailo-8 quantized models,
# which does not match these float OpenVINO names — confirm which is intended.
PADDLE_OCR_DET_MODEL_NAME = "paddle_ocr_detection--544x960_float_openvino_cpu_1"
PADDLE_OCR_REC_MODEL_NAME = "paddle_ocr_recognition--48x320_float_openvino_cpu_1"


@st.cache_resource
def load_crop_model():
    """
    Build the detection → recognition OCR pipeline.

    Loads the Paddle OCR text-detection and text-recognition models from the
    DeGirum zoo and combines them with a CroppingAndClassifyingCompoundModel,
    so each detected text region is cropped and fed to the recognizer.

    Both models are loaded with image_backend="pil" because this app passes a
    PIL.Image to the pipeline. @st.cache_resource lets Streamlit reuse the
    loaded pipeline across reruns instead of reloading the models each time.
    """
    token = st.secrets.get("DG_TOKEN", "")

    # Keyword arguments shared by both model loads.
    common_kwargs = dict(
        inference_host_address=INFERENCE_HOST_ADDRESS,
        zoo_url=ZOO_URL,
        device_type=DEVICE_TYPE,
        token=token,
        image_backend="pil",  # <-- key fix: app hands the model PIL images
    )

    det_model = dg.load_model(model_name=PADDLE_OCR_DET_MODEL_NAME, **common_kwargs)
    rec_model = dg.load_model(model_name=PADDLE_OCR_REC_MODEL_NAME, **common_kwargs)

    # Compound model: detection boxes are cropped and passed to recognition.
    return degirum_tools.CroppingAndClassifyingCompoundModel(det_model, rec_model)


# Cached by @st.cache_resource: models are loaded once and reused across reruns.
crop_model = load_crop_model()

# -----------------------
# Form: upload + run OCR
# -----------------------
with st.form("ocr_form"):
    # File picker + submit button; the form defers the rerun until submit.
    upload = st.file_uploader(
        "Input image",
        type=["jpg", "jpeg", "png", "bmp", "tiff"],
    )
    run_clicked = st.form_submit_button("Run OCR")

    if run_clicked and upload is None:
        st.warning("Please upload an image first.")
    elif run_clicked:
        # Decode the uploaded file into an RGB PIL image.
        img = Image.open(upload).convert("RGB")
        # Cap very large images; thumbnail resizes in place, keeping aspect ratio.
        img.thumbnail((960, 960), Image.Resampling.LANCZOS)

        # Detection + recognition in a single call through the compound model.
        result = crop_model(img)

        # Prefer the annotated overlay when the result object provides one.
        if hasattr(result, "image_overlay"):
            st.image(
                result.image_overlay,
                caption="OCR Output (detected text regions)",
            )
        else:
            st.image(
                img,
                caption="Input image (no overlay available)",
            )

        st.subheader("OCR Results")
        # Best effort: show a table if the result converts to a DataFrame,
        # otherwise fall back to dumping the raw result object.
        try:
            frame = result.to_pandas()
            st.dataframe(frame)

            # Columns whose names suggest they hold recognized text.
            text_like = [
                c for c in frame.columns
                if "text" in c.lower() or "label" in c.lower()
            ]
            if text_like:
                st.subheader("Recognized Text")
                collected = [
                    str(v)
                    for c in text_like
                    for v in frame[c].dropna().tolist()
                ]
                if collected:
                    st.write("\n".join(f"- {t}" for t in collected))
        except Exception:
            st.write("Raw result object:")
            st.write(result)