File size: 5,085 Bytes
4095636
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc8c0fc
 
4095636
 
 
fc8c0fc
4095636
fc8c0fc
 
 
 
 
 
 
 
4095636
fc8c0fc
4095636
fc8c0fc
 
4095636
fc8c0fc
 
4095636
 
 
 
fc8c0fc
4095636
fc8c0fc
4095636
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc8c0fc
4095636
fc8c0fc
4095636
 
 
 
fc8c0fc
 
 
 
 
 
 
4095636
fc8c0fc
4095636
fc8c0fc
 
 
 
 
 
 
 
 
 
4095636
fc8c0fc
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# import gradio as gr
# from rapidocr import RapidOCR, OCRVersion

# # 1. Initialize the OCR engine once with v5 defaults
# # We use v5 for Detection/Recognition and v4 for Classification (most stable v5 setup)
# engine = RapidOCR(params={
#     "Det.ocr_version": OCRVersion.PPOCRV5,
#     "Rec.ocr_version": OCRVersion.PPOCRV5,
#     "Cls.ocr_version": OCRVersion.PPOCRV4,
# })

# def perform_ocr(img):
#     if img is None:
#         return None, None, "0.0"

#     # 2. Run OCR. return_word_box=True provides the word/char level detail
#     ocr_result = engine(img, return_word_box=True)
    
#     # 3. Get the annotated preview image
#     vis_img = ocr_result.vis()
    
#     # 4. Format word-level results for the Dataframe
#     # We flatten the word_results list using the logic from your advanced script
#     word_list = []
#     if ocr_result.word_results:
#         flat_results = sum(ocr_result.word_results, ())
#         for i, (text, score, _) in enumerate(flat_results):
#             word_list.append([i + 1, text, round(float(score), 3)])
            
#     return vis_img, word_list, f"{ocr_result.elapse:.3f}s"

# # 5. Build a clean, minimal UI
# with gr.Blocks(title="Rapid⚡OCR Simple") as demo:
#     gr.Markdown("# Rapid⚡OCR v5")
#     gr.Markdown("Upload an image to extract text with word-level bounding boxes.")
    
#     with gr.Row():
#         with gr.Column():
#             input_img = gr.Image(label="Input Image", type="numpy")
#             run_btn = gr.Button("Run OCR", variant="primary")
        
#         with gr.Column():
#             output_img = gr.Image(label="Preview (Bounding Boxes)")
#             elapse_info = gr.Textbox(label="Processing Time")
            
#     result_table = gr.Dataframe(
#         headers=["ID", "Text", "Confidence"],
#         label="Detected Words",
#         interactive=False
#     )

#     run_btn.click(
#         fn=perform_ocr,
#         inputs=[input_img],
#         outputs=[output_img, result_table, elapse_info]
#     )

# if __name__ == "__main__":
#     demo.launch()














import gradio as gr
from rapidocr import RapidOCR, OCRVersion
import json
import tempfile
import os

# Model versions for the shared OCR engine: PP-OCRv5 for the "Det" and "Rec"
# stages, PP-OCRv4 for the "Cls" stage.
_OCR_VERSIONS = {
    "Det.ocr_version": OCRVersion.PPOCRV5,
    "Rec.ocr_version": OCRVersion.PPOCRV5,
    "Cls.ocr_version": OCRVersion.PPOCRV4,
}

# Built once at import time and reused by every OCR request.
engine = RapidOCR(params=_OCR_VERSIONS)

def _quad_to_rect(quad):
    """Reduce a polygon of (x, y) points to integer [xmin, ymin, xmax, ymax]."""
    xs = [point[0] for point in quad]
    ys = [point[1] for point in quad]
    return [int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))]


def perform_ocr(img):
    """Run word-level OCR on an image and export the words as JSON.

    Args:
        img: Image passed straight to the module-level ``engine``; ``None``
            means no image was supplied.

    Returns:
        A 4-tuple ``(vis_img, rows, elapsed, json_path)``:

        * ``vis_img`` -- whatever ``ocr_result.vis()`` returns (the annotated
          preview).
        * ``rows`` -- one ``[id, text, confidence]`` row per word, with ids
          starting at 1 and confidence rounded to 3 decimals.
        * ``elapsed`` -- ``ocr_result.elapse`` formatted as ``"<seconds>s"``.
        * ``json_path`` -- path of the written ``ocr_results.json``.

        When ``img`` is ``None`` the result is ``(None, None, "0.0", None)``
        and the engine is not invoked.
    """
    if img is None:
        return None, None, "0.0", None

    # return_word_box=True requests per-word results in addition to lines.
    ocr_result = engine(img, return_word_box=True)
    vis_img = ocr_result.vis()

    table_rows = []
    json_words = []

    if ocr_result.word_results:
        # word_results is grouped per line; flatten it to one word sequence.
        # A comprehension avoids the quadratic tuple concatenation of
        # sum(..., ()) and accepts lines that are lists as well as tuples.
        words = [word for line in ocr_result.word_results for word in line]

        for index, (text, score, bbox) in enumerate(words, start=1):
            table_rows.append([index, text, round(float(score), 3)])
            # NOTE(review): bbox is assumed to be a sequence of (x, y)
            # corner points (typically four) -- confirm against RapidOCR.
            json_words.append({
                "word": text,
                "bbox": _quad_to_rect(bbox),
                "type": "text",
            })

    # Page-based structure expected by the downstream consumer.
    final_json = [{
        "page_number": 1,
        "data": json_words,
        "column_separator_x": None,
    }]

    # Each call writes into its own fresh directory so that concurrent or
    # successive requests never overwrite (or serve) one another's export,
    # while the downloaded file keeps the name "ocr_results.json".
    # The directory is not removed here; it lives under the system temp dir.
    export_dir = tempfile.mkdtemp(prefix="rapidocr_")
    json_path = os.path.join(export_dir, "ocr_results.json")
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(final_json, f, indent=4, ensure_ascii=False)

    return vis_img, table_rows, f"{ocr_result.elapse:.3f}s", json_path

# Gradio interface: a two-column row (input + button on the left, preview,
# timing and JSON download on the right) followed by a table of detected words.
with gr.Blocks(title="Rapid⚡OCR to JSON") as demo:
    gr.Markdown("# Rapid⚡OCR v5 with JSON Export")
    gr.Markdown("Extract word-level bounding boxes in the same format as your preprocessed data.")
    
    with gr.Row():
        with gr.Column():
            # type="numpy": the uploaded image is handed to the callback as a
            # NumPy array (per Gradio's Image component documentation).
            input_img = gr.Image(label="Input Image", type="numpy")
            run_btn = gr.Button("Run OCR", variant="primary")
        
        with gr.Column():
            output_img = gr.Image(label="Preview")
            elapse_info = gr.Textbox(label="Processing Time")
            json_download = gr.File(label="Download OCR JSON")
            
    # Read-only table; its columns mirror the [id, text, confidence] rows
    # built by perform_ocr.
    result_table = gr.Dataframe(
        headers=["ID", "Text", "Confidence"],
        label="Detected Words",
        interactive=False
    )

    # The order of `outputs` must match the 4-tuple returned by perform_ocr:
    # (annotated image, table rows, elapsed-time string, JSON file path).
    run_btn.click(
        fn=perform_ocr,
        inputs=[input_img],
        outputs=[output_img, result_table, elapse_info, json_download]
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()