File size: 5,085 Bytes
4095636 fc8c0fc 4095636 fc8c0fc 4095636 fc8c0fc 4095636 fc8c0fc 4095636 fc8c0fc 4095636 fc8c0fc 4095636 fc8c0fc 4095636 fc8c0fc 4095636 fc8c0fc 4095636 fc8c0fc 4095636 fc8c0fc 4095636 fc8c0fc 4095636 fc8c0fc 4095636 fc8c0fc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
# import gradio as gr
# from rapidocr import RapidOCR, OCRVersion
# # 1. Initialize the OCR engine once with v5 defaults
# # We use v5 for Detection/Recognition and v4 for Classification (most stable v5 setup)
# engine = RapidOCR(params={
# "Det.ocr_version": OCRVersion.PPOCRV5,
# "Rec.ocr_version": OCRVersion.PPOCRV5,
# "Cls.ocr_version": OCRVersion.PPOCRV4,
# })
# def perform_ocr(img):
# if img is None:
# return None, None, "0.0"
# # 2. Run OCR. return_word_box=True provides the word/char level detail
# ocr_result = engine(img, return_word_box=True)
# # 3. Get the annotated preview image
# vis_img = ocr_result.vis()
# # 4. Format word-level results for the Dataframe
# # We flatten the word_results list using the logic from your advanced script
# word_list = []
# if ocr_result.word_results:
# flat_results = sum(ocr_result.word_results, ())
# for i, (text, score, _) in enumerate(flat_results):
# word_list.append([i + 1, text, round(float(score), 3)])
# return vis_img, word_list, f"{ocr_result.elapse:.3f}s"
# # 5. Build a clean, minimal UI
# with gr.Blocks(title="Rapid⚡OCR Simple") as demo:
# gr.Markdown("# Rapid⚡OCR v5")
# gr.Markdown("Upload an image to extract text with word-level bounding boxes.")
# with gr.Row():
# with gr.Column():
# input_img = gr.Image(label="Input Image", type="numpy")
# run_btn = gr.Button("Run OCR", variant="primary")
# with gr.Column():
# output_img = gr.Image(label="Preview (Bounding Boxes)")
# elapse_info = gr.Textbox(label="Processing Time")
# result_table = gr.Dataframe(
# headers=["ID", "Text", "Confidence"],
# label="Detected Words",
# interactive=False
# )
# run_btn.click(
# fn=perform_ocr,
# inputs=[input_img],
# outputs=[output_img, result_table, elapse_info]
# )
# if __name__ == "__main__":
# demo.launch()
import gradio as gr
from rapidocr import RapidOCR, OCRVersion
import json
import tempfile
import os
# Build the shared OCR engine once at import time so every request reuses it.
# Detection and recognition are pinned to the PP-OCRv5 models, while the
# classification stage is pinned to PP-OCRv4.
_ocr_params = {
    "Det.ocr_version": OCRVersion.PPOCRV5,
    "Rec.ocr_version": OCRVersion.PPOCRV5,
    "Cls.ocr_version": OCRVersion.PPOCRV4,
}
engine = RapidOCR(params=_ocr_params)
def perform_ocr(img):
    """Run OCR on an image and package the word-level results for the UI.

    Args:
        img: The image to process, or ``None`` when no image was supplied.

    Returns:
        A 4-tuple ``(vis_img, table_rows, elapsed, json_path)``:

        * ``vis_img`` - annotated preview produced by ``ocr_result.vis()``.
        * ``table_rows`` - one ``[id, text, confidence]`` row per word, with
          ids starting at 1 and confidence rounded to 3 decimals.
        * ``elapsed`` - processing time formatted as ``"<seconds>s"``.
        * ``json_path`` - path of a JSON file holding the page-based export.

        When ``img`` is ``None`` the result is ``(None, None, "0.0", None)``.
    """
    if img is None:
        return None, None, "0.0", None

    # return_word_box=True asks the engine for word-level results.
    ocr_result = engine(img, return_word_box=True)
    vis_img = ocr_result.vis()

    word_list_for_table = []
    json_data_list = []
    if ocr_result.word_results:
        # word_results is grouped per line. Walk it lazily instead of using
        # sum(..., ()), which re-copies the accumulated tuple on every
        # addition and only works when each group is a tuple.
        words = (
            word
            for line_words in ocr_result.word_results
            for word in line_words
        )
        for idx, (text, score, bbox) in enumerate(words, start=1):
            word_list_for_table.append([idx, text, round(float(score), 3)])

            # bbox is typically four [x, y] corner points; collapse it to an
            # axis-aligned [xmin, ymin, xmax, ymax] rectangle.
            xs = [point[0] for point in bbox]
            ys = [point[1] for point in bbox]
            json_data_list.append({
                "word": text,
                "bbox": [int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))],
                "type": "text",
            })

    # Wrap the words in the page-based structure used by the export format.
    final_json = [{
        "page_number": 1,
        "data": json_data_list,
        "column_separator_x": None,
    }]

    # Each call gets its own temporary directory so concurrent requests can
    # never overwrite or truncate one another's download, while the file
    # keeps the stable name "ocr_results.json".
    # NOTE: these directories are not removed here; cleanup is left to the
    # operating system's temp-file housekeeping.
    out_dir = tempfile.mkdtemp(prefix="rapidocr_")
    json_path = os.path.join(out_dir, "ocr_results.json")
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(final_json, f, indent=4, ensure_ascii=False)

    return vis_img, word_list_for_table, f"{ocr_result.elapse:.3f}s", json_path
# Gradio interface: an upload/run column next to a results column, with the
# table of detected words declared after them.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been flattened - confirm it matches the intended layout.
with gr.Blocks(title="Rapid⚡OCR to JSON") as demo:
    gr.Markdown("# Rapid⚡OCR v5 with JSON Export")
    gr.Markdown(
        "Extract word-level bounding boxes in the same format as your preprocessed data."
    )

    with gr.Row():
        # Input side: the image to process and the trigger button.
        with gr.Column():
            input_img = gr.Image(type="numpy", label="Input Image")
            run_btn = gr.Button("Run OCR", variant="primary")

        # Output side: annotated preview, timing, and the JSON download.
        with gr.Column():
            output_img = gr.Image(label="Preview")
            elapse_info = gr.Textbox(label="Processing Time")
            json_download = gr.File(label="Download OCR JSON")

    # Read-only table listing every detected word with its confidence.
    result_table = gr.Dataframe(
        label="Detected Words",
        headers=["ID", "Text", "Confidence"],
        interactive=False,
    )

    # The order of `outputs` must match the tuple returned by perform_ocr.
    run_btn.click(
        perform_ocr,
        inputs=[input_img],
        outputs=[output_img, result_table, elapse_info, json_download],
    )

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()