# rocr / app.py
# heerjtdev's picture
# Update app.py
# 4095636 verified
# import gradio as gr
# from rapidocr import RapidOCR, OCRVersion
# # 1. Initialize the OCR engine once with v5 defaults
# # We use v5 for Detection/Recognition and v4 for Classification (most stable v5 setup)
# engine = RapidOCR(params={
# "Det.ocr_version": OCRVersion.PPOCRV5,
# "Rec.ocr_version": OCRVersion.PPOCRV5,
# "Cls.ocr_version": OCRVersion.PPOCRV4,
# })
# def perform_ocr(img):
# if img is None:
# return None, None, "0.0"
# # 2. Run OCR. return_word_box=True provides the word/char level detail
# ocr_result = engine(img, return_word_box=True)
# # 3. Get the annotated preview image
# vis_img = ocr_result.vis()
# # 4. Format word-level results for the Dataframe
# # We flatten the word_results list using the logic from your advanced script
# word_list = []
# if ocr_result.word_results:
# flat_results = sum(ocr_result.word_results, ())
# for i, (text, score, _) in enumerate(flat_results):
# word_list.append([i + 1, text, round(float(score), 3)])
# return vis_img, word_list, f"{ocr_result.elapse:.3f}s"
# # 5. Build a clean, minimal UI
# with gr.Blocks(title="Rapid⚡OCR Simple") as demo:
# gr.Markdown("# Rapid⚡OCR v5")
# gr.Markdown("Upload an image to extract text with word-level bounding boxes.")
# with gr.Row():
# with gr.Column():
# input_img = gr.Image(label="Input Image", type="numpy")
# run_btn = gr.Button("Run OCR", variant="primary")
# with gr.Column():
# output_img = gr.Image(label="Preview (Bounding Boxes)")
# elapse_info = gr.Textbox(label="Processing Time")
# result_table = gr.Dataframe(
# headers=["ID", "Text", "Confidence"],
# label="Detected Words",
# interactive=False
# )
# run_btn.click(
# fn=perform_ocr,
# inputs=[input_img],
# outputs=[output_img, result_table, elapse_info]
# )
# if __name__ == "__main__":
# demo.launch()
import gradio as gr
from rapidocr import RapidOCR, OCRVersion
import json
import tempfile
import os
# Build the OCR engine once, at import time. Detection and recognition use
# the PP-OCRv5 models; classification uses PP-OCRv4.
_ENGINE_PARAMS = {
    "Det.ocr_version": OCRVersion.PPOCRV5,
    "Rec.ocr_version": OCRVersion.PPOCRV5,
    "Cls.ocr_version": OCRVersion.PPOCRV4,
}
engine = RapidOCR(params=_ENGINE_PARAMS)
def perform_ocr(img):
    """Run OCR on ``img`` and package the word-level results for the UI.

    Args:
        img: Image to recognise; it is passed straight to ``engine``.
            ``None`` means no image was supplied.

    Returns:
        A 4-tuple ``(preview, table_rows, elapsed, json_path)``:

        * ``preview`` - the annotated image returned by ``ocr_result.vis()``.
        * ``table_rows`` - one ``[id, text, confidence]`` row per word, with
          ids starting at 1 and confidence rounded to 3 decimals.
        * ``elapsed`` - ``ocr_result.elapse`` formatted as ``"<seconds>s"``.
        * ``json_path`` - path of a JSON file holding the page-based export.

        When ``img`` is ``None`` the tuple is ``(None, None, "0.0", None)``
        and neither the engine nor the file system is touched.
    """
    if img is None:
        return None, None, "0.0", None

    # Run OCR with word-level detection enabled.
    ocr_result = engine(img, return_word_box=True)

    # Generate the annotated preview image.
    vis_img = ocr_result.vis()

    word_list_for_table = []
    json_data_list = []
    if ocr_result.word_results:
        # Flatten the per-line word results lazily. Unlike ``sum(..., ())``
        # this is linear in the number of words and works whether each line
        # is a tuple or a list.
        flat_results = (
            word for line in ocr_result.word_results for word in line
        )
        for word_id, (text, score, bbox) in enumerate(flat_results, start=1):
            # 1. Table row.
            word_list_for_table.append(
                [word_id, text, round(float(score), 3)]
            )

            # 2. JSON record: collapse the box points into an axis-aligned
            # [xmin, ymin, xmax, ymax] rectangle.
            # bbox is typically [[x1, y1], [x2, y2], [x3, y3], [x4, y4]].
            xs = [point[0] for point in bbox]
            ys = [point[1] for point in bbox]
            json_data_list.append({
                "word": text,
                "bbox": [int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))],
                "type": "text",
            })

    # Wrap in the requested page-based JSON structure.
    final_json = [{
        "page_number": 1,
        "data": json_data_list,
        "column_separator_x": None,
    }]

    # Write the export into a directory unique to this call. A single fixed
    # path in the shared temp dir would let concurrent or successive requests
    # overwrite each other's download. The file name itself is kept, so the
    # downloaded file is still called "ocr_results.json".
    export_dir = tempfile.mkdtemp(prefix="ocr_results_")
    json_path = os.path.join(export_dir, "ocr_results.json")
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(final_json, f, indent=4, ensure_ascii=False)

    return vis_img, word_list_for_table, f"{ocr_result.elapse:.3f}s", json_path
# Gradio interface: the image input and trigger button sit in one column,
# the OCR outputs in the other, and the word table follows.
# NOTE(review): the nesting below is reconstructed because the original
# indentation was lost; confirm in particular that the results table belongs
# below the row rather than inside the second column.
with gr.Blocks(title="Rapid⚡OCR to JSON") as demo:
    gr.Markdown("# Rapid⚡OCR v5 with JSON Export")
    gr.Markdown(
        "Extract word-level bounding boxes in the same format as your preprocessed data."
    )

    with gr.Row():
        # Left-hand side: what the user provides.
        with gr.Column():
            input_img = gr.Image(type="numpy", label="Input Image")
            run_btn = gr.Button("Run OCR", variant="primary")

        # Right-hand side: what the OCR run produces.
        with gr.Column():
            output_img = gr.Image(label="Preview")
            elapse_info = gr.Textbox(label="Processing Time")
            json_download = gr.File(label="Download OCR JSON")

    result_table = gr.Dataframe(
        label="Detected Words",
        headers=["ID", "Text", "Confidence"],
        interactive=False,
    )

    # The output order must match the tuple returned by perform_ocr:
    # (preview image, table rows, elapsed time, JSON path).
    run_btn.click(
        fn=perform_ocr,
        inputs=[input_img],
        outputs=[output_img, result_table, elapse_info, json_download],
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()