# Hugging Face Space: heerjtdev/rocr (status: Sleeping) - file size: 5,085 bytes

# import gradio as gr
# from rapidocr import RapidOCR, OCRVersion

# # 1. Initialize the OCR engine once with v5 defaults
# # We use v5 for Detection/Recognition and v4 for Classification (most stable v5 setup)
# engine = RapidOCR(params={
#     "Det.ocr_version": OCRVersion.PPOCRV5,
#     "Rec.ocr_version": OCRVersion.PPOCRV5,
#     "Cls.ocr_version": OCRVersion.PPOCRV4,
# })

# def perform_ocr(img):
#     if img is None:
#         return None, None, "0.0"

#     # 2. Run OCR. return_word_box=True provides the word/char level detail
#     ocr_result = engine(img, return_word_box=True)
    
#     # 3. Get the annotated preview image
#     vis_img = ocr_result.vis()
    
#     # 4. Format word-level results for the Dataframe
#     # We flatten the word_results list using the logic from your advanced script
#     word_list = []
#     if ocr_result.word_results:
#         flat_results = sum(ocr_result.word_results, ())
#         for i, (text, score, _) in enumerate(flat_results):
#             word_list.append([i + 1, text, round(float(score), 3)])
            
#     return vis_img, word_list, f"{ocr_result.elapse:.3f}s"

# # 5. Build a clean, minimal UI
# with gr.Blocks(title="Rapid⚡OCR Simple") as demo:
#     gr.Markdown("# Rapid⚡OCR v5")
#     gr.Markdown("Upload an image to extract text with word-level bounding boxes.")
    
#     with gr.Row():
#         with gr.Column():
#             input_img = gr.Image(label="Input Image", type="numpy")
#             run_btn = gr.Button("Run OCR", variant="primary")
        
#         with gr.Column():
#             output_img = gr.Image(label="Preview (Bounding Boxes)")
#             elapse_info = gr.Textbox(label="Processing Time")
            
#     result_table = gr.Dataframe(
#         headers=["ID", "Text", "Confidence"],
#         label="Detected Words",
#         interactive=False
#     )

#     run_btn.click(
#         fn=perform_ocr,
#         inputs=[input_img],
#         outputs=[output_img, result_table, elapse_info]
#     )

# if __name__ == "__main__":
#     demo.launch()














import gradio as gr
from rapidocr import RapidOCR, OCRVersion
import json
import tempfile
import os

# OCR model version per pipeline stage, keyed by RapidOCR's dotted parameter
# names: PP-OCRv5 for "Det" and "Rec", PP-OCRv4 for "Cls".
_ENGINE_PARAMS = {
    "Det.ocr_version": OCRVersion.PPOCRV5,
    "Rec.ocr_version": OCRVersion.PPOCRV5,
    "Cls.ocr_version": OCRVersion.PPOCRV4,
}

# Build the engine once at import time so every request reuses the same instance.
engine = RapidOCR(params=_ENGINE_PARAMS)

def _quad_to_xyxy(quad):
    """Collapse a sequence of (x, y) points into ``[xmin, ymin, xmax, ymax]`` ints."""
    xs = [point[0] for point in quad]
    ys = [point[1] for point in quad]
    return [int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))]


def _iter_words(word_results):
    """Yield ``(text, score, bbox)`` records from the per-line word results."""
    for line in word_results or ():
        for word in line:
            # Defensive: only pass on well-formed 3-item records so the caller's
            # unpacking cannot fail on placeholder/empty entries.
            # NOTE(review): presumably needed when nothing is recognised --
            # confirm against the installed rapidocr version.
            if isinstance(word, (tuple, list)) and len(word) == 3:
                yield word


def perform_ocr(img):
    """Run OCR on ``img`` and prepare the outputs shown in the UI.

    Args:
        img: The input image handed over by the Gradio image component, or
            ``None`` when no image was supplied.

    Returns:
        A 4-tuple ``(vis_img, table_rows, elapsed_text, json_path)``:

        * ``vis_img`` -- the result of ``ocr_result.vis()`` (annotated preview).
        * ``table_rows`` -- ``[id, text, confidence]`` rows, ids starting at 1
          and confidence rounded to 3 decimals.
        * ``elapsed_text`` -- ``ocr_result.elapse`` formatted as ``"<sec>s"``.
        * ``json_path`` -- path of a JSON file holding a one-page list with
          every word and its ``[xmin, ymin, xmax, ymax]`` box.

        When ``img`` is ``None`` the tuple is ``(None, None, "0.0", None)``.
    """
    if img is None:
        return None, None, "0.0", None

    # Run OCR with word-level detection enabled.
    ocr_result = engine(img, return_word_box=True)

    # Generate the annotated image.
    vis_img = ocr_result.vis()

    word_list_for_table = []
    json_data_list = []

    for index, (text, score, bbox) in enumerate(
        _iter_words(ocr_result.word_results), start=1
    ):
        # 1. Table row.
        word_list_for_table.append([index, text, round(float(score), 3)])

        # 2. JSON entry. A word without geometry cannot be expressed in the
        #    box-based export format, so it is listed in the table only.
        if bbox is None:
            continue
        # bbox is typically [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]; reduce it
        # to its axis-aligned bounds.
        json_data_list.append({
            "word": text,
            "bbox": _quad_to_xyxy(bbox),
            "type": "text",
        })

    # Wrap in the requested page-based JSON structure.
    final_json = [{
        "page_number": 1,
        "data": json_data_list,
        "column_separator_x": None,
    }]

    # Write into a fresh directory per call: a single fixed path in the shared
    # temp dir would let concurrent or successive requests overwrite each
    # other's download. The file name itself is kept so downloads look the same.
    output_dir = tempfile.mkdtemp(prefix="rocr_")
    json_path = os.path.join(output_dir, "ocr_results.json")
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(final_json, f, indent=4, ensure_ascii=False)

    return vis_img, word_list_for_table, f"{ocr_result.elapse:.3f}s", json_path

# Gradio interface. Layout: a title and description, then one row with two
# columns (input image + run button on the left; preview, processing time and
# JSON download on the right), and the detected-words table below the row.
with gr.Blocks(title="Rapid⚡OCR to JSON") as demo:
    gr.Markdown("# Rapid⚡OCR v5 with JSON Export")
    gr.Markdown("Extract word-level bounding boxes in the same format as your preprocessed data.")

    with gr.Row():
        # Left column: what the user provides.
        with gr.Column():
            # type="numpy" asks Gradio to pass the image to the callback as a NumPy array.
            input_img = gr.Image(label="Input Image", type="numpy")
            run_btn = gr.Button("Run OCR", variant="primary")

        # Right column: what perform_ocr produces (except the table).
        with gr.Column():
            output_img = gr.Image(label="Preview")
            elapse_info = gr.Textbox(label="Processing Time")
            json_download = gr.File(label="Download OCR JSON")

    # Display-only table; its columns mirror the [id, text, confidence] rows
    # built by perform_ocr.
    result_table = gr.Dataframe(
        headers=["ID", "Text", "Confidence"],
        label="Detected Words",
        interactive=False
    )

    # The order of `outputs` must match the 4-tuple returned by perform_ocr:
    # annotated image, table rows, elapsed-time text, JSON file path.
    run_btn.click(
        fn=perform_ocr,
        inputs=[input_img],
        outputs=[output_img, result_table, elapse_info, json_download]
    )

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()