| # import gradio as gr | |
| # from rapidocr import RapidOCR, OCRVersion | |
| # # 1. Initialize the OCR engine once with v5 defaults | |
| # # We use v5 for Detection/Recognition and v4 for Classification (most stable v5 setup) | |
| # engine = RapidOCR(params={ | |
| # "Det.ocr_version": OCRVersion.PPOCRV5, | |
| # "Rec.ocr_version": OCRVersion.PPOCRV5, | |
| # "Cls.ocr_version": OCRVersion.PPOCRV4, | |
| # }) | |
| # def perform_ocr(img): | |
| # if img is None: | |
| # return None, None, "0.0" | |
| # # 2. Run OCR. return_word_box=True provides the word/char level detail | |
| # ocr_result = engine(img, return_word_box=True) | |
| # # 3. Get the annotated preview image | |
| # vis_img = ocr_result.vis() | |
| # # 4. Format word-level results for the Dataframe | |
| # # We flatten the word_results list using the logic from your advanced script | |
| # word_list = [] | |
| # if ocr_result.word_results: | |
| # flat_results = sum(ocr_result.word_results, ()) | |
| # for i, (text, score, _) in enumerate(flat_results): | |
| # word_list.append([i + 1, text, round(float(score), 3)]) | |
| # return vis_img, word_list, f"{ocr_result.elapse:.3f}s" | |
| # # 5. Build a clean, minimal UI | |
| # with gr.Blocks(title="Rapid⚡OCR Simple") as demo: | |
| # gr.Markdown("# Rapid⚡OCR v5") | |
| # gr.Markdown("Upload an image to extract text with word-level bounding boxes.") | |
| # with gr.Row(): | |
| # with gr.Column(): | |
| # input_img = gr.Image(label="Input Image", type="numpy") | |
| # run_btn = gr.Button("Run OCR", variant="primary") | |
| # with gr.Column(): | |
| # output_img = gr.Image(label="Preview (Bounding Boxes)") | |
| # elapse_info = gr.Textbox(label="Processing Time") | |
| # result_table = gr.Dataframe( | |
| # headers=["ID", "Text", "Confidence"], | |
| # label="Detected Words", | |
| # interactive=False | |
| # ) | |
| # run_btn.click( | |
| # fn=perform_ocr, | |
| # inputs=[input_img], | |
| # outputs=[output_img, result_table, elapse_info] | |
| # ) | |
| # if __name__ == "__main__": | |
| # demo.launch() | |
| import gradio as gr | |
| from rapidocr import RapidOCR, OCRVersion | |
| import json | |
| import tempfile | |
| import os | |
# Shared OCR engine, built once at import time and reused by every request.
# Detection and recognition are pinned to PP-OCRv5; the classifier is pinned
# to PP-OCRv4.
_ENGINE_PARAMS = {
    "Det.ocr_version": OCRVersion.PPOCRV5,
    "Rec.ocr_version": OCRVersion.PPOCRV5,
    "Cls.ocr_version": OCRVersion.PPOCRV4,
}
engine = RapidOCR(params=_ENGINE_PARAMS)
def _bbox_to_xyxy(bbox):
    """Collapse a polygon of (x, y) points into ``[xmin, ymin, xmax, ymax]`` ints."""
    xs = [point[0] for point in bbox]
    ys = [point[1] for point in bbox]
    return [int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))]


def _write_results_json(pages):
    """Write *pages* to ``ocr_results.json`` in a fresh temp directory; return the path."""
    # A new directory per call keeps concurrent requests from overwriting each
    # other's export while preserving the user-visible download file name.
    out_dir = tempfile.mkdtemp(prefix="rapidocr_")
    json_path = os.path.join(out_dir, "ocr_results.json")
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(pages, f, indent=4, ensure_ascii=False)
    return json_path


def perform_ocr(img):
    """Run word-level OCR on *img* and prepare every output the UI displays.

    Args:
        img: Image handed over by the Gradio input component, or ``None``
            when nothing has been uploaded.

    Returns:
        A 4-tuple ``(annotated_image, table_rows, elapsed, json_path)``:

        * ``annotated_image`` -- whatever ``ocr_result.vis()`` returns.
        * ``table_rows`` -- ``[id, text, confidence]`` rows; ids start at 1
          and confidence is rounded to 3 decimals.
        * ``elapsed`` -- processing time formatted as ``"<seconds>s"``.
        * ``json_path`` -- path of the exported JSON file.

        With ``img is None`` the result is ``(None, None, "0.0", None)`` and
        neither the engine nor the file system is touched.
    """
    if img is None:
        return None, None, "0.0", None

    # return_word_box=True asks the engine for per-word results.
    ocr_result = engine(img, return_word_box=True)
    vis_img = ocr_result.vis()

    # word_results is grouped per line; flatten one level in linear time.
    # NOTE(review): assumes every entry is (text, score, bbox) with bbox an
    # iterable of (x, y) points -- confirm against the rapidocr output type.
    words = [
        word
        for line in (ocr_result.word_results or ())
        for word in line
    ]

    word_list_for_table = []
    json_data_list = []
    for idx, (text, score, bbox) in enumerate(words, start=1):
        word_list_for_table.append([idx, text, round(float(score), 3)])
        json_data_list.append({
            "word": text,
            "bbox": _bbox_to_xyxy(bbox),
            "type": "text",
        })

    # Page-based wrapper: a single page holding every detected word.
    final_json = [{
        "page_number": 1,
        "data": json_data_list,
        "column_separator_x": None,
    }]
    json_path = _write_results_json(final_json)

    return vis_img, word_list_for_table, f"{ocr_result.elapse:.3f}s", json_path
# Gradio interface: upload on the left, OCR outputs on the right.
# NOTE(review): the nesting below was reconstructed from flattened text; the
# placement of result_table under the row (rather than inside the right-hand
# column) is an assumption -- confirm against the intended layout.
with gr.Blocks(title="Rapid⚡OCR to JSON") as demo:
    gr.Markdown("# Rapid⚡OCR v5 with JSON Export")
    gr.Markdown(
        "Extract word-level bounding boxes in the same format as your preprocessed data."
    )

    with gr.Row():
        # Left column: image upload and the trigger button.
        with gr.Column():
            input_img = gr.Image(label="Input Image", type="numpy")
            run_btn = gr.Button("Run OCR", variant="primary")

        # Right column: annotated preview, timing and the JSON download.
        with gr.Column():
            output_img = gr.Image(label="Preview")
            elapse_info = gr.Textbox(label="Processing Time")
            json_download = gr.File(label="Download OCR JSON")

    result_table = gr.Dataframe(
        headers=["ID", "Text", "Confidence"],
        label="Detected Words",
        interactive=False,
    )

    # Output order must match the tuple returned by perform_ocr.
    run_btn.click(
        fn=perform_ocr,
        inputs=[input_img],
        outputs=[output_img, result_table, elapse_info, json_download],
    )

# Start the web server only when executed as a script.
if __name__ == "__main__":
    demo.launch()