"""Gradio demo: run RapidOCR on an uploaded image and export word boxes as JSON."""

import json
import os
import tempfile
from itertools import chain

import gradio as gr
from rapidocr import RapidOCR, OCRVersion

# Initialize the engine once at import time with v5 defaults.
# Detection/recognition use PP-OCRv5; classification uses the PP-OCRv4 model.
engine = RapidOCR(params={
    "Det.ocr_version": OCRVersion.PPOCRV5,
    "Rec.ocr_version": OCRVersion.PPOCRV5,
    "Cls.ocr_version": OCRVersion.PPOCRV4,
})


def _bbox_to_rect(bbox):
    """Collapse a polygon of (x, y) points into ``[xmin, ymin, xmax, ymax]`` ints."""
    xs = [point[0] for point in bbox]
    ys = [point[1] for point in bbox]
    return [int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))]


def perform_ocr(img):
    """Run word-level OCR on *img* and prepare the outputs for the UI.

    Args:
        img: Image passed in by the Gradio input component, or ``None`` when
            nothing has been uploaded.

    Returns:
        A 4-tuple ``(preview, table_rows, elapsed, json_path)``:

        * ``preview`` - annotated image returned by ``ocr_result.vis()``.
        * ``table_rows`` - list of ``[id, text, confidence]`` rows, where
          ``id`` starts at 1 and confidence is rounded to 3 decimals.
        * ``elapsed`` - processing time formatted as ``"<seconds>s"``.
        * ``json_path`` - path of the JSON file written for download.

        When *img* is ``None`` the result is ``(None, None, "0.0", None)``.
    """
    if img is None:
        return None, None, "0.0", None

    # return_word_box=True asks the engine for per-word results.
    ocr_result = engine(img, return_word_box=True)
    vis_img = ocr_result.vis()

    word_list_for_table = []
    json_data_list = []

    if ocr_result.word_results:
        # word_results is grouped per line; flatten it lazily. Unlike
        # sum(..., ()), chain.from_iterable is linear and does not care
        # whether the inner sequences are tuples or lists.
        flat_results = chain.from_iterable(ocr_result.word_results)
        for index, (text, score, bbox) in enumerate(flat_results, start=1):
            word_list_for_table.append([index, text, round(float(score), 3)])
            json_data_list.append({
                "word": text,
                "bbox": _bbox_to_rect(bbox),
                "type": "text",
            })

    # Wrap the words in the page-based JSON structure.
    final_json = [{
        "page_number": 1,
        "data": json_data_list,
        "column_separator_x": None,
    }]

    # Write into a fresh per-request directory: a single fixed path in the
    # shared temp dir would let concurrent requests overwrite each other's
    # results. The file name itself is kept stable for the download.
    output_dir = tempfile.mkdtemp(prefix="rapidocr_")
    json_path = os.path.join(output_dir, "ocr_results.json")
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(final_json, f, indent=4, ensure_ascii=False)

    return vis_img, word_list_for_table, f"{ocr_result.elapse:.3f}s", json_path


# Gradio interface.
# NOTE(review): component nesting was reconstructed from a whitespace-collapsed
# source; the results table is assumed to sit below the two-column row - confirm.
with gr.Blocks(title="Rapid⚡OCR to JSON") as demo:
    gr.Markdown("# Rapid⚡OCR v5 with JSON Export")
    gr.Markdown("Extract word-level bounding boxes in the same format as your preprocessed data.")

    with gr.Row():
        with gr.Column():
            input_img = gr.Image(label="Input Image", type="numpy")
            run_btn = gr.Button("Run OCR", variant="primary")

        with gr.Column():
            output_img = gr.Image(label="Preview")
            elapse_info = gr.Textbox(label="Processing Time")
            json_download = gr.File(label="Download OCR JSON")

    result_table = gr.Dataframe(
        headers=["ID", "Text", "Confidence"],
        label="Detected Words",
        interactive=False,
    )

    # Output order must match the tuple returned by perform_ocr.
    run_btn.click(
        fn=perform_ocr,
        inputs=[input_img],
        outputs=[output_img, result_table, elapse_info, json_download],
    )

if __name__ == "__main__":
    demo.launch()