Spaces:

heerjtdev
/

rocr

Sleeping

App Files Community

heerjtdev committed on Jan 16

Commit

4095636

verified ·

1 Parent(s): fc8c0fc

Update app.py

Browse files

Files changed (1) hide show

app.py +122 -17

app.py CHANGED Viewed

@@ -1,8 +1,83 @@
 import gradio as gr
 from rapidocr import RapidOCR, OCRVersion
-# 1. Initialize the OCR engine once with v5 defaults
-# We use v5 for Detection/Recognition and v4 for Classification (most stable v5 setup)
 engine = RapidOCR(params={
     "Det.ocr_version": OCRVersion.PPOCRV5,
     "Rec.ocr_version": OCRVersion.PPOCRV5,
@@ -11,28 +86,57 @@ engine = RapidOCR(params={
 def perform_ocr(img):
     if img is None:
-        return None, None, "0.0"
-    # 2. Run OCR. return_word_box=True provides the word/char level detail
     ocr_result = engine(img, return_word_box=True)
-    # 3. Get the annotated preview image
     vis_img = ocr_result.vis()
-    # 4. Format word-level results for the Dataframe
-    # We flatten the word_results list using the logic from your advanced script
-    word_list = []
     if ocr_result.word_results:
         flat_results = sum(ocr_result.word_results, ())
-        for i, (text, score, _) in enumerate(flat_results):
-            word_list.append([i + 1, text, round(float(score), 3)])
-    return vis_img, word_list, f"{ocr_result.elapse:.3f}s"
-# 5. Build a clean, minimal UI
-with gr.Blocks(title="Rapid⚡OCR Simple") as demo:
-    gr.Markdown("# Rapid⚡OCR v5")
-    gr.Markdown("Upload an image to extract text with word-level bounding boxes.")
     with gr.Row():
         with gr.Column():
@@ -40,8 +144,9 @@ with gr.Blocks(title="Rapid⚡OCR Simple") as demo:
             run_btn = gr.Button("Run OCR", variant="primary")
         with gr.Column():
-            output_img = gr.Image(label="Preview (Bounding Boxes)")
             elapse_info = gr.Textbox(label="Processing Time")
     result_table = gr.Dataframe(
         headers=["ID", "Text", "Confidence"],
@@ -52,7 +157,7 @@ with gr.Blocks(title="Rapid⚡OCR Simple") as demo:
     run_btn.click(
         fn=perform_ocr,
         inputs=[input_img],
-        outputs=[output_img, result_table, elapse_info]
     )
 if __name__ == "__main__":

+# import gradio as gr
+# from rapidocr import RapidOCR, OCRVersion
+# # 1. Initialize the OCR engine once with v5 defaults
+# # We use v5 for Detection/Recognition and v4 for Classification (most stable v5 setup)
+# engine = RapidOCR(params={
+#     "Det.ocr_version": OCRVersion.PPOCRV5,
+#     "Rec.ocr_version": OCRVersion.PPOCRV5,
+#     "Cls.ocr_version": OCRVersion.PPOCRV4,
+# })
+# def perform_ocr(img):
+#     if img is None:
+#         return None, None, "0.0"
+#     # 2. Run OCR. return_word_box=True provides the word/char level detail
+#     ocr_result = engine(img, return_word_box=True)
+#     # 3. Get the annotated preview image
+#     vis_img = ocr_result.vis()
+#     # 4. Format word-level results for the Dataframe
+#     # We flatten the word_results list using the logic from your advanced script
+#     word_list = []
+#     if ocr_result.word_results:
+#         flat_results = sum(ocr_result.word_results, ())
+#         for i, (text, score, _) in enumerate(flat_results):
+#             word_list.append([i + 1, text, round(float(score), 3)])
+#     return vis_img, word_list, f"{ocr_result.elapse:.3f}s"
+# # 5. Build a clean, minimal UI
+# with gr.Blocks(title="Rapid⚡OCR Simple") as demo:
+#     gr.Markdown("# Rapid⚡OCR v5")
+#     gr.Markdown("Upload an image to extract text with word-level bounding boxes.")
+#     with gr.Row():
+#         with gr.Column():
+#             input_img = gr.Image(label="Input Image", type="numpy")
+#             run_btn = gr.Button("Run OCR", variant="primary")
+#         with gr.Column():
+#             output_img = gr.Image(label="Preview (Bounding Boxes)")
+#             elapse_info = gr.Textbox(label="Processing Time")
+#     result_table = gr.Dataframe(
+#         headers=["ID", "Text", "Confidence"],
+#         label="Detected Words",
+#         interactive=False
+#     )
+#     run_btn.click(
+#         fn=perform_ocr,
+#         inputs=[input_img],
+#         outputs=[output_img, result_table, elapse_info]
+#     )
+# if __name__ == "__main__":
+#     demo.launch()
 import gradio as gr
 from rapidocr import RapidOCR, OCRVersion
+import json
+import tempfile
+import os
+# Initialize the engine with v5 defaults
 engine = RapidOCR(params={
     "Det.ocr_version": OCRVersion.PPOCRV5,
     "Rec.ocr_version": OCRVersion.PPOCRV5,
 def perform_ocr(img):
     if img is None:
+        return None, None, "0.0", None
+    # Run OCR with word-level detection enabled
     ocr_result = engine(img, return_word_box=True)
+    # Generate annotated image
     vis_img = ocr_result.vis()
+    # Process results into the Table and JSON format
+    word_list_for_table = []
+    json_data_list = []
     if ocr_result.word_results:
+        # Flatten the per-line word results into a single list
         flat_results = sum(ocr_result.word_results, ())
+        for i, (text, score, bbox) in enumerate(flat_results):
+            # 1. Prepare Table Data
+            word_list_for_table.append([i + 1, text, round(float(score), 3)])
+            # 2. Prepare JSON Data (Convert 4-point box to [xmin, ymin, xmax, ymax])
+            # bbox is typically [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
+            xs = [p[0] for p in bbox]
+            ys = [p[1] for p in bbox]
+            xmin, ymin, xmax, ymax = min(xs), min(ys), max(xs), max(ys)
+            json_data_list.append({
+                "word": text,
+                "bbox": [int(xmin), int(ymin), int(xmax), int(ymax)],
+                "type": "text"
+            })
+    # Wrap in the requested page-based JSON structure
+    final_json = [{
+        "page_number": 1,
+        "data": json_data_list,
+        "column_separator_x": None
+    }]
+    # Save to a temporary file for download
+    temp_dir = tempfile.gettempdir()
+    json_path = os.path.join(temp_dir, "ocr_results.json")
+    with open(json_path, "w", encoding="utf-8") as f:
+        json.dump(final_json, f, indent=4, ensure_ascii=False)
+    return vis_img, word_list_for_table, f"{ocr_result.elapse:.3f}s", json_path
+# Gradio Interface
+with gr.Blocks(title="Rapid⚡OCR to JSON") as demo:
+    gr.Markdown("# Rapid⚡OCR v5 with JSON Export")
+    gr.Markdown("Extract word-level bounding boxes in the same format as your preprocessed data.")
     with gr.Row():
         with gr.Column():
             run_btn = gr.Button("Run OCR", variant="primary")
         with gr.Column():
+            output_img = gr.Image(label="Preview")
             elapse_info = gr.Textbox(label="Processing Time")
+            json_download = gr.File(label="Download OCR JSON")
     result_table = gr.Dataframe(
         headers=["ID", "Text", "Confidence"],
     run_btn.click(
         fn=perform_ocr,
         inputs=[input_img],
+        outputs=[output_img, result_table, elapse_info, json_download]
     )
 if __name__ == "__main__":