heerjtdev committed on
Commit
4095636
·
verified ·
1 Parent(s): fc8c0fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -17
app.py CHANGED
@@ -1,8 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  from rapidocr import RapidOCR, OCRVersion
 
 
 
3
 
4
- # 1. Initialize the OCR engine once with v5 defaults
5
- # We use v5 for Detection/Recognition and v4 for Classification (most stable v5 setup)
6
  engine = RapidOCR(params={
7
  "Det.ocr_version": OCRVersion.PPOCRV5,
8
  "Rec.ocr_version": OCRVersion.PPOCRV5,
@@ -11,28 +86,57 @@ engine = RapidOCR(params={
11
 
12
  def perform_ocr(img):
13
  if img is None:
14
- return None, None, "0.0"
15
 
16
- # 2. Run OCR. return_word_box=True provides the word/char level detail
17
  ocr_result = engine(img, return_word_box=True)
18
 
19
- # 3. Get the annotated preview image
20
  vis_img = ocr_result.vis()
21
 
22
- # 4. Format word-level results for the Dataframe
23
- # We flatten the word_results list using the logic from your advanced script
24
- word_list = []
 
25
  if ocr_result.word_results:
 
26
  flat_results = sum(ocr_result.word_results, ())
27
- for i, (text, score, _) in enumerate(flat_results):
28
- word_list.append([i + 1, text, round(float(score), 3)])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- return vis_img, word_list, f"{ocr_result.elapse:.3f}s"
31
 
32
- # 5. Build a clean, minimal UI
33
- with gr.Blocks(title="Rapid⚡OCR Simple") as demo:
34
- gr.Markdown("# Rapid⚡OCR v5")
35
- gr.Markdown("Upload an image to extract text with word-level bounding boxes.")
36
 
37
  with gr.Row():
38
  with gr.Column():
@@ -40,8 +144,9 @@ with gr.Blocks(title="Rapid⚡OCR Simple") as demo:
40
  run_btn = gr.Button("Run OCR", variant="primary")
41
 
42
  with gr.Column():
43
- output_img = gr.Image(label="Preview (Bounding Boxes)")
44
  elapse_info = gr.Textbox(label="Processing Time")
 
45
 
46
  result_table = gr.Dataframe(
47
  headers=["ID", "Text", "Confidence"],
@@ -52,7 +157,7 @@ with gr.Blocks(title="Rapid⚡OCR Simple") as demo:
52
  run_btn.click(
53
  fn=perform_ocr,
54
  inputs=[input_img],
55
- outputs=[output_img, result_table, elapse_info]
56
  )
57
 
58
  if __name__ == "__main__":
 
1
+ # import gradio as gr
2
+ # from rapidocr import RapidOCR, OCRVersion
3
+
4
+ # # 1. Initialize the OCR engine once with v5 defaults
5
+ # # We use v5 for Detection/Recognition and v4 for Classification (most stable v5 setup)
6
+ # engine = RapidOCR(params={
7
+ # "Det.ocr_version": OCRVersion.PPOCRV5,
8
+ # "Rec.ocr_version": OCRVersion.PPOCRV5,
9
+ # "Cls.ocr_version": OCRVersion.PPOCRV4,
10
+ # })
11
+
12
+ # def perform_ocr(img):
13
+ # if img is None:
14
+ # return None, None, "0.0"
15
+
16
+ # # 2. Run OCR. return_word_box=True provides the word/char level detail
17
+ # ocr_result = engine(img, return_word_box=True)
18
+
19
+ # # 3. Get the annotated preview image
20
+ # vis_img = ocr_result.vis()
21
+
22
+ # # 4. Format word-level results for the Dataframe
23
+ # # We flatten the word_results list using the logic from your advanced script
24
+ # word_list = []
25
+ # if ocr_result.word_results:
26
+ # flat_results = sum(ocr_result.word_results, ())
27
+ # for i, (text, score, _) in enumerate(flat_results):
28
+ # word_list.append([i + 1, text, round(float(score), 3)])
29
+
30
+ # return vis_img, word_list, f"{ocr_result.elapse:.3f}s"
31
+
32
+ # # 5. Build a clean, minimal UI
33
+ # with gr.Blocks(title="Rapid⚡OCR Simple") as demo:
34
+ # gr.Markdown("# Rapid⚡OCR v5")
35
+ # gr.Markdown("Upload an image to extract text with word-level bounding boxes.")
36
+
37
+ # with gr.Row():
38
+ # with gr.Column():
39
+ # input_img = gr.Image(label="Input Image", type="numpy")
40
+ # run_btn = gr.Button("Run OCR", variant="primary")
41
+
42
+ # with gr.Column():
43
+ # output_img = gr.Image(label="Preview (Bounding Boxes)")
44
+ # elapse_info = gr.Textbox(label="Processing Time")
45
+
46
+ # result_table = gr.Dataframe(
47
+ # headers=["ID", "Text", "Confidence"],
48
+ # label="Detected Words",
49
+ # interactive=False
50
+ # )
51
+
52
+ # run_btn.click(
53
+ # fn=perform_ocr,
54
+ # inputs=[input_img],
55
+ # outputs=[output_img, result_table, elapse_info]
56
+ # )
57
+
58
+ # if __name__ == "__main__":
59
+ # demo.launch()
60
+
61
+
62
+
63
+
64
+
65
+
66
+
67
+
68
+
69
+
70
+
71
+
72
+
73
+
74
  import gradio as gr
75
  from rapidocr import RapidOCR, OCRVersion
76
+ import json
77
+ import tempfile
78
+ import os
79
 
80
+ # Initialize the engine with v5 defaults
 
81
  engine = RapidOCR(params={
82
  "Det.ocr_version": OCRVersion.PPOCRV5,
83
  "Rec.ocr_version": OCRVersion.PPOCRV5,
 
86
 
def _bbox_to_rect(bbox):
    """Collapse a sequence of (x, y) points into ``[xmin, ymin, xmax, ymax]`` ints."""
    xs = [point[0] for point in bbox]
    ys = [point[1] for point in bbox]
    return [int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))]


def perform_ocr(img):
    """Run OCR on *img* and build the outputs shown in the UI.

    Args:
        img: Image handed over by the Gradio input component, or ``None``
            when nothing was uploaded.

    Returns:
        A 4-tuple ``(vis_img, table_rows, elapsed, json_path)``:

        * ``vis_img`` -- annotated preview returned by ``ocr_result.vis()``.
        * ``table_rows`` -- ``[id, text, confidence]`` rows, ids starting at 1,
          confidence rounded to 3 decimals.
        * ``elapsed`` -- ``ocr_result.elapse`` formatted as ``"<seconds>s"``.
        * ``json_path`` -- path of a JSON file holding the page-based export.

        When *img* is ``None`` the tuple is ``(None, None, "0.0", None)``.
    """
    if img is None:
        return None, None, "0.0", None

    # Run OCR with word-level detection enabled.
    ocr_result = engine(img, return_word_box=True)

    # Annotated preview image.
    vis_img = ocr_result.vis()

    # word_results is grouped per line; flatten it in one linear pass
    # (repeated tuple concatenation via sum(..., ()) is quadratic).
    words = [word for line in (ocr_result.word_results or ()) for word in line]

    word_list_for_table = []
    json_data_list = []
    for index, (text, score, bbox) in enumerate(words, start=1):
        word_list_for_table.append([index, text, round(float(score), 3)])
        # NOTE(review): bbox is presumably four [x, y] corner points
        # (per the original comment) -- reduced to an axis-aligned box here.
        json_data_list.append({
            "word": text,
            "bbox": _bbox_to_rect(bbox),
            "type": "text",
        })

    # Wrap in the page-based JSON structure.
    final_json = [{
        "page_number": 1,
        "data": json_data_list,
        "column_separator_x": None,
    }]

    # Write to a unique file per call: a fixed shared name would let
    # concurrent requests overwrite each other's download. delete=False
    # because the file must outlive this function to be served.
    with tempfile.NamedTemporaryFile(
        mode="w",
        encoding="utf-8",
        prefix="ocr_results_",
        suffix=".json",
        delete=False,
    ) as f:
        json.dump(final_json, f, indent=4, ensure_ascii=False)
        json_path = f.name

    return vis_img, word_list_for_table, f"{ocr_result.elapse:.3f}s", json_path
135
 
136
+ # Gradio Interface
137
+ with gr.Blocks(title="Rapid⚡OCR to JSON") as demo:
138
+ gr.Markdown("# Rapid⚡OCR v5 with JSON Export")
139
+ gr.Markdown("Extract word-level bounding boxes in the same format as your preprocessed data.")
140
 
141
  with gr.Row():
142
  with gr.Column():
 
144
  run_btn = gr.Button("Run OCR", variant="primary")
145
 
146
  with gr.Column():
147
+ output_img = gr.Image(label="Preview")
148
  elapse_info = gr.Textbox(label="Processing Time")
149
+ json_download = gr.File(label="Download OCR JSON")
150
 
151
  result_table = gr.Dataframe(
152
  headers=["ID", "Text", "Confidence"],
 
157
  run_btn.click(
158
  fn=perform_ocr,
159
  inputs=[input_img],
160
+ outputs=[output_img, result_table, elapse_info, json_download]
161
  )
162
 
163
  if __name__ == "__main__":