iammraat committed on
Commit
e2f83a2
·
verified ·
1 Parent(s): 333f06a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -124
app.py CHANGED
@@ -1,116 +1,16 @@
1
- # import gradio as gr
2
- # import cv2
3
- # import numpy as np
4
- # from paddleocr import PaddleOCR
5
- # from PIL import Image
6
-
7
- # # Initialize PaddleOCR
8
- # # enable_mkldnn=False is CRITICAL to prevent the C++ crash
9
- # ocr = PaddleOCR(
10
- # use_textline_orientation=True,
11
- # lang='en',
12
- # ocr_version='PP-OCRv5',
13
- # enable_mkldnn=False
14
- # )
15
-
16
- # def run_ocr(input_image):
17
- # if input_image is None:
18
- # return None, "No image uploaded"
19
-
20
- # image_np = np.array(input_image)
21
-
22
- # # Run OCR
23
- # result = ocr.ocr(image_np)
24
-
25
- # if result is None or not result:
26
- # return input_image, "No text detected."
27
-
28
- # # Unwrap the list if needed (Paddle often returns [result])
29
- # if isinstance(result, list) and len(result) == 1:
30
- # result = result[0]
31
-
32
- # detected_texts = []
33
- # viz_image = image_np.copy()
34
-
35
- # # --- PARSING LOGIC FIX ---
36
-
37
- # # CASE 1: New V5 / PaddleX Format (The structure in your logs)
38
- # # It returns a dict with keys: 'rec_texts', 'rec_polys', 'rec_scores'
39
- # if isinstance(result, dict) and 'rec_texts' in result:
40
- # texts = result.get('rec_texts', [])
41
- # boxes = result.get('rec_polys', [])
42
- # scores = result.get('rec_scores', [])
43
-
44
- # # We zip them together to iterate
45
- # for box, text, score in zip(boxes, texts, scores):
46
- # detected_texts.append(f"{text} (Conf: {score:.2f})")
47
-
48
- # try:
49
- # # v5 polys are often already numpy arrays, but we ensure shape
50
- # box = np.array(box).astype(np.int32).reshape((-1, 1, 2))
51
- # cv2.polylines(viz_image, [box], isClosed=True, color=(0, 255, 255), thickness=2)
52
- # except Exception:
53
- # pass # Skip drawing if box format is weird, but keep text
54
-
55
- # # CASE 2: Legacy Format (List of lists)
56
- # # [ [box, (text, score)], ... ]
57
- # elif isinstance(result, list):
58
- # for line in result:
59
- # if not isinstance(line, list) or len(line) < 2:
60
- # continue
61
-
62
- # box = line[0]
63
- # text_content = line[1][0]
64
- # score = line[1][1]
65
-
66
- # detected_texts.append(f"{text_content} (Conf: {score:.2f})")
67
-
68
- # try:
69
- # box = np.array(box).astype(np.int32).reshape((-1, 1, 2))
70
- # cv2.polylines(viz_image, [box], isClosed=True, color=(0, 255, 255), thickness=2)
71
- # except Exception:
72
- # pass
73
-
74
- # return viz_image, "\n".join(detected_texts)
75
-
76
- # with gr.Blocks(title="PaddleOCR v5 Handwriting Demo") as demo:
77
- # gr.Markdown("## ⚡ PaddleOCR v5 (Handwriting Edition)")
78
-
79
- # with gr.Row():
80
- # with gr.Column():
81
- # input_img = gr.Image(type="pil", label="Input Document")
82
- # submit_btn = gr.Button("Read Handwriting", variant="primary")
83
-
84
- # with gr.Column():
85
- # output_img = gr.Image(label="Detections")
86
- # output_text = gr.Textbox(label="Recognized Text", lines=15)
87
-
88
- # submit_btn.click(fn=run_ocr, inputs=input_img, outputs=[output_img, output_text])
89
-
90
- # if __name__ == "__main__":
91
- # demo.launch(server_name="0.0.0.0", server_port=7860)
92
-
93
-
94
-
95
-
96
-
97
-
98
-
99
-
100
  import gradio as gr
101
  import cv2
102
  import numpy as np
103
  from paddleocr import PaddleOCR
104
  from PIL import Image
105
 
106
- # Initialize PaddleOCR with Mobile Weights (PP-OCRv4)
107
- # ocr_version='PP-OCRv4' defaults to the lightweight "Mobile" model.
108
- # This is the single biggest factor for CPU speed.
109
  ocr = PaddleOCR(
110
  use_textline_orientation=True,
111
  lang='en',
112
- ocr_version='PP-OCRv4',
113
- enable_mkldnn=False # Keep False to avoid the C++ crash you saw earlier
114
  )
115
 
116
  def run_ocr(input_image):
@@ -119,51 +19,46 @@ def run_ocr(input_image):
119
 
120
  image_np = np.array(input_image)
121
 
122
- # --- OPTIMIZATION: Resize large images ---
123
- # OCR on 4k images is slow on CPU. Resizing to ~1280px width usually
124
- # keeps text readable but speeds up inference by 2x-4x.
125
- height, width = image_np.shape[:2]
126
- MAX_WIDTH = 1280
127
- if width > MAX_WIDTH:
128
- scale = MAX_WIDTH / width
129
- new_height = int(height * scale)
130
- image_np = cv2.resize(image_np, (MAX_WIDTH, new_height))
131
-
132
  # Run OCR
133
  result = ocr.ocr(image_np)
134
 
135
  if result is None or not result:
136
  return input_image, "No text detected."
137
 
138
- # Unwrap list if nested
139
  if isinstance(result, list) and len(result) == 1:
140
  result = result[0]
141
 
142
  detected_texts = []
143
  viz_image = image_np.copy()
144
 
145
- # --- ROBUST PARSING (Handles both v4 and v5 formats) ---
146
 
147
- # Check if result is the new Dictionary format (v5 style)
 
148
  if isinstance(result, dict) and 'rec_texts' in result:
149
  texts = result.get('rec_texts', [])
150
  boxes = result.get('rec_polys', [])
151
  scores = result.get('rec_scores', [])
152
 
 
153
  for box, text, score in zip(boxes, texts, scores):
154
  detected_texts.append(f"{text} (Conf: {score:.2f})")
 
155
  try:
 
156
  box = np.array(box).astype(np.int32).reshape((-1, 1, 2))
157
  cv2.polylines(viz_image, [box], isClosed=True, color=(0, 255, 255), thickness=2)
158
- except: pass
 
159
 
160
- # Check if result is the Standard List format (v3/v4 style)
 
161
  elif isinstance(result, list):
162
  for line in result:
163
  if not isinstance(line, list) or len(line) < 2:
164
  continue
165
 
166
- # v4 format: [ [box_coords], (text, confidence) ]
167
  box = line[0]
168
  text_content = line[1][0]
169
  score = line[1][1]
@@ -173,12 +68,13 @@ def run_ocr(input_image):
173
  try:
174
  box = np.array(box).astype(np.int32).reshape((-1, 1, 2))
175
  cv2.polylines(viz_image, [box], isClosed=True, color=(0, 255, 255), thickness=2)
176
- except: pass
 
177
 
178
  return viz_image, "\n".join(detected_texts)
179
 
180
- with gr.Blocks(title="Fast Handwriting OCR") as demo:
181
- gr.Markdown("## ⚡ Fast CPU Handwriting OCR")
182
 
183
  with gr.Row():
184
  with gr.Column():
@@ -192,4 +88,7 @@ with gr.Blocks(title="Fast Handwriting OCR") as demo:
192
  submit_btn.click(fn=run_ocr, inputs=input_img, outputs=[output_img, output_text])
193
 
194
  if __name__ == "__main__":
195
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import cv2
3
  import numpy as np
4
  from paddleocr import PaddleOCR
5
  from PIL import Image
6
 
7
+ # Initialize PaddleOCR
8
+ # enable_mkldnn=False is CRITICAL to prevent the C++ crash
 
9
  ocr = PaddleOCR(
10
  use_textline_orientation=True,
11
  lang='en',
12
+ ocr_version='PP-OCRv5',
13
+ enable_mkldnn=False
14
  )
15
 
16
  def run_ocr(input_image):
 
19
 
20
  image_np = np.array(input_image)
21
 
 
 
 
 
 
 
 
 
 
 
22
  # Run OCR
23
  result = ocr.ocr(image_np)
24
 
25
  if result is None or not result:
26
  return input_image, "No text detected."
27
 
28
+ # Unwrap the list if needed (Paddle often returns [result])
29
  if isinstance(result, list) and len(result) == 1:
30
  result = result[0]
31
 
32
  detected_texts = []
33
  viz_image = image_np.copy()
34
 
35
+ # --- PARSING LOGIC FIX ---
36
 
37
+ # CASE 1: New V5 / PaddleX Format (The structure in your logs)
38
+ # It returns a dict with keys: 'rec_texts', 'rec_polys', 'rec_scores'
39
  if isinstance(result, dict) and 'rec_texts' in result:
40
  texts = result.get('rec_texts', [])
41
  boxes = result.get('rec_polys', [])
42
  scores = result.get('rec_scores', [])
43
 
44
+ # We zip them together to iterate
45
  for box, text, score in zip(boxes, texts, scores):
46
  detected_texts.append(f"{text} (Conf: {score:.2f})")
47
+
48
  try:
49
+ # v5 polys are often already numpy arrays, but we ensure shape
50
  box = np.array(box).astype(np.int32).reshape((-1, 1, 2))
51
  cv2.polylines(viz_image, [box], isClosed=True, color=(0, 255, 255), thickness=2)
52
+ except Exception:
53
+ pass # Skip drawing if box format is weird, but keep text
54
 
55
+ # CASE 2: Legacy Format (List of lists)
56
+ # [ [box, (text, score)], ... ]
57
  elif isinstance(result, list):
58
  for line in result:
59
  if not isinstance(line, list) or len(line) < 2:
60
  continue
61
 
 
62
  box = line[0]
63
  text_content = line[1][0]
64
  score = line[1][1]
 
68
  try:
69
  box = np.array(box).astype(np.int32).reshape((-1, 1, 2))
70
  cv2.polylines(viz_image, [box], isClosed=True, color=(0, 255, 255), thickness=2)
71
+ except Exception:
72
+ pass
73
 
74
  return viz_image, "\n".join(detected_texts)
75
 
76
+ with gr.Blocks(title="PaddleOCR v5 Handwriting Demo") as demo:
77
+ gr.Markdown("## ⚡ PaddleOCR v5 (Handwriting Edition)")
78
 
79
  with gr.Row():
80
  with gr.Column():
 
88
  submit_btn.click(fn=run_ocr, inputs=input_img, outputs=[output_img, output_text])
89
 
90
  if __name__ == "__main__":
91
+ demo.launch(server_name="0.0.0.0", server_port=7860)
92
+
93
+
94
+