ranbac committed on
Commit
233fd7a
·
verified ·
1 Parent(s): 802ddd9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -102
app.py CHANGED
@@ -13,69 +13,88 @@ from paddleocr import PaddleOCR
13
  from PIL import Image, ImageDraw, ImageFont
14
  import numpy as np
15
  import requests
 
16
 
17
  # Tắt log thừa
18
  logging.getLogger("ppocr").setLevel(logging.WARNING)
19
 
20
- # --- QUẢN MODEL (Cache Model) ---
21
- print("Đang khởi tạo hệ thống quản lý Model...")
22
- OCR_ENGINES = {}
23
 
24
- def get_ocr_model(lang_code):
25
- if lang_code in OCR_ENGINES:
26
- return OCR_ENGINES[lang_code]
27
-
28
- print(f"🔄 Đang tải model ngôn ngữ: {lang_code}...")
29
- try:
30
- # lang='vi' trong PaddleOCR sử dụng latin_dict, hỗ trợ:
31
- # Tiếng Việt, Anh, Pháp, Đức, Ý, Tây Ban Nha, Bồ Đào Nha, v.v...
32
- engine = PaddleOCR(
33
- use_angle_cls=True, # Bật tự động xoay ảnh
34
- use_textline_orientation=True,
35
- lang=lang_code
36
- )
37
- OCR_ENGINES[lang_code] = engine
38
- return engine
39
- except Exception as e:
40
- print(f"⚠️ Lỗi tải model {lang_code}: {e}. Chuyển về 'en'.")
41
- engine = PaddleOCR(lang='en')
42
- OCR_ENGINES[lang_code] = engine
43
- return engine
44
-
45
- # --- TẢI FONT HỖ TRỢ TIẾNG VIỆT (NOTO SANS) ---
46
- def check_and_download_font():
47
- # Đổi sang NotoSans để hỗ trợ Tiếng Việt và Latin đầy đủ
48
- font_filename = "NotoSans-Regular.ttf"
49
- font_path = f"./{font_filename}"
50
-
 
 
 
 
 
 
 
 
 
 
51
  if not os.path.exists(font_path):
52
- print("⬇️ Đang tải font hỗ trợ Tiếng Việt (Noto Sans)...")
53
  try:
54
- # Link tải font Noto Sans chính chủ Google
55
- url = "https://github.com/google/fonts/raw/main/ofl/notosans/NotoSans-Regular.ttf"
56
  r = requests.get(url, allow_redirects=True)
57
  with open(font_path, 'wb') as f:
58
  f.write(r.content)
59
- print("Đã tải font thành công.")
60
- except Exception as e:
61
- print(f"⚠️ Lỗi tải font: {e}")
62
  return None
63
  return font_path
64
 
65
- FONT_PATH = check_and_download_font()
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
- # --- HÀM VẼ ĐA NĂNG (GIỮ NGUYÊN LOGIC VẼ) ---
68
  def universal_draw(image, raw_data, font_path):
69
  if image is None: return image
70
- if isinstance(image, np.ndarray):
71
- image = Image.fromarray(image)
72
 
73
  canvas = image.copy()
74
  draw = ImageDraw.Draw(canvas)
75
 
76
  try:
77
- # Tăng kích thước font một chút cho dễ nhìn
78
- font_size = 26
79
  font = ImageFont.truetype(font_path, font_size) if font_path else ImageFont.load_default()
80
  except:
81
  font = ImageFont.load_default()
@@ -90,16 +109,19 @@ def universal_draw(image, raw_data, font_path):
90
  except: return None
91
 
92
  items_to_draw = []
93
-
94
- # Logic lấy dữ liệu
95
  processed = False
96
  if isinstance(raw_data, list) and len(raw_data) > 0 and isinstance(raw_data[0], dict):
97
  data_dict = raw_data[0]
98
  texts = data_dict.get('rec_texts')
99
  boxes = data_dict.get('dt_polys', data_dict.get('rec_polys', data_dict.get('dt_boxes')))
100
- if texts and boxes:
 
101
  for i in range(min(len(texts), len(boxes))):
102
- items_to_draw.append((parse_box(boxes[i]), texts[i]))
 
 
103
  processed = True
104
 
105
  if not processed:
@@ -108,7 +130,7 @@ def universal_draw(image, raw_data, font_path):
108
  box = None; text = None
109
  for k in ['points', 'box', 'dt_boxes', 'poly']:
110
  if k in data: box = parse_box(data[k]); break
111
- for k in ['transcription', 'text', 'rec_text']:
112
  if k in data: text = data[k]; break
113
  if box and text: items_to_draw.append((box, text)); return
114
  for v in data.values(): hunt(v)
@@ -121,17 +143,13 @@ def universal_draw(image, raw_data, font_path):
121
  for item in data: hunt(item)
122
  hunt(raw_data)
123
 
124
- # Thực hiện vẽ
125
  for box, txt in items_to_draw:
126
- if not box or not txt: continue
127
  try:
128
- draw.polygon(box, outline="#00FF00", width=3) # Đổi sang màu xanh lá cho nổi
129
-
130
- # Vẽ nền chữ
131
  txt_x, txt_y = box[0]
132
  if hasattr(draw, "textbbox"):
133
- bbox = draw.textbbox((txt_x, txt_y), txt, font=font, anchor="lb")
134
- draw.rectangle(bbox, fill="#00AA00") # Nền xanh
135
  draw.text((txt_x, txt_y), txt, fill="white", font=font, anchor="lb")
136
  else:
137
  draw.text((txt_x, txt_y - font_size), txt, fill="white", font=font)
@@ -139,83 +157,104 @@ def universal_draw(image, raw_data, font_path):
139
 
140
  return canvas
141
 
142
- # --- XỬ LÝ TEXT ---
143
  def deep_extract_text(data):
144
- found = []
145
- if isinstance(data, str): return [data] if data.strip() else []
146
- if isinstance(data, (list, tuple)):
147
- for i in data: found.extend(deep_extract_text(i))
 
 
148
  elif isinstance(data, dict):
149
- for v in data.values(): found.extend(deep_extract_text(v))
150
- return found
 
151
 
152
  def clean_text_result(text_list):
153
  cleaned = []
154
- ignore = ['min', 'max', 'header', 'footer']
155
  for t in text_list:
156
  t = t.strip()
157
- if len(t) < 2 and not any(u'\u00C0' <= c <= u'\u1EF9' for c in t) and not t.isalnum(): continue
158
- if t.lower() in ignore: continue
 
159
  cleaned.append(t)
160
  return cleaned
161
 
162
  # --- MAIN PREDICT ---
163
  def predict(image, lang_code):
164
  if image is None: return None, "Chưa có ảnh.", "No Data"
 
 
 
 
 
 
 
 
 
165
 
 
 
166
  try:
167
- # Load model đúng ngôn ngữ
168
- ocr_engine = get_ocr_model(lang_code)
169
-
170
- img_np = np.array(image)
171
- original_pil = image.copy() if isinstance(image, Image.Image) else Image.fromarray(image)
172
-
173
- # RUN OCR
174
- raw_result = ocr_engine.ocr(img_np)
175
-
176
- # Xử lý kết quả & Vẽ
177
- annotated_image = universal_draw(original_pil, raw_result, FONT_PATH)
178
-
179
- # Lấy text
180
- texts = deep_extract_text(raw_result)
181
- final_text = "\n".join(clean_text_result(texts)) if texts else "Không tìm thấy văn bản."
182
 
183
- debug_info = f"Language: {lang_code}\nFont Loaded: {FONT_PATH}\nRaw Data Sample:\n{str(raw_result)[:800]}..."
184
-
185
- return annotated_image, final_text, debug_info
186
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  except Exception as e:
188
  import traceback
189
- return image, f"Lỗi: {e}", traceback.format_exc()
190
 
191
  # --- GIAO DIỆN ---
192
- with gr.Blocks(title="PaddleOCR Multi-Lang Pro") as demo:
193
- gr.Markdown("## 🇻🇳 PaddleOCR: Hỗ trợ Tiếng Việt & Đa Ngôn Ngữ")
194
 
195
  with gr.Row():
196
- with gr.Column(scale=1):
197
- input_img = gr.Image(type="pil", label="Ảnh đầu vào")
198
-
199
- # Dropdown chọn ngôn ngữ (Mặc định là 'vi' để sửa lỗi của bạn)
200
  lang_dropdown = gr.Dropdown(
201
- choices=["vi", "en", "ch", "japan", "korean", "french", "german"],
202
- value="vi",
203
- label="Ngôn Ngữ (Language Model)",
204
- info="Chọn 'vi' để hỗ trợ tốt nhất: Tiếng Việt + Toàn bộ các ngôn ngữ Latin (Anh, Pháp, Đức, Ý...)"
205
  )
206
-
207
- btn = gr.Button("CHẠY OCR", variant="primary")
208
-
209
- with gr.Column(scale=1):
210
  with gr.Tabs():
211
  with gr.TabItem("🖼️ Kết quả"):
212
- output_img = gr.Image(type="pil", label="Ảnh đã nhận diện")
213
  with gr.TabItem("📝 Text"):
214
- output_txt = gr.Textbox(label="Nội dung", lines=15)
215
  with gr.TabItem("🐞 Debug"):
216
- output_debug = gr.Textbox(label="Log", lines=10)
217
 
218
- btn.click(predict, [input_img, lang_dropdown], [output_img, output_txt, output_debug])
 
 
 
 
219
 
220
  if __name__ == "__main__":
221
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
13
  from PIL import Image, ImageDraw, ImageFont
14
  import numpy as np
15
  import requests
16
+ import traceback # Thêm để in lỗi chi tiết
17
 
18
  # Tắt log thừa
19
  logging.getLogger("ppocr").setLevel(logging.WARNING)
20
 
21
# --- INITIALISE THE MULTI-LANGUAGE PADDLEOCR ENGINES ---
# Engines are built once at import time and stored under the language key
# that the UI dropdown sends to predict().
print("Đang khởi tạo các Model OCR...")

ocr_engines = {}

# NOTE: `show_log` is deliberately NOT passed to PaddleOCR below. The
# constructor arguments used here (use_textline_orientation,
# use_doc_orientation_classify, use_doc_unwarping) belong to the PaddleOCR 3.x
# API, which raises ValueError for unknown keyword arguments such as
# `show_log`. Passing it made both engines fail to initialise, so every
# request ended in "Initialization Failed". Log noise is already reduced by
# the logging.getLogger("ppocr") level set earlier in the file.

# 1. Chinese model
try:
    print(" - Đang tải Model Tiếng Trung (ch)...")
    ocr_engines['ch'] = PaddleOCR(
        use_textline_orientation=True,
        use_doc_orientation_classify=False,
        use_doc_unwarping=False,
        lang='ch',
    )
    print(" -> OK: Model Trung Quốc đã sẵn sàng.")
except Exception as e:
    # Keep the app alive: predict() reports a missing engine to the user.
    print(f" -> LỖI khởi tạo Model Trung Quốc: {e}")
    print(traceback.format_exc())  # Full details for debugging

# 2. Latin model, registered under the 'latin' key but created with lang='en'.
try:
    print(" - Đang tải Model Latin/Việt (en)...")
    # The original author states that lang='en' covers the whole extended
    # Latin character set (Vietnamese, French, German, ...).
    # NOTE(review): that claim is not verifiable from this file — confirm
    # against the PaddleOCR language list that 'en' really recognises
    # Vietnamese diacritics, otherwise a Latin/Vietnamese model is needed.
    ocr_engines['latin'] = PaddleOCR(
        use_textline_orientation=True,
        use_doc_orientation_classify=False,
        use_doc_unwarping=False,
        lang='en',  # IMPORTANT (per original author): 'en' instead of 'latin'
    )
    print(" -> OK: Model Latin/Việt đã sẵn sàng.")
except Exception as e:
    print(f" -> LỖI khởi tạo Model Latin: {e}")
    print(traceback.format_exc())  # Full details for debugging

print("Quá trình khởi tạo hoàn tất!")
57
+
58
+ # --- TẢI FONT ---
59
def check_and_download_font(font_name, url):
    """Return a local path to ``font_name``, downloading it from ``url`` if absent.

    Args:
        font_name: File name of the font; it is stored as ``./<font_name>``.
        url: HTTP(S) location the font is fetched from when it is not cached.

    Returns:
        The local font path, or ``None`` when the download or the write fails.
    """
    font_path = f"./{font_name}"
    if os.path.exists(font_path):
        return font_path

    # Download into a temporary sibling file and rename it afterwards, so an
    # interrupted write can never leave a truncated file that later passes the
    # existence check above and is treated as a valid font.
    tmp_path = f"{font_path}.part"
    try:
        print(f"Đang tải font {font_name}...")
        r = requests.get(url, allow_redirects=True, timeout=30)
        # Without this check an HTTP error page (e.g. a 404 body) would be
        # saved and cached as if it were the font.
        r.raise_for_status()
        with open(tmp_path, 'wb') as f:
            f.write(r.content)
        os.replace(tmp_path, font_path)
        print(f"Đã tải xong {font_name}.")
    except (requests.RequestException, OSError):
        print(f"Không thể tải {font_name}. Vui lòng kiểm tra internet.")
        try:
            # Best-effort cleanup of a partial download.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
        except OSError:
            pass
        return None
    return font_path
72
 
73
# Font used when drawing results of the Chinese engine.
# check_and_download_font returns None when the download fails.
FONT_CH_PATH = check_and_download_font(
    "simfang.ttf",
    "https://github.com/StellarCN/scp_zh/raw/master/fonts/SimFang.ttf"
)

# Font used when drawing results of the Latin engine.
# NOTE(review): the file is saved as "Roboto-Regular.ttf" but the URL fetches
# RobotoSlab-Regular.ttf — confirm which typeface is actually intended.
FONT_LATIN_PATH = check_and_download_font(
    "Roboto-Regular.ttf",
    "https://github.com/google/fonts/raw/main/apache/robotoslab/RobotoSlab-Regular.ttf"
)

# Maps the language key chosen in the UI to the font path used for drawing.
FONT_MAP = {
    'ch': FONT_CH_PATH,
    'latin': FONT_LATIN_PATH
}
87
 
88
+ # --- HÀM VẼ ĐA NĂNG ---
89
  def universal_draw(image, raw_data, font_path):
90
  if image is None: return image
91
+ if isinstance(image, np.ndarray): image = Image.fromarray(image)
 
92
 
93
  canvas = image.copy()
94
  draw = ImageDraw.Draw(canvas)
95
 
96
  try:
97
+ font_size = 24
 
98
  font = ImageFont.truetype(font_path, font_size) if font_path else ImageFont.load_default()
99
  except:
100
  font = ImageFont.load_default()
 
109
  except: return None
110
 
111
  items_to_draw = []
112
+
113
+ # Logic tìm box/text
114
  processed = False
115
  if isinstance(raw_data, list) and len(raw_data) > 0 and isinstance(raw_data[0], dict):
116
  data_dict = raw_data[0]
117
  texts = data_dict.get('rec_texts')
118
  boxes = data_dict.get('dt_polys', data_dict.get('rec_polys', data_dict.get('dt_boxes')))
119
+
120
+ if texts and boxes and isinstance(texts, list) and isinstance(boxes, list):
121
  for i in range(min(len(texts), len(boxes))):
122
+ txt = texts[i]
123
+ box = parse_box(boxes[i])
124
+ if box and txt: items_to_draw.append((box, txt))
125
  processed = True
126
 
127
  if not processed:
 
130
  box = None; text = None
131
  for k in ['points', 'box', 'dt_boxes', 'poly']:
132
  if k in data: box = parse_box(data[k]); break
133
+ for k in ['transcription', 'text', 'rec_text', 'label']:
134
  if k in data: text = data[k]; break
135
  if box and text: items_to_draw.append((box, text)); return
136
  for v in data.values(): hunt(v)
 
143
  for item in data: hunt(item)
144
  hunt(raw_data)
145
 
 
146
  for box, txt in items_to_draw:
 
147
  try:
148
+ draw.polygon(box, outline="red", width=3)
 
 
149
  txt_x, txt_y = box[0]
150
  if hasattr(draw, "textbbox"):
151
+ text_bbox = draw.textbbox((txt_x, txt_y), txt, font=font, anchor="lb")
152
+ draw.rectangle(text_bbox, fill="red")
153
  draw.text((txt_x, txt_y), txt, fill="white", font=font, anchor="lb")
154
  else:
155
  draw.text((txt_x, txt_y - font_size), txt, fill="white", font=font)
 
157
 
158
  return canvas
159
 
160
+ # --- HÀM XỬ LÝ TEXT ---
161
def deep_extract_text(data):
    """Collect every non-blank string found anywhere inside ``data``.

    Lists, tuples, dict values and the ``__dict__`` of arbitrary objects are
    walked depth-first in iteration order. Strings are returned unmodified
    (not stripped); strings that are empty or whitespace-only are skipped.
    Dict keys are never collected.

    A container that contains itself (directly or indirectly) is entered only
    once per recursion path, so cyclic structures terminate instead of
    exhausting the recursion limit. A container that is merely referenced
    several times, without a cycle, is still visited each time.

    Args:
        data: Any nested structure, e.g. a raw OCR result.

    Returns:
        A list of the strings found, in traversal order.
    """
    found_texts = []
    active = set()  # ids of the containers on the current recursion path

    def walk(node):
        if isinstance(node, str):
            if node.strip():
                found_texts.append(node)
            return

        if isinstance(node, (list, tuple)):
            children = node
        elif isinstance(node, dict):
            children = node.values()
        elif hasattr(node, '__dict__'):
            children = (node.__dict__,)
        else:
            return

        key = id(node)
        if key in active:
            return  # Cycle: this container is already being walked.
        active.add(key)
        try:
            for child in children:
                walk(child)
        finally:
            active.discard(key)

    walk(data)
    return found_texts
172
 
173
# File-name suffixes that mark a string as a resource path, not recognised text.
_IGNORED_SUFFIXES = ('.ttf', '.json', '.pdparams', '.yml', '.log')

# Exact (case-insensitive) values that are dropped from the output.
_BLOCKED_WORDS = frozenset(
    ['min', 'max', 'general', 'header', 'footer', 'structure']
)


def clean_text_result(text_list):
    """Strip and filter a list of candidate text strings.

    Each string is stripped of surrounding whitespace. An entry is dropped when
    it is not a string, is empty after stripping, ends (case-insensitively)
    with one of ``_IGNORED_SUFFIXES``, or equals (case-insensitively) one of
    ``_BLOCKED_WORDS``.

    Args:
        text_list: Iterable of candidate strings.

    Returns:
        A new list with the surviving, stripped strings in their input order.
    """
    cleaned = []
    for t in text_list:
        if not isinstance(t, str):
            # Skip stray non-text values instead of failing on .strip().
            continue
        t = t.strip()
        if not t:
            continue
        lowered = t.lower()
        if lowered.endswith(_IGNORED_SUFFIXES) or lowered in _BLOCKED_WORDS:
            continue
        cleaned.append(t)
    return cleaned
183
 
184
  # --- MAIN PREDICT ---
185
def _collect_rec_texts(raw_result):
    """Return the recognised lines of a structured OCR result, or None.

    A result counts as structured when it is a list holding at least one dict
    page whose 'rec_texts' entry is a list. The strings of all such pages are
    stripped and returned in order, with blank entries dropped. ``None`` is
    returned when no structured page exists, so the caller can fall back to a
    generic extraction; an empty list means "structured, but no text found".
    """
    if not isinstance(raw_result, list):
        return None
    pages = [
        page for page in raw_result
        if isinstance(page, dict) and isinstance(page.get('rec_texts'), list)
    ]
    if not pages:
        return None
    return [
        text.strip()
        for page in pages
        for text in page['rec_texts']
        if isinstance(text, str) and text.strip()
    ]


def predict(image, lang_code):
    """Run OCR on ``image`` with the engine registered under ``lang_code``.

    Args:
        image: Input picture, a PIL image or a numpy array; ``None`` when the
            user has not supplied one.
        lang_code: Key into ``ocr_engines`` / ``FONT_MAP``.

    Returns:
        A 3-tuple ``(annotated_image, text, debug_info)``. On failure the
        input image is returned unchanged together with an error message and,
        for runtime errors, the formatted traceback.
    """
    if image is None:
        return None, "Chưa có ảnh.", "No Data"

    # Look the engine up; it is missing when its initialisation failed.
    current_ocr = ocr_engines.get(lang_code)
    if current_ocr is None:
        loaded_keys = list(ocr_engines.keys())
        msg = f"Lỗi: Model '{lang_code}' chưa tải được. Danh sách model đang có: {loaded_keys}.\nVui lòng xem log khởi tạo (terminal) để biết lý do."
        return image, msg, "Initialization Failed"

    current_font = FONT_MAP.get(lang_code, FONT_LATIN_PATH)

    try:
        original_pil = image.copy() if isinstance(image, Image.Image) else Image.fromarray(image).copy()
        image_np = np.array(image)

        # 1. Run OCR
        raw_result = current_ocr.ocr(image_np)

        # 2. Pick the image to draw on: when the result carries a preprocessed
        #    image, use that one instead of the original upload.
        #    NOTE(review): presumably the detected boxes refer to that
        #    preprocessed image — confirm against the PaddleOCR result format.
        target_image_for_drawing = original_pil
        if isinstance(raw_result, list) and len(raw_result) > 0 and isinstance(raw_result[0], dict):
            if 'doc_preprocessor_res' in raw_result[0]:
                proc_res = raw_result[0]['doc_preprocessor_res']
                if 'output_img' in proc_res:
                    numpy_img = proc_res['output_img']
                    target_image_for_drawing = Image.fromarray(numpy_img)

        # 3. Draw the overlay and build the text output
        annotated_image = universal_draw(target_image_for_drawing, raw_result, current_font)

        # Prefer the recognised lines of a structured result. Harvesting every
        # string in the raw result also picks up non-text values (hence the
        # block list in clean_text_result), and that block list in turn drops
        # genuine words such as "min" or "header" from the document.
        final_texts = _collect_rec_texts(raw_result)
        if final_texts is None:
            # Unknown result layout: fall back to the generic deep extraction.
            final_texts = clean_text_result(deep_extract_text(raw_result))
        text_output = "\n".join(final_texts) if final_texts else "Không tìm thấy văn bản."

        debug_info = f"Engine: {lang_code} (Mapped to Paddle 'en' for Latin)\nFont: {current_font}\nRaw Data Head:\n{str(raw_result)[:800]}..."

        return annotated_image, text_output, debug_info

    except Exception as e:
        # UI boundary: report the failure in the interface instead of raising.
        import traceback
        return image, f"Runtime Error: {str(e)}", traceback.format_exc()
228
 
229
  # --- GIAO DIỆN ---
230
# Two-column layout: inputs on the left, tabbed results on the right.
with gr.Blocks(title="PaddleOCR Ultimate") as iface:
    gr.Markdown("## PaddleOCR Multi-Language (v4/v3)")

    with gr.Row():
        # Left column: image upload, engine selection and the run button.
        with gr.Column():
            input_img = gr.Image(type="pil", label="Input Image")
            lang_dropdown = gr.Dropdown(
                choices=["ch", "latin"],
                value="latin",
                label="Chọn Ngôn ngữ / Language",
                info="ch: Chinese | latin: English, Vietnamese, French, German, etc.",
            )
            submit_btn = gr.Button("RUN OCR", variant="primary")

        # Right column: one tab per value returned by predict().
        with gr.Column(), gr.Tabs():
            with gr.TabItem("🖼️ Kết quả"):
                output_img = gr.Image(type="pil", label="Overlay")
            with gr.TabItem("📝 Text"):
                output_txt = gr.Textbox(label="Content", lines=15)
            with gr.TabItem("🐞 Debug"):
                output_debug = gr.Textbox(label="Log", lines=15)

    # The output order matches predict()'s (image, text, debug) return tuple.
    submit_btn.click(
        predict,
        [input_img, lang_dropdown],
        [output_img, output_txt, output_debug],
    )

if __name__ == "__main__":
    # Listen on all interfaces on port 7860.
    iface.launch(server_name="0.0.0.0", server_port=7860)