xontoloyoo committed on
Commit
b5e389f
·
verified ·
1 Parent(s): 78e616e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -171
app.py CHANGED
@@ -1,15 +1,13 @@
1
  import atexit
2
  import functools
3
- import re
4
- import cv2
5
- import numpy as np
6
  from queue import Queue
7
  from threading import Event, Thread
 
8
  from paddleocr import PaddleOCR, draw_ocr
9
  from PIL import Image
10
  import gradio as gr
11
 
12
- # ================== KONFIGURASI UTAMA ==================
13
  LANG_CONFIG = {
14
  "ch": {"num_workers": 2},
15
  "en": {"num_workers": 2},
@@ -19,92 +17,8 @@ LANG_CONFIG = {
19
  "japan": {"num_workers": 1},
20
  }
21
  CONCURRENCY_LIMIT = 8
22
- MIN_CONFIDENCE = 0.6 # Threshold confidence
23
- # ========================================================
24
-
25
- # ****************** PREPROCESSING ********************
26
- def preprocess_image(img_path):
27
- """Enhance image quality sebelum OCR"""
28
- try:
29
- img = cv2.imread(img_path)
30
-
31
- # Convert ke grayscale
32
- gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
33
-
34
- # Denoising dengan Non-Local Means
35
- denoised = cv2.fastNlMeansDenoising(
36
- gray,
37
- h=15, # Parameter kekuatan denoising (sesuaikan)
38
- templateWindowSize=7,
39
- searchWindowSize=21
40
- )
41
-
42
- # Sharpening dengan kernel custom
43
- kernel = np.array([[-1, -1, -1],
44
- [-1, 9, -1],
45
- [-1, -1, -1]])
46
- sharpened = cv2.filter2D(denoised, -1, kernel)
47
-
48
- # Adaptive Thresholding
49
- thresholded = cv2.adaptiveThreshold(
50
- sharpened,
51
- 255,
52
- cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
53
- cv2.THRESH_BINARY,
54
- 11, # Ukuran blok
55
- 2 # Konstanta pengurangan
56
- )
57
-
58
- # Simpan gambar hasil preprocessing
59
- cv2.imwrite("temp_processed.jpg", thresholded)
60
- return "temp_processed.jpg"
61
-
62
- except Exception as e:
63
- print(f"Error preprocessing: {e}")
64
- return img_path # Fallback ke gambar asli
65
-
66
- # ****************** POST-PROCESSING ********************
67
- CORRECTION_RULES = {
68
- r"\bKEMENTERAN\b": "KEMENTERIAN",
69
- r"\bKAR\.\b": "KAB.",
70
- r"\bTHN,\b": "TAHUN",
71
- r"RP\s*,\s*": "Rp",
72
- r"(\d{1,3}(?:\.\d{3})*)(,?)": r"\1\2", # Format ribuan
73
- r"(\d+)\s*-\s*\(": r"\1 (",
74
- r"CV\.(\w)": r"CV. \1" # Spasi setelah CV
75
- }
76
 
77
- def format_currency(match):
78
- """Helper untuk formatting mata uang"""
79
- amount = match.group(1).replace('.', '').replace(',', '.')
80
- return f'Rp{amount},00'
81
 
82
- def apply_post_ocr_corrections(text):
83
- """Koreksi pola umum dalam dokumen kontrak"""
84
- # Format mata uang: Rp, 87,640,000,- → Rp87.640.000,00
85
- text = re.sub(
86
- r'Rp\s*([\d.,]+)\s*-',
87
- format_currency,
88
- text,
89
- flags=re.IGNORECASE
90
- )
91
-
92
- # Aplikasi koreksi regex
93
- for pattern, replacement in CORRECTION_RULES.items():
94
- text = re.sub(pattern, replacement, text)
95
-
96
- # Format baris bernomor
97
- lines = text.split('\n')
98
- formatted_lines = []
99
- for line in lines:
100
- if re.match(r'^\d+[).]', line.strip()):
101
- formatted_lines.append(f"\n{line.strip()}")
102
- else:
103
- formatted_lines.append(line.strip())
104
-
105
- return '\n'.join(formatted_lines)
106
-
107
- # ****************** MODEL & INFERENCE ********************
108
  class PaddleOCRModelManager(object):
109
  def __init__(self,
110
  num_workers,
@@ -153,59 +67,42 @@ class PaddleOCRModelManager(object):
153
  finally:
154
  self._queue.task_done()
155
 
 
156
  def create_model(lang):
157
- return PaddleOCR(lang=lang, use_angle_cls=True, use_gpu=False, det_db_score_mode='slow')
 
158
 
159
  model_managers = {}
160
  for lang, config in LANG_CONFIG.items():
161
  model_manager = PaddleOCRModelManager(config["num_workers"], functools.partial(create_model, lang=lang))
162
  model_managers[lang] = model_manager
163
 
 
 
 
 
 
 
 
 
 
 
164
  def inference(img, lang):
165
- try:
166
- # 1. PREPROCESSING
167
- processed_img_path = preprocess_image(img)
168
-
169
- # 2. OCR INFERENCE
170
- ocr = model_managers[lang]
171
- result = ocr.infer(processed_img_path, cls=True)[0]
172
-
173
- # 3. PROCESS RESULTS
174
- image = Image.open(img).convert("RGB")
175
- boxes = [line[0] for line in result]
176
- txts = []
177
- confidences = []
178
-
179
- for line in result:
180
- text = line[1][0]
181
- confidence = line[1][1]
182
-
183
- # Flag teks dengan confidence rendah
184
- if confidence < MIN_CONFIDENCE:
185
- txts.append(f"[? {text} ?]")
186
- else:
187
- txts.append(text)
188
- confidences.append(confidence)
189
-
190
- # 4. POST-PROCESSING
191
- raw_text = "\n".join(txts)
192
- cleaned_text = apply_post_ocr_corrections(raw_text)
193
-
194
- # 5. DRAW OUTPUT
195
- im_show = draw_ocr(
196
- image,
197
- boxes,
198
- txts,
199
- confidences,
200
- font_path="./simfang.ttf"
201
- )
202
-
203
- return im_show, cleaned_text, f"Confidence Scores: {confidences}"
204
 
205
- except Exception as e:
206
- return None, f"Error: {str(e)}", ""
 
 
 
207
 
208
- # ****************** GRADO UI ********************
209
  title = 'PaddleOCR'
210
  description = '''
211
  - Gradio demo for PaddleOCR. PaddleOCR demo supports Chinese, English, French, German, Korean and Japanese.
@@ -219,43 +116,21 @@ examples = [
219
  ['jp_example.jpg','japan'],
220
  ]
221
 
222
- css = """
223
- .output_image, .input_image {height: 40rem !important; width: 100% !important;}
224
- .markdown-text {font-family: monospace !important;}
225
- """
226
-
227
- with gr.Blocks(css=css) as demo:
228
- gr.Markdown("## PaddleOCR Enhanced Dokumen")
229
-
230
- with gr.Row():
231
- with gr.Column():
232
- img_input = gr.Image(type='filepath', label='Upload Dokumen')
233
- lang_dropdown = gr.Dropdown(
234
- choices=list(LANG_CONFIG.keys()),
235
- value='en',
236
- label='Pilih Bahasa'
237
- )
238
- submit_btn = gr.Button("Proses OCR")
239
-
240
- with gr.Column():
241
- img_output = gr.Image(type='pil', label='Hasil Anotasi')
242
- text_output = gr.Textbox(
243
- label="Teks Hasil OCR",
244
- lines=15,
245
- show_copy_button=True,
246
- placeholder="Teks hasil OCR akan muncul di sini..."
247
- )
248
- debug_output = gr.Textbox(
249
- label="Debug Info",
250
- visible=False # Ubah ke True jika perlu debug
251
- )
252
-
253
- # Contoh dan handler
254
- gr.Examples(examples, inputs=[img_input, lang_dropdown])
255
- submit_btn.click(
256
- fn=inference,
257
- inputs=[img_input, lang_dropdown],
258
- outputs=[img_output, text_output, debug_output]
259
- )
260
-
261
- demo.launch(debug=False)
 
1
  import atexit
2
  import functools
 
 
 
3
  from queue import Queue
4
  from threading import Event, Thread
5
+
6
  from paddleocr import PaddleOCR, draw_ocr
7
  from PIL import Image
8
  import gradio as gr
9
 
10
+
11
  LANG_CONFIG = {
12
  "ch": {"num_workers": 2},
13
  "en": {"num_workers": 2},
 
17
  "japan": {"num_workers": 1},
18
  }
19
  CONCURRENCY_LIMIT = 8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
 
 
 
 
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  class PaddleOCRModelManager(object):
23
  def __init__(self,
24
  num_workers,
 
67
  finally:
68
  self._queue.task_done()
69
 
70
+
71
  def create_model(lang):
72
+ return PaddleOCR(lang=lang, use_angle_cls=True, use_gpu=False)
73
+
74
 
75
  model_managers = {}
76
  for lang, config in LANG_CONFIG.items():
77
  model_manager = PaddleOCRModelManager(config["num_workers"], functools.partial(create_model, lang=lang))
78
  model_managers[lang] = model_manager
79
 
80
+
81
+ def close_model_managers():
82
+ for manager in model_managers.values():
83
+ manager.close()
84
+
85
+
86
+ # XXX: Not sure if gradio allows adding custom teardown logic
87
+ atexit.register(close_model_managers)
88
+
89
+
90
  def inference(img, lang):
91
+ ocr = model_managers[lang]
92
+ result = ocr.infer(img, cls=True)[0]
93
+ img_path = img
94
+ image = Image.open(img_path).convert("RGB")
95
+ boxes = [line[0] for line in result]
96
+ txts = [line[1][0] for line in result]
97
+ scores = [line[1][1] for line in result]
98
+ im_show = draw_ocr(image, boxes, txts, scores, font_path="./simfang.ttf")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
+ # Tambahkan ini untuk text yang bisa disalin
101
+ combined_text = "\n".join(txts) # Gabungkan semua teks dengan newline
102
+
103
+ return im_show, combined_text # Return kedua output
104
+
105
 
 
106
  title = 'PaddleOCR'
107
  description = '''
108
  - Gradio demo for PaddleOCR. PaddleOCR demo supports Chinese, English, French, German, Korean and Japanese.
 
116
  ['jp_example.jpg','japan'],
117
  ]
118
 
119
+ css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
120
+ gr.Interface(
121
+ inference,
122
+ [
123
+ gr.Image(type='filepath', label='Input'),
124
+ gr.Dropdown(choices=list(LANG_CONFIG.keys()), value='en', label='Language')
125
+ ],
126
+ [ # Sekarang ada 2 output
127
+ gr.Image(type='pil', label='Annotated Image'),
128
+ gr.Textbox(label="Extracted Text", lines=10, interactive=True)
129
+ ],
130
+ title=title,
131
+ description=description,
132
+ examples=examples,
133
+ cache_examples=False,
134
+ css=css,
135
+ concurrency_limit=CONCURRENCY_LIMIT,
136
+ ).launch(debug=False)