1OCR

Sleeping

App Files Files Community

xontoloyoo committed on May 6, 2025

Commit

b5e389f

verified ·

1 Parent(s): 78e616e

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -171

app.py CHANGED Viewed

@@ -1,15 +1,13 @@
 import atexit
 import functools
-import re
-import cv2
-import numpy as np
 from queue import Queue
 from threading import Event, Thread
 from paddleocr import PaddleOCR, draw_ocr
 from PIL import Image
 import gradio as gr
-# ================== KONFIGURASI UTAMA ==================
 LANG_CONFIG = {
     "ch": {"num_workers": 2},
     "en": {"num_workers": 2},
@@ -19,92 +17,8 @@ LANG_CONFIG = {
     "japan": {"num_workers": 1},
 }
 CONCURRENCY_LIMIT = 8
-MIN_CONFIDENCE = 0.6  # Threshold confidence
-# ========================================================
-# ****************** PREPROCESSING ********************
-def preprocess_image(img_path):
-    """Enhance image quality sebelum OCR"""
-    try:
-        img = cv2.imread(img_path)
-        # Convert ke grayscale
-        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        # Denoising dengan Non-Local Means
-        denoised = cv2.fastNlMeansDenoising(
-            gray,
-            h=15,  # Parameter kekuatan denoising (sesuaikan)
-            templateWindowSize=7,
-            searchWindowSize=21
-        )
-        # Sharpening dengan kernel custom
-        kernel = np.array([[-1, -1, -1],
-                          [-1, 9, -1],
-                          [-1, -1, -1]])
-        sharpened = cv2.filter2D(denoised, -1, kernel)
-        # Adaptive Thresholding
-        thresholded = cv2.adaptiveThreshold(
-            sharpened,
-            255,
-            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-            cv2.THRESH_BINARY,
-            11,  # Ukuran blok
-            2  # Konstanta pengurangan
-        )
-        # Simpan gambar hasil preprocessing
-        cv2.imwrite("temp_processed.jpg", thresholded)
-        return "temp_processed.jpg"
-    except Exception as e:
-        print(f"Error preprocessing: {e}")
-        return img_path  # Fallback ke gambar asli
-# ****************** POST-PROCESSING ********************
-CORRECTION_RULES = {
-    r"\bKEMENTERAN\b": "KEMENTERIAN",
-    r"\bKAR\.\b": "KAB.",
-    r"\bTHN,\b": "TAHUN",
-    r"RP\s*,\s*": "Rp",
-    r"(\d{1,3}(?:\.\d{3})*)(,?)": r"\1\2",  # Format ribuan
-    r"(\d+)\s*-\s*\(": r"\1 (",
-    r"CV\.(\w)": r"CV. \1"  # Spasi setelah CV
-}
-def format_currency(match):
-    """Helper untuk formatting mata uang"""
-    amount = match.group(1).replace('.', '').replace(',', '.')
-    return f'Rp{amount},00'
-def apply_post_ocr_corrections(text):
-    """Koreksi pola umum dalam dokumen kontrak"""
-    # Format mata uang: Rp, 87,640,000,- → Rp87.640.000,00
-    text = re.sub(
-        r'Rp\s*([\d.,]+)\s*-',
-        format_currency,
-        text,
-        flags=re.IGNORECASE
-    )
-    # Aplikasi koreksi regex
-    for pattern, replacement in CORRECTION_RULES.items():
-        text = re.sub(pattern, replacement, text)
-    # Format baris bernomor
-    lines = text.split('\n')
-    formatted_lines = []
-    for line in lines:
-        if re.match(r'^\d+[).]', line.strip()):
-            formatted_lines.append(f"\n{line.strip()}")
-        else:
-            formatted_lines.append(line.strip())
-    return '\n'.join(formatted_lines)
-# ****************** MODEL & INFERENCE ********************
 class PaddleOCRModelManager(object):
     def __init__(self,
                  num_workers,
@@ -153,59 +67,42 @@ class PaddleOCRModelManager(object):
             finally:
                 self._queue.task_done()
 def create_model(lang):
-    return PaddleOCR(lang=lang, use_angle_cls=True, use_gpu=False, det_db_score_mode='slow')
 model_managers = {}
 for lang, config in LANG_CONFIG.items():
     model_manager = PaddleOCRModelManager(config["num_workers"], functools.partial(create_model, lang=lang))
     model_managers[lang] = model_manager
 def inference(img, lang):
-    try:
-        # 1. PREPROCESSING
-        processed_img_path = preprocess_image(img)
-        # 2. OCR INFERENCE
-        ocr = model_managers[lang]
-        result = ocr.infer(processed_img_path, cls=True)[0]
-        # 3. PROCESS RESULTS
-        image = Image.open(img).convert("RGB")
-        boxes = [line[0] for line in result]
-        txts = []
-        confidences = []
-        for line in result:
-            text = line[1][0]
-            confidence = line[1][1]
-            # Flag teks dengan confidence rendah
-            if confidence < MIN_CONFIDENCE:
-                txts.append(f"[? {text} ?]")
-            else:
-                txts.append(text)
-            confidences.append(confidence)
-        # 4. POST-PROCESSING
-        raw_text = "\n".join(txts)
-        cleaned_text = apply_post_ocr_corrections(raw_text)
-        # 5. DRAW OUTPUT
-        im_show = draw_ocr(
-            image,
-            boxes,
-            txts,
-            confidences,
-            font_path="./simfang.ttf"
-        )
-        return im_show, cleaned_text, f"Confidence Scores: {confidences}"
-    except Exception as e:
-        return None, f"Error: {str(e)}", ""
-# ****************** GRADO UI ********************
 title = 'PaddleOCR'
 description = '''
 - Gradio demo for PaddleOCR. PaddleOCR demo supports Chinese, English, French, German, Korean and Japanese.
@@ -219,43 +116,21 @@ examples = [
     ['jp_example.jpg','japan'],
 ]
-css = """
-.output_image, .input_image {height: 40rem !important; width: 100% !important;}
-.markdown-text {font-family: monospace !important;}
-"""
-with gr.Blocks(css=css) as demo:
-    gr.Markdown("## PaddleOCR Enhanced Dokumen")
-    with gr.Row():
-        with gr.Column():
-            img_input = gr.Image(type='filepath', label='Upload Dokumen')
-            lang_dropdown = gr.Dropdown(
-                choices=list(LANG_CONFIG.keys()),
-                value='en',
-                label='Pilih Bahasa'
-            )
-            submit_btn = gr.Button("Proses OCR")
-        with gr.Column():
-            img_output = gr.Image(type='pil', label='Hasil Anotasi')
-            text_output = gr.Textbox(
-                label="Teks Hasil OCR",
-                lines=15,
-                show_copy_button=True,
-                placeholder="Teks hasil OCR akan muncul di sini..."
-            )
-            debug_output = gr.Textbox(
-                label="Debug Info",
-                visible=False  # Ubah ke True jika perlu debug
-            )
-    # Contoh dan handler
-    gr.Examples(examples, inputs=[img_input, lang_dropdown])
-    submit_btn.click(
-        fn=inference,
-        inputs=[img_input, lang_dropdown],
-        outputs=[img_output, text_output, debug_output]
-    )
-demo.launch(debug=False)

 import atexit
 import functools
 from queue import Queue
 from threading import Event, Thread
 from paddleocr import PaddleOCR, draw_ocr
 from PIL import Image
 import gradio as gr
 LANG_CONFIG = {
     "ch": {"num_workers": 2},
     "en": {"num_workers": 2},
     "japan": {"num_workers": 1},
 }
 CONCURRENCY_LIMIT = 8
 class PaddleOCRModelManager(object):
     def __init__(self,
                  num_workers,
             finally:
                 self._queue.task_done()
 def create_model(lang):
+    return PaddleOCR(lang=lang, use_angle_cls=True, use_gpu=False)
 model_managers = {}
 for lang, config in LANG_CONFIG.items():
     model_manager = PaddleOCRModelManager(config["num_workers"], functools.partial(create_model, lang=lang))
     model_managers[lang] = model_manager
+def close_model_managers():
+    """Call ``close()`` on every manager stored in ``model_managers``."""
+    for manager in model_managers.values():
+        manager.close()
+# XXX: Not sure if gradio allows adding custom teardown logic
+# Registered with atexit so the managers are closed at interpreter exit.
+atexit.register(close_model_managers)
 def inference(img, lang):
+    """Run OCR on the image file at ``img`` with the model manager for ``lang``.
+
+    Args:
+        img: Path of the input image (the Gradio input uses type='filepath').
+        lang: Key into ``model_managers`` selecting the language model.
+
+    Returns:
+        A tuple ``(annotated_image, text)``: the output of ``draw_ocr`` for the
+        RGB image, and the recognized strings joined with newlines.
+    """
+    ocr = model_managers[lang]
+    # NOTE(review): PaddleOCR 2.x reports a page with no detected text as
+    # ``None``; assuming ``infer`` forwards that result unchanged, normalise it
+    # to an empty list so the comprehensions below do not raise TypeError.
+    result = ocr.infer(img, cls=True)[0] or []
+    image = Image.open(img).convert("RGB")
+    boxes = [line[0] for line in result]
+    txts = [line[1][0] for line in result]
+    scores = [line[1][1] for line in result]
+    im_show = draw_ocr(image, boxes, txts, scores, font_path="./simfang.ttf")
+    # Newline-joined text so the result can be copied from the textbox.
+    combined_text = "\n".join(txts)
+    return im_show, combined_text
 title = 'PaddleOCR'
 description = '''
 - Gradio demo for PaddleOCR. PaddleOCR demo supports Chinese, English, French, German, Korean and Japanese.
     ['jp_example.jpg','japan'],
 ]
+css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
+# Build the Gradio interface around `inference` and launch it.
+gr.Interface(
+    inference,
+    [
+        gr.Image(type='filepath', label='Input'),
+        gr.Dropdown(choices=list(LANG_CONFIG.keys()), value='en', label='Language')
+    ],
+    [  # Two outputs now: the annotated image and the extracted text
+        gr.Image(type='pil', label='Annotated Image'),
+        gr.Textbox(label="Extracted Text", lines=10, interactive=True)
+    ],
+    title=title,
+    description=description,
+    examples=examples,
+    cache_examples=False,
+    css=css,
+    concurrency_limit=CONCURRENCY_LIMIT,
+).launch(debug=False)