captcha

Sleeping

App Files Files Community

cubuvl committed on Jun 8, 2025

Commit

cbe0e5f

verified ·

1 Parent(s): edbb0bf

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -31

app.py CHANGED Viewed

@@ -1,47 +1,75 @@
 import gradio as gr
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
-from PIL import Image
 import numpy as np
 # Load model và processor
-model_name = "chanelcolgate/trocr-base-printed_captcha_ocr"
-model = VisionEncoderDecoderModel.from_pretrained(model_name)
 processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
-# Hàm xử lý ảnh captcha
 def process_image(image):
-    # Chuyển numpy -> PIL nếu cần
-    if isinstance(image, np.ndarray):
-        image = Image.fromarray(image)
-    # Thêm nền trắng nếu ảnh có alpha
-    if image.mode in ("RGBA", "LA"):
-        background = Image.new("RGB", image.size, (255, 255, 255))
-        background.paste(image, mask=image.split()[-1])
-        image = background
-    else:
-        image = image.convert("RGB")
-    # OCR
-    pixel_values = processor(image, return_tensors="pt").pixel_values
     generated_ids = model.generate(pixel_values)
     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    return image, generated_text
-# Giao diện Gradio
-title = "Captcha OCR Demo"
-description = "Nhận diện captcha từ mã số thuế (MST) – Model TrOCR"
-interface = gr.Interface(
-    fn=process_image,
-    inputs="image",
-    outputs=["image", "text"],
-    examples=[f"examples/captcha-{i}.png" for i in range(10)],
-    title="Captcha OCR Demo",
-    description="Xem ảnh đã xử lý (nền trắng) và kết quả OCR",
-)
 if __name__ == "__main__":
-    interface.launch()

 import gradio as gr
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+from PIL import Image, ImageOps
 import numpy as np
+import io
+import base64
 # Load the model and processor
# The model weights come from the checkpoint named below, while the processor
# (image preprocessing + tokenizer) is loaded from the
# "microsoft/trocr-base-printed" repository.
name = "chanelcolgate/trocr-base-printed_captcha_ocr"
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
model = VisionEncoderDecoderModel.from_pretrained(name)
def prepare_image(pil_image):
    """Return an RGB version of ``pil_image`` with transparency flattened onto white.

    Args:
        pil_image: A ``PIL.Image.Image`` in any mode.

    Returns:
        An RGB ``PIL.Image.Image``. For images with an alpha channel the
        result is a white canvas with the image pasted through its alpha
        mask; other images are simply converted to RGB.
    """
    # Palette images keep their transparency in ``info`` rather than in a
    # separate band; promote them to RGBA so the compositing branch applies.
    if pil_image.mode == "P" and "transparency" in pil_image.info:
        pil_image = pil_image.convert("RGBA")
    if pil_image.mode in ("RGBA", "LA"):
        background = Image.new("RGB", pil_image.size, (255, 255, 255))
        # The last band of RGBA/LA is the alpha channel; use it as paste mask.
        background.paste(pil_image, mask=pil_image.split()[-1])
        return background
    return pil_image.convert("RGB")


def _run_ocr(image_clean):
    """Run the module-level TrOCR ``processor``/``model`` on an image and return the text."""
    pixel_values = processor(image_clean, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    # ``batch_decode`` returns one string per image; a single image was passed.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


def process_image(image):
    """OCR an image supplied as a NumPy array.

    Args:
        image: Array accepted by ``PIL.Image.fromarray``, or ``None`` when the
            caller has no image to submit.

    Returns:
        A ``(image_clean, generated_text)`` tuple: the white-background RGB
        image that was fed to the model and the recognised text.

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    if image is None:
        # Without this guard ``Image.fromarray(None)`` fails with an opaque
        # exception when Submit is clicked before an image is chosen.
        raise gr.Error("Please upload an image first.")
    image_clean = prepare_image(Image.fromarray(image))
    return image_clean, _run_ocr(image_clean)


def process_base64(base64_str):
    """OCR an image supplied as a base64 string, with or without a data-URI prefix.

    Args:
        base64_str: Base64 text such as ``"data:image/png;base64,...."`` or
            the bare base64 payload.

    Returns:
        A ``(image_clean, generated_text)`` tuple: the white-background RGB
        image that was fed to the model and the recognised text.

    Raises:
        gr.Error: If the input is empty, is not valid base64, or does not
            decode to an image that PIL can read.
    """
    if not base64_str or not base64_str.strip():
        raise gr.Error("Please paste a base64-encoded image.")
    # Strip the optional "data:image/png;base64," prefix; base64 payloads
    # never contain a comma themselves.
    payload = base64_str.split(",", 1)[-1].strip()
    try:
        image_data = base64.b64decode(payload)
        image = Image.open(io.BytesIO(image_data))
        # ``Image.open`` is lazy; force decoding here so corrupt data is
        # reported as an input error instead of failing later.
        image.load()
    except (ValueError, OSError) as err:
        # binascii.Error is a ValueError; PIL's UnidentifiedImageError is an OSError.
        raise gr.Error("Could not decode the pasted text as an image.") from err
    image_clean = prepare_image(image)
    return image_clean, _run_ocr(image_clean)
# Two-tab UI: one tab OCRs an uploaded image, the other a pasted base64 string.
with gr.Blocks() as demo:
    gr.Markdown("## Captcha OCR Demo")

    # Tab 1: upload an image and run process_image on it.
    with gr.Tab("Upload image"):
        with gr.Row():
            image_input = gr.Image(type="numpy", label="Upload Image")
            image_output = gr.Image(type="pil", label="Processed Image")
        text_output = gr.Textbox(label="OCR Output")
        image_button = gr.Button("Submit")
        image_button.click(
            fn=process_image,
            inputs=image_input,
            outputs=[image_output, text_output],
        )

    # Tab 2: paste a base64 string and run process_base64 on it.
    with gr.Tab("Paste base64"):
        with gr.Row():
            base64_input = gr.Textbox(
                label="Paste base64 here",
                lines=5,
                placeholder="data:image/png;base64,...",
            )
        with gr.Row():
            base64_output_img = gr.Image(type="pil", label="Processed Image")
            base64_output_txt = gr.Textbox(label="OCR Output")
        base64_button = gr.Button("Submit")
        base64_button.click(
            fn=process_base64,
            inputs=base64_input,
            outputs=[base64_output_img, base64_output_txt],
        )

    # Example files are wired to the upload tab's image input.
    gr.Examples(
        examples=[f"examples/captcha-{i}.png" for i in range(10)],
        inputs=image_input,
    )


if __name__ == "__main__":
    demo.launch()