Spaces:

vithacocf
/

ocr

Sleeping

App Files Community

vithacocf committed on Jul 10

Commit

dcc9745

verified ·

1 Parent(s): 76a5fff

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -13

app.py CHANGED Viewed

@@ -78,26 +78,40 @@ def convert_png_to_jpg(image):
     return converted
 # Hàm chính
-def predict(image, prompt=None):
-    # Kiểm tra ảnh hợp lệ
-    if not is_supported_image(image):
-        return "Không hỗ trợ định dạng file này. Vui lòng tải ảnh đúng."
-    # Prompt rỗng
     if prompt is None or prompt.strip() == "":
-        return "Vui lòng nhập prompt để trích xuất dữ liệu từ ảnh."
     try:
-        # Nếu ảnh là PNG có alpha, convert sang RGB
-        if image.mode == "RGBA" or image.mode == "LA":
-            image = convert_png_to_jpg(image)
-        image = image.convert("RGB")
     except UnidentifiedImageError:
-        return "Không thể đọc ảnh. Vui lòng kiểm tra lại định dạng hoặc ảnh bị lỗi."
     except Exception as e:
-        return f"Lỗi khi xử lý ảnh: {str(e)}"
     # Inference
     inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)
@@ -118,7 +132,8 @@ def predict(image, prompt=None):
 demo = gr.Interface(
     fn=predict,
     inputs=[
-        gr.Image(type="pil", label="Tải ảnh tài liệu lên"),
         gr.Textbox(label="Gợi ý (tuỳ chọn)", placeholder="VD: Trích số hóa đơn")
     ],
     outputs="text",

     return converted
 # Hàm chính
+def predict(image_path, prompt=None):
+    if not isinstance(image_path, str) or not os.path.exists(image_path):
+        return "=Không tìm thấy ảnh. Vui lòng thử lại sau khi upload thành công."
     if prompt is None or prompt.strip() == "":
+        return "=Vui lòng nhập prompt để trích xuất dữ liệu."
     try:
+        image = Image.open(image_path).convert("RGB")
+        if image.mode in ["RGBA", "LA"]:
+            new_img = Image.new("RGB", image.size, (255, 255, 255))
+            new_img.paste(image)
+            image = new_img
     except UnidentifiedImageError:
+        return "=Không thể đọc ảnh. Ảnh có thể bị hỏng hoặc sai định dạng."
     except Exception as e:
+        return f"=Lỗi khi xử lý ảnh: {str(e)}"
+    inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)
+    generated_ids = model.generate(
+        **inputs,
+        max_new_tokens=512,
+        do_sample=False,
+        use_cache=False,
+        eos_token_id=processor.tokenizer.eos_token_id,
+        pad_token_id=processor.tokenizer.pad_token_id
+    )
+    result = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return result
     # Inference
     inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)
 demo = gr.Interface(
     fn=predict,
     inputs=[
+        # gr.Image(type="pil", label="Tải ảnh tài liệu lên"),
+        gr.Image(type="filepath", label="Tải ảnh tài liệu lên"),
         gr.Textbox(label="Gợi ý (tuỳ chọn)", placeholder="VD: Trích số hóa đơn")
     ],
     outputs="text",