File size: 1,895 Bytes
e8e33af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from PIL import Image
from huggingface_hub import hf_hub_download
from doclayout_yolo import YOLOv10
from ..storage.schemas import BaseBox
import tempfile
from pathlib import Path

# Resolve a local path to the DocLayout-YOLO weights file published in the
# Hugging Face Hub repository named below.
# NOTE: this executes at import time, so importing this module performs the
# hub lookup and loads the model before any function is called.
filepath = hf_hub_download(
    repo_id="juliozhao/DocLayout-YOLO-DocStructBench",
    filename="doclayout_yolo_docstructbench_imgsz1024.pt"
)
# Single module-level model instance; parse_img() calls model.predict on it.
model = YOLOv10(filepath)


def parse_img(
    img: Image.Image,
    device: str = "cpu",
    box_directory: str = "src/boxes",
):
    """
    Detect boxes in an image, crop and save each one, and describe them.

    The image is written to a temporary PNG file, that file is passed to the
    module-level ``model`` for prediction, and every detected box is cropped
    out of ``img`` and saved as ``box_<i>.png`` inside ``box_directory``.
    The temporary file is removed before returning, even when prediction or
    cropping raises.

    Args:
        img: Source image to run detection on and to crop from.
        device: Device string forwarded to ``model.predict``.
        box_directory: Directory that receives the cropped box images. It is
            created (including parents) if it does not exist. File names are
            ``box_0.png``, ``box_1.png``, ... on every call, so crops written
            by an earlier call into the same directory are overwritten.

    Returns:
        A ``(boxes_result, crop_image_list)`` tuple: a list of ``BaseBox``
        objects holding each box's coordinates, confidence, class id and
        saved image path, and the list of cropped images in the same order.
    """
    out_dir = Path(box_directory)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Reserve a unique temp path. delete=False keeps the file on disk after
    # the handle is closed, so both the save below and model.predict can
    # open it by name on any platform.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
        img_path = temp_file.name

    boxes_result = []
    crop_image_list = []
    try:
        img.save(img_path, format="PNG")

        det_res = model.predict(
            img_path,
            imgsz=1024,
            conf=0.2,
            device=device,
        )

        # Each row is indexed below as: first four values = box corners
        # (x_min, y_min, x_max, y_max), second-to-last = confidence,
        # last = class id.
        for i, box_data in enumerate(det_res[0].boxes.data):
            box_data = box_data.tolist()
            x_min, y_min, x_max, y_max = box_data[:4]

            crop = img.crop((x_min, y_min, x_max, y_max))
            box_path = str(out_dir / f"box_{i}.png")
            crop.save(box_path)
            crop_image_list.append(crop)

            boxes_result.append(
                BaseBox(
                    class_name=int(box_data[-1]),
                    x_min=float(x_min),
                    y_min=float(y_min),
                    x_max=float(x_max),
                    y_max=float(y_max),
                    confidence=float(box_data[-2]),
                    saved_img_path=box_path,
                )
            )
    finally:
        # Always remove the temp file, including when save/predict/crop fails.
        Path(img_path).unlink(missing_ok=True)

    return boxes_result, crop_image_list