File size: 3,508 Bytes
1c3aebe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132

import cv2
import fitz
import numpy as np
from PIL import Image, ImageDraw, ImageFont


def concate_images_vertically(images, confidence_values=None):
    """Stack images top-to-bottom into a single PIL image.

    Images narrower than the widest one are resized to the widest width
    (their height is kept, so the aspect ratio changes). When
    ``confidence_values`` is non-empty, each image is labelled near its
    bottom-left corner with the corresponding value formatted to three
    decimals.

    Args:
        images: Non-empty iterable of PIL images. The pixel arrays must be
            concatenable along axis 0 once the widths are equalised
            (presumably all images share the same mode -- verify against
            callers).
        confidence_values: Optional sequence with one numeric value per
            image. ``None`` or an empty sequence disables labelling.

    Returns:
        A new PIL image built from the concatenated pixel arrays.

    Raises:
        ValueError: If ``images`` is empty.
        IndexError: If ``confidence_values`` is non-empty but shorter than
            ``images``.
    """
    images = list(images)
    if not images:
        raise ValueError("images must contain at least one image")

    # A ``None`` default avoids sharing one mutable list between calls.
    if confidence_values is None:
        confidence_values = []
    with_labels = len(confidence_values) > 0

    max_width = max(image.size[0] for image in images)

    # Read the font file once instead of once per image.
    font = ImageFont.truetype("assets/SIMKAI.TTF", 100) if with_labels else None
    text_color = (255, 0, 0)

    new_images = []
    for i, image in enumerate(images):
        if image.size[0] < max_width:
            # resize() already returns a new image.
            image = image.resize((max_width, image.size[1]))
        elif with_labels:
            # Draw on a copy so the caller's image is never modified.
            image = image.copy()

        if with_labels:
            draw = ImageDraw.Draw(image)
            draw.text(
                (100, image.size[1] - 100),
                '概率:' + '{:.3f}'.format(confidence_values[i]),
                font=font,
                fill=text_color,
            )

        new_images.append(image)

    res_image = np.concatenate(new_images, axis=0)
    return Image.fromarray(res_image)


def concate_images_horizontally(images):
    """Join images side by side (left to right) into a single PIL image.

    Any image shorter than the tallest one is resized to the tallest height
    while keeping its own width; the rest are used unchanged. The pixel
    arrays are then concatenated along axis 1.

    Args:
        images: Non-empty collection of PIL images.

    Returns:
        A new PIL image built from the concatenated pixel arrays.

    Raises:
        ValueError: If ``images`` is empty.
    """
    tallest = max(picture.size[1] for picture in images)

    row = [
        picture.resize((picture.size[0], tallest))
        if picture.size[1] < tallest
        else picture
        for picture in images
    ]

    return Image.fromarray(np.concatenate(row, axis=1))


def crop_image_from_background(image):
    """Crop an image to the bounding box of its non-white content.

    Pixels inside the HSV range defined below are treated as background.
    The remaining pixels are cleaned up morphologically, the external
    contours are located, and the image is cropped to the axis-aligned box
    enclosing the minimum-area rectangles of all contours that are at least
    10 px on both sides.

    Args:
        image: Anything ``np.array`` turns into a 3-channel 8-bit image
            (e.g. a PIL RGB image).

    Returns:
        The cropped region as a NumPy array, or the whole image (as a NumPy
        array) when no sufficiently large contour is found.
    """
    image = np.array(image)

    # Convert the image to HSV color space.
    # NOTE(review): a PIL image arrives here as RGB, not BGR. The threshold
    # below spans the whole hue range, so the channel order should not
    # affect the mask -- confirm before tightening the hue bounds.
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Define range for white color in HSV
    lower_white = np.array([0, 0, 200])
    upper_white = np.array([180, 200, 255])

    # Threshold the HSV image to get only white colors (background),
    # then invert so that the content is the foreground.
    mask = cv2.inRange(hsv, lower_white, upper_white)
    mask = cv2.bitwise_not(mask)

    # Erode once, then dilate ten times with the default kernel.
    mask = cv2.erode(mask, None, iterations=1)
    mask = cv2.dilate(mask, None, iterations=10)

    # Outermost contours only.
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    boxes = []
    for cnt in contours:
        # Minimum-area (possibly rotated) rectangle around this contour.
        rect = cv2.minAreaRect(cnt)

        # TODO: small rectangles near the border could be rejected by position.
        w, h = rect[1]
        if w < 10 or h < 10:
            continue

        # Four corner points of the rectangle, converted to integers.
        box = cv2.boxPoints(rect)
        box = np.intp(box)
        boxes.append(box)

    if not boxes:
        return image

    boxes = np.array(boxes)
    min_x, min_y = np.min(boxes, axis=(0, 1))
    max_x, max_y = np.max(boxes, axis=(0, 1))

    # Corners of a rotated rectangle can lie outside the image. A negative
    # start index would wrap around in NumPy slicing and yield an empty or
    # wrong crop, so clamp the box to the image bounds first.
    height, width = image.shape[:2]
    min_x = min(max(int(min_x), 0), width)
    max_x = min(max(int(max_x), 0), width)
    min_y = min(max(int(min_y), 0), height)
    max_y = min(max(int(max_y), 0), height)

    return image[min_y:max_y, min_x:max_x]


def get_products_from_pdf_file(file):
    """Render a PDF and group its pages into two-page products.

    Users may upload several PDF files; each file may hold one product
    (exactly two pages) or several products (two consecutive pages each).
    A trailing unpaired page is ignored, and a one-page document yields no
    product.

    Args:
        file: Object whose ``name`` attribute is the path of the file. The
            ``.pdf`` extension is matched case-insensitively.

    Returns:
        A list of products, each a list of two PIL RGB images (one per
        page). Empty when the file is not a PDF or has fewer than two pages.
    """
    if not file.name.lower().endswith('.pdf'):
        return []

    # The context manager closes the document even if rendering fails.
    with fitz.open(file.name) as pdf:
        images = []
        for page in pdf:
            # Render the page to a pixmap and wrap its samples as a PIL image.
            pix = page.get_pixmap()
            images.append(
                Image.frombytes('RGB', (pix.width, pix.height), pix.samples)
            )

    # Pair consecutive pages: (0, 1), (2, 3), ... This covers both the
    # single-product and the multi-product case.
    return [images[i:i + 2] for i in range(0, len(images) - 1, 2)]