import cv2
import fitz
import numpy as np
from PIL import Image, ImageDraw, ImageFont


def concate_images_vertically(images, confidence_values=None):
    """Stack PIL images top-to-bottom into a single image.

    Images narrower than the widest one are stretched horizontally to the
    maximum width (their height is kept). When ``confidence_values`` is
    non-empty, the i-th value is drawn in red near the bottom-left corner
    of the i-th image before stacking.

    Args:
        images: Non-empty sequence of PIL images.
        confidence_values: Optional sequence of numbers, indexed in step
            with ``images``. ``None`` or an empty sequence disables the
            annotation.

    Returns:
        A new PIL image containing all inputs stacked vertically.

    Raises:
        ValueError: If ``images`` is empty.
        IndexError: If ``confidence_values`` is non-empty but shorter than
            ``images``.
    """
    if not images:
        raise ValueError("images must not be empty")
    if confidence_values is None:
        confidence_values = []

    max_width = max(image.size[0] for image in images)

    annotate = len(confidence_values) > 0
    if annotate:
        # Loading the font is loop-invariant, so do it once.
        font = ImageFont.truetype("assets/SIMKAI.TTF", 100)
        text_color = (255, 0, 0)

    new_images = []
    for i, image in enumerate(images):
        if image.size[0] < max_width:
            # resize() returns a new image, so the caller's image is untouched.
            image = image.resize((max_width, image.size[1]))
        elif annotate:
            # Draw on a copy so the caller's image is not modified in place.
            image = image.copy()
        if annotate:
            draw = ImageDraw.Draw(image)
            draw.text(
                (100, image.size[1] - 100),
                '概率:' + '{:.3f}'.format(confidence_values[i]),
                font=font,
                fill=text_color,
            )
        new_images.append(image)

    # NumPy converts each PIL image to an array; axis 0 is the row axis.
    res_image = np.concatenate(new_images, axis=0)
    return Image.fromarray(res_image)


def concate_images_horizontally(images):
    """Place PIL images side by side (left-to-right) in a single image.

    Images shorter than the tallest one are stretched vertically to the
    maximum height (their width is kept).

    Args:
        images: Non-empty sequence of PIL images.

    Returns:
        A new PIL image containing all inputs concatenated horizontally.

    Raises:
        ValueError: If ``images`` is empty.
    """
    if not images:
        raise ValueError("images must not be empty")

    max_height = max(image.size[1] for image in images)

    new_images = []
    for image in images:
        if image.size[1] < max_height:
            image = image.resize((image.size[0], max_height))
        new_images.append(image)

    # Axis 1 is the column axis, so the images end up next to each other.
    res_image = np.concatenate(new_images, axis=1)
    return Image.fromarray(res_image)


def crop_image_from_background(image):
    """Crop an image to the bounding region of its non-background content.

    Bright, weakly saturated pixels are treated as background. The
    remaining pixels are cleaned up morphologically, grouped into external
    contours, and the image is cropped to the axis-aligned box enclosing
    every contour rectangle that is at least 10 px on both sides.

    Args:
        image: A PIL image or array-like accepted by ``np.array``.
            NOTE(review): ``cv2.cvtColor`` with a 3-channel conversion code
            presumably requires a 3-channel 8-bit image -- confirm callers
            never pass RGBA or grayscale input.

    Returns:
        A NumPy array: the cropped region, or the full image when no
        sufficiently large foreground region is found.
    """
    image = np.array(image)

    # Convert the image to HSV colour space.
    # NOTE(review): arrays built from PIL images are RGB, not BGR. Swapping
    # R and B should only change the hue channel, which the threshold below
    # leaves unconstrained, so the resulting mask should be unaffected.
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Background range in HSV: any hue, saturation <= 200, value >= 200.
    lower_white = np.array([0, 0, 200])
    upper_white = np.array([180, 200, 255])

    # Select the background, then invert so the mask marks the foreground.
    mask = cv2.inRange(hsv, lower_white, upper_white)
    mask = cv2.bitwise_not(mask)
    # A light erosion followed by a heavier dilation of the foreground mask.
    mask = cv2.erode(mask, None, iterations=1)
    mask = cv2.dilate(mask, None, iterations=10)

    # Only the outermost contours are needed.
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    boxes = []
    for cnt in contours:
        # Minimum-area (possibly rotated) rectangle around the contour.
        rect = cv2.minAreaRect(cnt)
        # TODO: small rectangles near the image border could be rejected
        # based on their position.
        w, h = rect[1]
        if w < 10 or h < 10:
            continue
        # Corner points of the rectangle, converted to integer coordinates.
        box = cv2.boxPoints(rect)
        box = np.intp(box)
        boxes.append(box)

    if not boxes:
        return image

    boxes = np.array(boxes)
    min_x, min_y = np.min(boxes, axis=(0, 1))
    max_x, max_y = np.max(boxes, axis=(0, 1))

    # Corners of a rotated rectangle can lie outside the image. Negative
    # values would be interpreted as wrap-around slice indices and yield a
    # wrong or empty crop, so clamp the box to the image bounds.
    height, width = image.shape[:2]
    min_x = max(int(min_x), 0)
    min_y = max(int(min_y), 0)
    max_x = min(int(max_x), width)
    max_y = min(int(max_y), height)
    if min_x >= max_x or min_y >= max_y:
        return image

    return image[min_y:max_y, min_x:max_x]


def get_products_from_pdf_file(file):
    """Render a PDF into page images and group them into two-page products.

    1. Users may upload several PDF files (this function handles one).
    2. A PDF may contain a single product or several products; every
       product consists of two consecutive pages.

    Args:
        file: Object with a ``name`` attribute holding the file path.

    Returns:
        A list of products, each a list of two PIL images in page order.
        The list is empty when the file name does not end in ``.pdf``
        (case-insensitive) or the document has fewer than two pages. A
        trailing unpaired page is ignored.
    """
    products = []
    if not file.name.lower().endswith('.pdf'):
        return products

    images = []
    pdf = fitz.open(file.name)
    try:
        for page in pdf:
            # Render the page to a pixmap and wrap it as a PIL image.
            pix = page.get_pixmap()
            image = Image.frombytes('RGB', (pix.width, pix.height), pix.samples)
            images.append(image)
    finally:
        # Release the document even if rendering a page fails.
        pdf.close()

    # Pair consecutive pages: (0, 1), (2, 3), ...
    for start in range(0, len(images) - 1, 2):
        products.append(images[start:start + 2])
    return products