pic2pdf_explorer / image_process.py
wuzengcheng
[feat] 代码模块化&去除文件保存&界面优化
1c3aebe
import cv2
import fitz
import numpy as np
from PIL import Image, ImageDraw, ImageFont
def concate_images_vertically(images, confidence_values=None):
    """
    Stack images top to bottom into a single image.

    Images narrower than the widest one are stretched horizontally to the
    maximum width; their height is kept, so the aspect ratio is not
    preserved. When ``confidence_values`` is non-empty, a red text label
    containing the value formatted to three decimals is drawn near the
    bottom-left corner of every image before stacking.

    Args:
        images: PIL images to stack. Any iterable is accepted; it is
            materialised into a list once.
        confidence_values: Optional sequence indexed in step with
            ``images``. ``None`` or an empty sequence disables the labels.

    Returns:
        A new PIL image built from the row-wise concatenation of the inputs.

    Raises:
        ValueError: If ``images`` is empty. ``np.concatenate`` also raises
            ``ValueError`` when the pixel arrays do not line up (for
            example images with different channel counts).
        IndexError: If ``confidence_values`` is non-empty but shorter than
            ``images``.
    """
    images = list(images)
    if not images:
        raise ValueError("images must contain at least one image")

    # len() rather than truthiness so that array-like inputs keep working.
    annotate = confidence_values is not None and len(confidence_values) > 0
    # Load the font once instead of re-reading the TTF file for every image.
    font = ImageFont.truetype("assets/SIMKAI.TTF", 100) if annotate else None
    text_color = (255, 0, 0)

    max_width = max(image.size[0] for image in images)
    new_images = []
    for i, image in enumerate(images):
        if image.size[0] < max_width:
            # resize() returns a new image, so the caller's object is untouched.
            image = image.resize((max_width, image.size[1]))
        elif annotate:
            # ImageDraw paints in place; work on a copy so the caller's
            # image is not permanently stamped with the label.
            image = image.copy()
        if annotate:
            draw = ImageDraw.Draw(image)
            label = '概率:' + '{:.3f}'.format(confidence_values[i])
            draw.text((100, image.size[1] - 100), label, font=font, fill=text_color)
        new_images.append(image)

    # axis=0 joins the pixel arrays along the row (height) dimension.
    res_image = np.concatenate(new_images, axis=0)
    return Image.fromarray(res_image)
def concate_images_horizontally(images):
    """
    Stitch images side by side, left to right, into a single image.

    Images shorter than the tallest one are stretched vertically to the
    maximum height; their width is kept, so the aspect ratio is not
    preserved.

    Args:
        images: Sequence of PIL images. It is iterated twice, so a one-shot
            iterator is not suitable.

    Returns:
        A new PIL image built from the column-wise concatenation of the inputs.
    """
    _all_widths, all_heights = zip(*(picture.size for picture in images))
    tallest = max(all_heights)

    # Bring every picture up to the common height before joining.
    columns = [
        picture.resize((picture.size[0], tallest))
        if picture.size[1] < tallest
        else picture
        for picture in images
    ]

    # axis=1 joins the pixel arrays along the column (width) dimension.
    stitched = np.concatenate(columns, axis=1)
    return Image.fromarray(stitched)
def crop_image_from_background(image):
    """
    Crop an image to the region covered by its non-white content.

    The image is thresholded in HSV space to find bright, weakly saturated
    ("white") pixels. Everything else is treated as content, cleaned up
    with an erode/dilate pass and outlined with external contours. The
    crop is the axis-aligned rectangle enclosing the minimum-area
    rectangles of all contours that are at least 10 pixels in both
    dimensions.

    Args:
        image: Anything ``np.array`` can turn into a 3-channel 8-bit pixel
            array (for example a PIL image).

    Returns:
        A NumPy array: the cropped slice of the converted image, or the
        whole converted image when no sufficiently large content region
        is found.
    """
    image = np.array(image)

    # Convert the image to HSV color space.
    # NOTE(review): np.array() of a PIL image yields RGB, not BGR. The
    # threshold below accepts hue 0-180 and only constrains S and V, which
    # presumably do not depend on channel order, so the mask should be
    # unaffected; confirm before adding any hue-based logic.
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Range for "white" in HSV: hue 0-180, saturation 0-200, value 200-255.
    lower_white = np.array([0, 0, 200])
    upper_white = np.array([180, 200, 255])

    # Select the white background, then invert to get the content mask.
    mask = cv2.inRange(hsv, lower_white, upper_white)
    mask = cv2.bitwise_not(mask)
    # One erosion pass followed by ten dilation passes on the content mask.
    mask = cv2.erode(mask, None, iterations=1)
    mask = cv2.dilate(mask, None, iterations=10)

    # Outermost contours only.
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    boxes = []
    for cnt in contours:
        # Minimum-area (possibly rotated) rectangle around this contour.
        rect = cv2.minAreaRect(cnt)
        # TODO: small rectangles near the image border could be rejected
        # based on their position.
        w, h = rect[1]
        if w < 10 or h < 10:
            continue
        # Four corner points of the rectangle, as integers.
        box = cv2.boxPoints(rect)
        box = np.intp(box)
        boxes.append(box)

    if len(boxes) == 0:
        return image

    corners = np.array(boxes)
    min_x, min_y = np.min(corners, axis=(0, 1))
    max_x, max_y = np.max(corners, axis=(0, 1))

    # Corners of a rotated rectangle can fall outside the image. A negative
    # slice start would be counted from the far edge and produce an empty
    # or wrong crop, so clamp the bounds to the image first.
    height, width = image.shape[:2]
    min_x = max(int(min_x), 0)
    min_y = max(int(min_y), 0)
    max_x = min(int(max_x), width)
    max_y = min(int(max_y), height)
    return image[min_y:max_y, min_x:max_x]
def get_products_from_pdf_file(file):
    """
    Render a PDF into page images and group them into two-page products.

    1. Users may upload several PDF files; this function handles one.
    2. A PDF may contain a single product or several products.

    Every product consists of two consecutive pages. Pages are paired in
    order (1-2, 3-4, ...); a trailing unpaired page, and therefore any
    single-page PDF, is ignored.

    Args:
        file: Object whose ``name`` attribute is the path of the file.
            Files whose name does not end in ``.pdf`` (case-insensitive)
            are skipped.

    Returns:
        A list of products, each a list of two PIL RGB images. Empty when
        the file is not a PDF or has fewer than two pages.
    """
    if not file.name.lower().endswith('.pdf'):
        return []

    images = []
    # The context manager closes the document even if rendering a page raises.
    with fitz.open(file.name) as pdf:
        for page in pdf:
            # Render the page to a pixmap and wrap its bytes as a PIL image.
            pix = page.get_pixmap()
            image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
            images.append(image)

    # Stop at len - 1 so an unpaired last page never forms a product.
    return [images[i:i + 2] for i in range(0, len(images) - 1, 2)]