import fitz  # PyMuPDF
import gradio as gr
import zipfile
import io
import os
from datetime import datetime
import tempfile
# Helper function to find color areas
def find_color_areas(page, target_color, tolerance=30):
    """Find bounding boxes of connected page regions close to a colour.

    The page is rendered with ``page.get_pixmap()`` and scanned row by row.
    Each not-yet-visited pixel seeds a 4-connected flood fill over pixels
    whose red, green and blue components each differ from ``target_color``
    by at most ``tolerance``.

    Args:
        page: Object providing ``get_pixmap()``; the returned pixmap must
            expose ``width``, ``height`` and ``pixel(x, y)``.
        target_color: Indexable ``(r, g, b)`` reference colour.
        tolerance: Maximum allowed absolute difference per channel.

    Returns:
        A list of ``fitz.Rect`` bounding boxes in pixmap pixel coordinates,
        one per connected matching region, in scan order of their seeds.
    """
    pixmap = page.get_pixmap()
    cols, rows = pixmap.width, pixmap.height
    seen = [[False] * cols for _ in range(rows)]

    def is_match(rgb):
        # Only the first three components are compared.
        red, green, blue = rgb[:3]
        return not (
            abs(red - target_color[0]) > tolerance
            or abs(green - target_color[1]) > tolerance
            or abs(blue - target_color[2]) > tolerance
        )

    def grow_region(seed_x, seed_y):
        # Iterative flood fill; returns the region's bounding box or None
        # when the seed pixel itself does not match.
        pending = [(seed_x, seed_y)]
        left = top = right = bottom = None
        while pending:
            px, py = pending.pop()
            if seen[py][px]:
                continue
            seen[py][px] = True
            if not is_match(pixmap.pixel(px, py)):
                continue
            # Each matching pixel covers the unit square (px, py)-(px+1, py+1).
            if left is None:
                left, top, right, bottom = px, py, px + 1, py + 1
            else:
                left = min(left, px)
                top = min(top, py)
                right = max(right, px + 1)
                bottom = max(bottom, py + 1)
            # Only matching pixels spread the fill to their neighbours.
            if px > 0:
                pending.append((px - 1, py))
            if px < cols - 1:
                pending.append((px + 1, py))
            if py > 0:
                pending.append((px, py - 1))
            if py < rows - 1:
                pending.append((px, py + 1))
        if left is None:
            return None
        return fitz.Rect(left, top, right, bottom)

    found = []
    for y, row in enumerate(seen):
        for x in range(cols):
            if row[x]:
                continue
            region = grow_region(x, y)
            if region:
                found.append(region)
    return found
def merge_overlapping_rectangles(rectangles):
    """Merge intersecting rectangles into mutually non-intersecting boxes.

    Every group of rectangles that intersect, directly or through a chain
    of other rectangles, is replaced by its common bounding box. Merging is
    repeated until no two returned boxes intersect, because a bounding box
    that grew by absorbing one rectangle may start to overlap others.

    Args:
        rectangles: Iterable of rectangle objects exposing ``x0``, ``y0``,
            ``x1``, ``y1`` and ``intersects(other)`` (``fitz.Rect`` in this
            file). The input is not modified.

    Returns:
        A new list of rectangles. A rectangle that intersects nothing is
        returned as the same object; merged groups are new ``fitz.Rect``
        instances.
    """
    merged_rects = []
    for rect in rectangles:
        current = rect
        absorbed = True
        # Re-scan after every growth: the enlarged box may now reach boxes
        # that the smaller one did not touch.
        while absorbed:
            absorbed = False
            untouched = []
            for other in merged_rects:
                if current.intersects(other):
                    current = fitz.Rect(
                        min(current.x0, other.x0),
                        min(current.y0, other.y0),
                        max(current.x1, other.x1),
                        max(current.y1, other.y1),
                    )
                    absorbed = True
                else:
                    untouched.append(other)
            merged_rects = untouched
        merged_rects.append(current)
    return merged_rects
def markup_color_regions(doc, color_comment_pairs, tolerance=30, progress=None):
    """Outline every colour-matched region of each page with an annotation.

    For each page and each colour/comment pair, the areas reported by
    ``find_color_areas`` are consolidated with
    ``merge_overlapping_rectangles`` and each resulting box gets a
    rectangle annotation titled "Markup" whose content is the pair's comment.

    Args:
        doc: Sized iterable of pages (a PyMuPDF document in this file).
            Pages are annotated in place.
        color_comment_pairs: Iterable of dicts with the keys ``'color'``,
            ``'comment'`` and ``'stroke_color'``.
        tolerance: Per-channel colour tolerance passed to
            ``find_color_areas``.
        progress: Optional callable invoked once per page with the fraction
            of pages started so far (``page_number / page_count``). When
            ``None``, no progress is reported.
    """
    page_count = len(doc)
    for page_number, page in enumerate(doc, start=1):
        # The default is None, so only report when a callback was supplied.
        if progress is not None:
            progress(page_number / page_count)
        for color_comment_pair in color_comment_pairs:
            target_color = color_comment_pair['color']
            comment = color_comment_pair['comment']
            stroke_color = color_comment_pair['stroke_color']
            rectangles = find_color_areas(page, target_color, tolerance)
            if not rectangles:
                continue
            # NOTE(review): boxes are in pixmap pixel coordinates; this
            # presumably matches page coordinates only for unrotated pages
            # rendered at the default scale - confirm for rotated input.
            for bbox in merge_overlapping_rectangles(rectangles):
                annot = page.add_rect_annot(bbox)
                annot.set_colors(stroke=stroke_color)
                annot.set_border(width=2)
                annot.set_info({"title": "Markup", "content": comment})
                annot.update()
def _parse_rgb(text, field_label):
    """Parse ``"R,G,B"`` text into a tuple of three ints in 0..255."""
    try:
        channels = tuple(int(part) for part in text.split(','))
    except ValueError as err:
        raise gr.Error(
            f"{field_label} must be three comma-separated integers, e.g. 255,0,0."
        ) from err
    if len(channels) != 3 or any(not 0 <= value <= 255 for value in channels):
        raise gr.Error(
            f"{field_label} must be three integers between 0 and 255, e.g. 255,0,0."
        )
    return channels


def process_pdf_files(input_pdfs, selected_color_comment_indices, tolerance, custom_color, custom_comment, custom_stroke_color, progress=gr.Progress()):
    """Annotate colour regions in the uploaded PDFs and bundle them in a ZIP.

    Args:
        input_pdfs: Uploaded files, either objects with a ``name`` attribute
            holding the file path or plain path strings.
        selected_color_comment_indices: Positions of the chosen entries in
            the pair list built here: 0 and 1 are the built-in pairs, 2 is
            the custom pair (present only when all custom fields are given).
        tolerance: Per-channel colour tolerance passed on to
            ``markup_color_regions``.
        custom_color: Optional ``"R,G,B"`` text (0-255) of the colour to find.
        custom_comment: Optional annotation text for the custom colour.
        custom_stroke_color: Optional ``"R,G,B"`` text (0-255) of the
            annotation outline colour.
        progress: Callable receiving the overall completed fraction.

    Returns:
        Path of a temporary ZIP file containing one annotated PDF per input.

    Raises:
        gr.Error: If no file was uploaded, a custom colour is malformed, or
            the custom option is selected without all custom fields.
    """
    if not input_pdfs:
        raise gr.Error("Please upload at least one PDF file.")

    color_comment_pairs = [
        {
            "color": (235, 128, 138),
            "comment": "Structural Slab greater than architectural slab",
            "stroke_color": (1, 0, 0)
        },
        {
            "color": (128, 253, 128),
            "comment": "Arch Slab greater than Structure",
            "stroke_color": (0, 1, 0)
        }
    ]

    # Add custom color-comment pair if provided
    if custom_color and custom_comment and custom_stroke_color:
        custom_color_tuple = _parse_rgb(custom_color, "Custom Color")
        stroke_rgb = _parse_rgb(custom_stroke_color, "Custom Stroke Color")
        color_comment_pairs.append({
            "color": custom_color_tuple,
            "comment": custom_comment,
            # PyMuPDF annotation colours are floats in 0..1 (like the
            # built-in pairs above), while the form asks for 0-255 values.
            "stroke_color": tuple(value / 255 for value in stroke_rgb)
        })

    # Get the selected color-comment pairs
    selected_color_comment_pairs = []
    for index in selected_color_comment_indices or []:
        if index >= len(color_comment_pairs):
            # Only the custom entry can be missing from the list.
            raise gr.Error(
                "Custom Option is selected: please fill in the custom color, "
                "comment and stroke color."
            )
        selected_color_comment_pairs.append(color_comment_pairs[index])

    total_files = len(input_pdfs)
    # (file name, PDF bytes) of every modified document
    modified_pdfs = []

    # Process each input PDF file
    for i, pdf_file in enumerate(input_pdfs):
        # Uploads may arrive as wrappers with a .name path or as plain paths.
        pdf_path = getattr(pdf_file, "name", pdf_file)
        with open(pdf_path, "rb") as file_stream:
            doc = fitz.open(stream=file_stream.read(), filetype="pdf")
        try:
            markup_color_regions(
                doc,
                selected_color_comment_pairs,
                tolerance,
                # Map per-document progress onto the whole batch.
                progress=lambda p, i=i: progress((i + p) / total_files)
            )
            # Save the modified PDF in memory
            pdf_in_memory = io.BytesIO()
            doc.save(pdf_in_memory)
        finally:
            # Release the document even when annotation or saving fails.
            doc.close()
        modified_pdfs.append((os.path.basename(pdf_path), pdf_in_memory.getvalue()))

    # Generate the current date string
    current_date = datetime.now().strftime("%Y-%m-%d")

    # Write the ZIP straight into a named temporary file. delete=False keeps
    # it on disk after the handle is closed so Gradio can serve the download.
    with tempfile.NamedTemporaryFile(delete=False, suffix=f"_CoordinationPDFS_{current_date}.zip") as temp_file:
        with zipfile.ZipFile(temp_file, 'w') as zipf:
            for pdf_name, pdf_bytes in modified_pdfs:
                zipf.writestr(pdf_name, pdf_bytes)

    return temp_file.name  # Return the path to the temp file for Gradio to handle the download
# Define the Gradio interface
interface = gr.Interface(
    fn=process_pdf_files,
    inputs=[
        gr.Files(label="Input PDF Files", file_types=[".pdf"]),
        # type="index" sends the positions of the ticked choices, so the
        # order here must match the pair list built in process_pdf_files
        # (two built-in pairs first, the custom pair last).
        gr.CheckboxGroup(  # Use CheckboxGroup for multi-select
            label="Select Color-Comment Pairs",
            choices=["Structural Slab vs Arch Slab", "Arch Slab vs Structural Slab", "Custom Option"],
            type="index"
        ),
        gr.Slider(label="Tolerance", minimum=0, maximum=100, step=1, value=30),
        gr.Textbox(label="Custom Color (R,G,B)", placeholder="Enter custom color in RGB format, e.g., 255,0,0"),
        gr.Textbox(label="Custom Comment", placeholder="Enter custom comment for this color"),
        gr.Textbox(label="Custom Stroke Color (R,G,B)", placeholder="Enter stroke color in RGB format, e.g., 0,0,255")
    ],
    outputs=gr.File(label="Download ZIP File"),  # Use gr.File to trigger the download of the ZIP file
    title="PDF Color Region Markup"
)
# Launch the Gradio app
interface.launch()