Spaces:
Sleeping
Sleeping
Update Doors_Schedule
Browse files- Doors_Schedule +48 -13
Doors_Schedule
CHANGED
|
@@ -11,8 +11,8 @@ from PyPDF2.generic import TextStringObject, NameObject, ArrayObject, FloatObjec
|
|
| 11 |
from PyPDF2.generic import NameObject, TextStringObject, DictionaryObject, FloatObject, ArrayObject
|
| 12 |
from PyPDF2 import PdfReader
|
| 13 |
from PyPDF2.generic import TextStringObject
|
| 14 |
-
|
| 15 |
-
|
| 16 |
|
| 17 |
|
| 18 |
def convert2img(path):
|
|
@@ -35,7 +35,7 @@ def calculate_midpoint(x1,y1,x2,y2):
|
|
| 35 |
return (xm, ym)
|
| 36 |
|
| 37 |
def read_text(input_pdf_path):
|
| 38 |
-
pdf_document = fitz.open(input_pdf_path)
|
| 39 |
|
| 40 |
for page_num in range(pdf_document.page_count):
|
| 41 |
page = pdf_document[page_num]
|
|
@@ -167,7 +167,7 @@ def details_in_another_table(clmn_name, clmn_idx, current_dfs, dfs):
|
|
| 167 |
return new_df
|
| 168 |
|
| 169 |
def extract_tables(schedule):
|
| 170 |
-
doc = fitz.open(schedule)
|
| 171 |
for page in doc:
|
| 172 |
tabs = page.find_tables()
|
| 173 |
dfs = []
|
|
@@ -398,23 +398,27 @@ def modify_author_in_pypdf2(pdf_bytes, new_authors):
|
|
| 398 |
#Save the modified PDF to a variable
|
| 399 |
output_stream = io.BytesIO()
|
| 400 |
writer.write(output_stream)
|
|
|
|
|
|
|
|
|
|
| 401 |
|
| 402 |
-
return output_stream.getvalue() # Return modified PDF as bytes
|
| 403 |
|
| 404 |
def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
|
| 405 |
#Load original PDF
|
| 406 |
-
with open(input_pdf_path, "rb") as file:
|
| 407 |
-
|
| 408 |
|
| 409 |
#Add Bluebeam-compatible count annotations
|
| 410 |
-
annotated_pdf_bytes = add_bluebeam_count_annotations(
|
| 411 |
|
| 412 |
#Modify author field using PyPDF2
|
| 413 |
final_pdf_bytes = modify_author_in_pypdf2(annotated_pdf_bytes, new_authors)
|
| 414 |
-
|
| 415 |
-
#Save the final modified PDF to disk
|
| 416 |
-
with open(output_pdf_path, "wb") as file:
|
| 417 |
-
|
|
|
|
| 418 |
def mainRun(schedule, plan):
|
| 419 |
dfs = extract_tables(schedule)
|
| 420 |
selected_columns = get_selected_columns(dfs)
|
|
@@ -433,5 +437,36 @@ def mainRun(schedule, plan):
|
|
| 433 |
width_info_tobeprinted = get_width_info_tobeprinted(new_data)
|
| 434 |
cleaned_width = get_cleaned_width(width_info_tobeprinted)
|
| 435 |
widths = get_widths_bb_format(cleaned_width, kelma)
|
| 436 |
-
process_pdf(plan, "final_output_width.pdf", new_data, widths)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 437 |
|
|
|
|
| 11 |
from PyPDF2.generic import NameObject, TextStringObject, DictionaryObject, FloatObject, ArrayObject
|
| 12 |
from PyPDF2 import PdfReader
|
| 13 |
from PyPDF2.generic import TextStringObject
|
| 14 |
+
import numpy as np
|
| 15 |
+
import cv2
|
| 16 |
|
| 17 |
|
| 18 |
def convert2img(path):
|
|
|
|
| 35 |
return (xm, ym)
|
| 36 |
|
| 37 |
def read_text(input_pdf_path):
|
| 38 |
+
pdf_document = fitz.open('pdf',input_pdf_path)
|
| 39 |
|
| 40 |
for page_num in range(pdf_document.page_count):
|
| 41 |
page = pdf_document[page_num]
|
|
|
|
| 167 |
return new_df
|
| 168 |
|
| 169 |
def extract_tables(schedule):
|
| 170 |
+
doc = fitz.open("pdf",schedule)
|
| 171 |
for page in doc:
|
| 172 |
tabs = page.find_tables()
|
| 173 |
dfs = []
|
|
|
|
| 398 |
#Save the modified PDF to a variable
|
| 399 |
output_stream = io.BytesIO()
|
| 400 |
writer.write(output_stream)
|
| 401 |
+
output_stream.seek(0)
|
| 402 |
+
|
| 403 |
+
return output_stream.read()
|
| 404 |
|
| 405 |
+
# return output_stream.getvalue() # Return modified PDF as bytes
|
| 406 |
|
| 407 |
def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
|
| 408 |
#Load original PDF
|
| 409 |
+
# with open(input_pdf_path, "rb") as file:
|
| 410 |
+
# original_pdf_bytes = file.read()
|
| 411 |
|
| 412 |
#Add Bluebeam-compatible count annotations
|
| 413 |
+
annotated_pdf_bytes = add_bluebeam_count_annotations(input_pdf_path, locations)
|
| 414 |
|
| 415 |
#Modify author field using PyPDF2
|
| 416 |
final_pdf_bytes = modify_author_in_pypdf2(annotated_pdf_bytes, new_authors)
|
| 417 |
+
return final_pdf_bytes
|
| 418 |
+
# #Save the final modified PDF to disk
|
| 419 |
+
# with open(output_pdf_path, "wb") as file:
|
| 420 |
+
# file.write(final_pdf_bytes)
|
| 421 |
+
|
| 422 |
def mainRun(schedule, plan):
|
| 423 |
dfs = extract_tables(schedule)
|
| 424 |
selected_columns = get_selected_columns(dfs)
|
|
|
|
| 437 |
width_info_tobeprinted = get_width_info_tobeprinted(new_data)
|
| 438 |
cleaned_width = get_cleaned_width(width_info_tobeprinted)
|
| 439 |
widths = get_widths_bb_format(cleaned_width, kelma)
|
| 440 |
+
final_pdf_bytes= process_pdf(plan, "final_output_width.pdf", new_data, widths)
|
| 441 |
+
|
| 442 |
+
|
| 443 |
+
doc2 =fitz.open('pdf',final_pdf_bytes)
|
| 444 |
+
page=doc2[0]
|
| 445 |
+
pix = page.get_pixmap() # render page to an image
|
| 446 |
+
pl=Image.frombytes('RGB', [pix.width,pix.height],pix.samples)
|
| 447 |
+
img=np.array(pl)
|
| 448 |
+
annotatedimg = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
|
| 449 |
+
|
| 450 |
+
|
| 451 |
+
list1=pd.DataFrame(columns=['content', 'id', 'subject','color'])
|
| 452 |
+
|
| 453 |
+
# for page in doc:
|
| 454 |
+
for page in doc2:
|
| 455 |
+
# Iterate through annotations on the page
|
| 456 |
+
for annot in page.annots():
|
| 457 |
+
# Get the color of the annotation
|
| 458 |
+
annot_color = annot.colors
|
| 459 |
+
if annot_color is not None:
|
| 460 |
+
# annot_color is a dictionary with 'stroke' and 'fill' keys
|
| 461 |
+
stroke_color = annot_color.get('stroke') # Border color
|
| 462 |
+
fill_color = annot_color.get('fill') # Fill color
|
| 463 |
+
if fill_color:
|
| 464 |
+
v='fill'
|
| 465 |
+
# print('fill')
|
| 466 |
+
if stroke_color:
|
| 467 |
+
v='stroke'
|
| 468 |
+
x,y,z=int(annot_color.get(v)[0]*255),int(annot_color.get(v)[1]*255),int(annot_color.get(v)[2]*255)
|
| 469 |
+
list1.loc[len(list1)] =[annot.info['content'],annot.info['id'],annot.info['subject'],[x,y,z]]
|
| 470 |
+
return annotatedimg, doc2 , list1, repeated_labels , not_found
|
| 471 |
+
|
| 472 |
|