Marthee committed on
Commit
09e8adb
·
verified ·
1 Parent(s): 205c053

Update Doors_Schedule

Browse files
Files changed (1) hide show
  1. Doors_Schedule +48 -13
Doors_Schedule CHANGED
@@ -11,8 +11,8 @@ from PyPDF2.generic import TextStringObject, NameObject, ArrayObject, FloatObjec
11
  from PyPDF2.generic import NameObject, TextStringObject, DictionaryObject, FloatObject, ArrayObject
12
  from PyPDF2 import PdfReader
13
  from PyPDF2.generic import TextStringObject
14
-
15
-
16
 
17
 
18
  def convert2img(path):
@@ -35,7 +35,7 @@ def calculate_midpoint(x1,y1,x2,y2):
35
  return (xm, ym)
36
 
37
  def read_text(input_pdf_path):
38
- pdf_document = fitz.open(input_pdf_path)
39
 
40
  for page_num in range(pdf_document.page_count):
41
  page = pdf_document[page_num]
@@ -167,7 +167,7 @@ def details_in_another_table(clmn_name, clmn_idx, current_dfs, dfs):
167
  return new_df
168
 
169
  def extract_tables(schedule):
170
- doc = fitz.open(schedule)
171
  for page in doc:
172
  tabs = page.find_tables()
173
  dfs = []
@@ -398,23 +398,27 @@ def modify_author_in_pypdf2(pdf_bytes, new_authors):
398
  #Save the modified PDF to a variable
399
  output_stream = io.BytesIO()
400
  writer.write(output_stream)
 
 
 
401
 
402
- return output_stream.getvalue() # Return modified PDF as bytes
403
 
404
  def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
405
  #Load original PDF
406
- with open(input_pdf_path, "rb") as file:
407
- original_pdf_bytes = file.read()
408
 
409
  #Add Bluebeam-compatible count annotations
410
- annotated_pdf_bytes = add_bluebeam_count_annotations(original_pdf_bytes, locations)
411
 
412
  #Modify author field using PyPDF2
413
  final_pdf_bytes = modify_author_in_pypdf2(annotated_pdf_bytes, new_authors)
414
-
415
- #Save the final modified PDF to disk
416
- with open(output_pdf_path, "wb") as file:
417
- file.write(final_pdf_bytes)
 
418
  def mainRun(schedule, plan):
419
  dfs = extract_tables(schedule)
420
  selected_columns = get_selected_columns(dfs)
@@ -433,5 +437,36 @@ def mainRun(schedule, plan):
433
  width_info_tobeprinted = get_width_info_tobeprinted(new_data)
434
  cleaned_width = get_cleaned_width(width_info_tobeprinted)
435
  widths = get_widths_bb_format(cleaned_width, kelma)
436
- process_pdf(plan, "final_output_width.pdf", new_data, widths)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
 
 
11
  from PyPDF2.generic import NameObject, TextStringObject, DictionaryObject, FloatObject, ArrayObject
12
  from PyPDF2 import PdfReader
13
  from PyPDF2.generic import TextStringObject
14
+ import numpy as np
15
+ import cv2
16
 
17
 
18
  def convert2img(path):
 
35
  return (xm, ym)
36
 
37
  def read_text(input_pdf_path):
38
+ pdf_document = fitz.open('pdf',input_pdf_path)
39
 
40
  for page_num in range(pdf_document.page_count):
41
  page = pdf_document[page_num]
 
167
  return new_df
168
 
169
  def extract_tables(schedule):
170
+ doc = fitz.open("pdf",schedule)
171
  for page in doc:
172
  tabs = page.find_tables()
173
  dfs = []
 
398
  #Save the modified PDF to a variable
399
  output_stream = io.BytesIO()
400
  writer.write(output_stream)
401
+ output_stream.seek(0)
402
+
403
+ return output_stream.read()
404
 
405
+ # return output_stream.getvalue() # Return modified PDF as bytes
406
 
407
  def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
408
  #Load original PDF
409
+ # with open(input_pdf_path, "rb") as file:
410
+ # original_pdf_bytes = file.read()
411
 
412
  #Add Bluebeam-compatible count annotations
413
+ annotated_pdf_bytes = add_bluebeam_count_annotations(input_pdf_path, locations)
414
 
415
  #Modify author field using PyPDF2
416
  final_pdf_bytes = modify_author_in_pypdf2(annotated_pdf_bytes, new_authors)
417
+ return final_pdf_bytes
418
+ # #Save the final modified PDF to disk
419
+ # with open(output_pdf_path, "wb") as file:
420
+ # file.write(final_pdf_bytes)
421
+
422
  def mainRun(schedule, plan):
423
  dfs = extract_tables(schedule)
424
  selected_columns = get_selected_columns(dfs)
 
437
  width_info_tobeprinted = get_width_info_tobeprinted(new_data)
438
  cleaned_width = get_cleaned_width(width_info_tobeprinted)
439
  widths = get_widths_bb_format(cleaned_width, kelma)
440
+ final_pdf_bytes= process_pdf(plan, "final_output_width.pdf", new_data, widths)
441
+
442
+
443
+ doc2 =fitz.open('pdf',final_pdf_bytes)
444
+ page=doc2[0]
445
+ pix = page.get_pixmap() # render page to an image
446
+ pl=Image.frombytes('RGB', [pix.width,pix.height],pix.samples)
447
+ img=np.array(pl)
448
+ annotatedimg = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
449
+
450
+
451
+ list1=pd.DataFrame(columns=['content', 'id', 'subject','color'])
452
+
453
+ # for page in doc:
454
+ for page in doc2:
455
+ # Iterate through annotations on the page
456
+ for annot in page.annots():
457
+ # Get the color of the annotation
458
+ annot_color = annot.colors
459
+ if annot_color is not None:
460
+ # annot_color is a dictionary with 'stroke' and 'fill' keys
461
+ stroke_color = annot_color.get('stroke') # Border color
462
+ fill_color = annot_color.get('fill') # Fill color
463
+ if fill_color:
464
+ v='fill'
465
+ # print('fill')
466
+ if stroke_color:
467
+ v='stroke'
468
+ x,y,z=int(annot_color.get(v)[0]*255),int(annot_color.get(v)[1]*255),int(annot_color.get(v)[2]*255)
469
+ list1.loc[len(list1)] =[annot.info['content'],annot.info['id'],annot.info['subject'],[x,y,z]]
470
+ return annotatedimg, doc2 , list1, repeated_labels , not_found
471
+
472