Marthee committed on
Commit
0538ca6
·
verified ·
1 Parent(s): c7da3b4

Update Doors_Schedule.py

Browse files
Files changed (1) hide show
  1. Doors_Schedule.py +73 -162
Doors_Schedule.py CHANGED
@@ -385,7 +385,7 @@ def get_selected_columns_all(dfs, user_patterns):
385
  # TODO: make this search the column names of selected_columns themselves
386
  # We then also need to know how the name is spelled in the actual df (presumably exactly as the user entered it)
387
  def get_st_op_pattern(selected_columns, user_input):
388
- target = 'structural opening'
389
  if target in selected_columns.columns:
390
  name = user_input[2]
391
  return name
@@ -538,22 +538,29 @@ def get_cleaned_data(locations):
538
  return new_data
539
 
540
 
 
541
  def get_width_info_tobeprinted(new_data):
542
  width_info_tobeprinted = []
543
  if len(new_data[0]) == 4:
544
  for _,_,_, w in new_data:
545
- w = re.sub(r",", "", w)
546
- w = int(float(w))
547
  width_info_tobeprinted.append(w)
548
  if len(new_data[0]) == 5:
549
  for _,_,_, w,h in new_data:
550
  w = re.sub(r",", "", w)
551
  h = re.sub(r",", "", h)
552
- w = int(float(w))
553
- h = int(float(h))
 
 
 
 
 
 
554
  width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
555
  return width_info_tobeprinted
556
-
557
  def clean_dimensions(text):
558
  # Remove commas and "mm"
559
  text = re.sub(r'[,\s]*mm', '', text) # Remove "mm" with optional spaces or commas before it
@@ -566,6 +573,7 @@ def get_cleaned_width(width_info_tobeprinted):
566
  cleaned_width.append(clean_dimensions(w))
567
  return cleaned_width
568
 
 
569
  def get_widths_bb_format(cleaned_width, kelma):
570
  pattern = r"\bW(?:idth)?\s*[×x]\s*H(?:eight)?\b"
571
  match = re.search(pattern, kelma)
@@ -577,9 +585,9 @@ def get_widths_bb_format(cleaned_width, kelma):
577
  width_name = int(float(width_name))
578
  height_name = int(float(height_name))
579
  if match:
580
- full_text = f"{width_name}mm wide x {height_name}mm high"
581
  else:
582
- full_text = f"{height_name}mm wide x {width_name}mm high"
583
  widths.append(full_text)
584
  return widths
585
 
@@ -757,13 +765,14 @@ def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
757
 
758
  if len(main_info) == 4 and len(secondary_info) == 2:
759
  for coords, label, width, height, acous, fire, color in new_data:
760
- #w = re.sub(r",", "", width)
761
- #h = re.sub(r",", "", height)
762
- #w = int(float(w))
763
- #h = int(float(h))
764
- width_info_tobeprinted.append(f"{width} mm wide x {height} mm high")
765
  secondary_info_tobeprinted.append((acous, fire))
766
  return width_info_tobeprinted, secondary_info_tobeprinted
 
767
  def get_flattened_tuples_list_SECONDARY(col_dict):
768
  tuples_list = []
769
 
@@ -1511,30 +1520,26 @@ def mainRun(schedule, plan, searcharray):
1511
 
1512
  dfs = extract_tables(schedule)
1513
 
1514
- all_new_data = []
1515
- all_widths = []
1516
- pdf_outputs = []
1517
-
1518
- if len(searcharray)>1:
 
1519
  for j in range(len(searcharray)):
1520
- pdfs = []
1521
  user_input = searcharray[j]
 
1522
  secondary_presence = False
1523
-
1524
  if user_input[4] or user_input[5]:
1525
  secondary_presence = True
1526
-
1527
  main_info_, secondary_info_ = separate_main_secondary(user_input)
1528
-
1529
  main_info = [item for item in main_info_ if item]
1530
- secondary_info = [item for item in secondary_info_ if item]
1531
-
1532
  print("feh secondary information")
1533
  if user_input[4]:
1534
  print("Fire rate mawgooda")
1535
  if user_input[5]:
1536
  print("Acoustic Rate mawgooda")
1537
-
1538
  else:
1539
  print("mafeesh secondary information")
1540
 
@@ -1542,9 +1547,9 @@ def mainRun(schedule, plan, searcharray):
1542
  kelma = get_st_op_pattern(selected_columns_combined, user_input)
1543
  col_dict = get_similar_colors_all(selected_columns_combined)
1544
  flattened_list = get_flattened_tuples_list_all(col_dict)
1545
-
 
1546
  if secondary_presence:
1547
- for p in plan:
1548
  plan_texts = read_text(p)
1549
  locations, not_found = get_word_locations_plan_secondary(flattened_list,plan_texts, main_info, secondary_info)
1550
  new_data3 = get_cleaned_data_secondary(locations,main_info,secondary_info)
@@ -1558,153 +1563,61 @@ def mainRun(schedule, plan, searcharray):
1558
  width_info_tobeprinted, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1559
  cleaned_width = get_cleaned_width(width_info_tobeprinted)
1560
  widths = get_widths_bb_format(cleaned_width, kelma)
 
 
 
 
1561
  secondary_printed_clean = get_secondary_tobeprinted_clean(selected_columns_combined, secondary_tobeprinted, secondary_info)
1562
  all_print = mix_width_secondary(widths, secondary_printed_clean)
1563
-
1564
  #Single page annotation
1565
  all_widths.append(all_print)
1566
 
1567
- flat_list_new_data = [item for sublist in all_new_data for item in sublist]
1568
- flat_list_widths = [item for sublist in all_widths for item in sublist]
1569
 
1570
  if pdf_outputs:
1571
- final_pdf_bytes = process_pdf_secondary(pdf_outputs[j-1], "final_output_multiple_input_new2.pdf", flat_list_new_data, flat_list_widths, main_info, secondary_info)
1572
  pdf_outputs.append(final_pdf_bytes)
1573
  else:
1574
- final_pdf_bytes = process_pdf_secondary(p, "final_output_multiple_input_new2.pdf", flat_list_new_data, flat_list_widths, main_info, secondary_info)
1575
  pdf_outputs.append(final_pdf_bytes)
1576
-
1577
- pdfs.append(final_pdf_bytes)
1578
-
1579
-
1580
- else:
1581
- for p in plan:
1582
- print(f" p in plan is {type(p)}")
1583
- print(p)
1584
- plan_texts = read_text(p)
1585
- locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
1586
- new_data = get_cleaned_data(locations)
1587
- #Single page annotation
1588
- all_new_data.append(new_data)
1589
- repeated_labels = get_repeated_labels(locations)
1590
- if kelma == None:
1591
- widths = get_width_info_tobeprinted(new_data)
1592
- else:
1593
- width_info_tobeprinted = get_width_info_tobeprinted(new_data)
1594
- cleaned_width = get_cleaned_width(width_info_tobeprinted)
1595
- widths = get_widths_bb_format(cleaned_width, kelma)
1596
- #Single page annotation
1597
- all_widths.append(widths)
1598
-
1599
- flat_list_new_data = [item for sublist in all_new_data for item in sublist]
1600
- flat_list_widths = [item for sublist in all_widths for item in sublist]
1601
-
1602
- if pdf_outputs:
1603
- final_pdf_bytes = process_pdf(pdf_outputs[j-1], "final_output_width_trial.pdf", flat_list_new_data, flat_list_widths)
1604
- #final_pdf_bytes = process_pdf(p, "final_output_width.pdf", new_data, widths)
1605
- pdfs.append(final_pdf_bytes)
1606
- else:
1607
- final_pdf_bytes = process_pdf(p, "final_output_width_trial.pdf", flat_list_new_data, flat_list_widths)
1608
- #final_pdf_bytes = process_pdf(p, "final_output_width.pdf", new_data, widths)
1609
- pdfs.append(final_pdf_bytes)
1610
-
1611
- #Handling schedules without dimensions (width and height)
1612
- if selected_columns_combined.shape[1] == 2:
1613
- widths = []
1614
-
1615
- merged_pdf = merge_pdf_bytes_list(pdfs)
1616
- print(f"number of pges of merged_pdf is {len(merged_pdf)} and its type is {type(merged_pdf)}")
1617
-
1618
- else:
1619
- for j in range(len(searcharray)):
1620
- pdfs = []
1621
- user_input = searcharray[j]
1622
- secondary_presence = False
1623
-
1624
- if user_input[4] or user_input[5]:
1625
- secondary_presence = True
1626
-
1627
- main_info_, secondary_info_ = separate_main_secondary(user_input)
1628
-
1629
- main_info = [item for item in main_info_ if item]
1630
- secondary_info = [item for item in secondary_info_ if item]
1631
-
1632
- print("feh secondary information")
1633
- if user_input[4]:
1634
- print("Fire rate mawgooda")
1635
- if user_input[5]:
1636
- print("Acoustic Rate mawgooda")
1637
-
1638
  else:
1639
- print("mafeesh secondary information")
1640
-
1641
- selected_columns_combined = get_selected_columns_all(dfs, user_input)
1642
- kelma = get_st_op_pattern(selected_columns_combined, user_input)
1643
- col_dict = get_similar_colors_all(selected_columns_combined)
1644
- flattened_list = get_flattened_tuples_list_all(col_dict)
1645
-
1646
- if secondary_presence:
1647
- for p in plan:
1648
- plan_texts = read_text(p)
1649
- locations, not_found = get_word_locations_plan_secondary(flattened_list,plan_texts, main_info, secondary_info)
1650
- new_data3 = get_cleaned_data_secondary(locations,main_info,secondary_info)
1651
-
1652
- #Single page annotation
1653
- all_new_data.append(new_data3)
1654
- repeated_labels = get_repeated_labels(locations)
1655
- if kelma == None:
1656
- widths, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1657
- else:
1658
- width_info_tobeprinted, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1659
- cleaned_width = get_cleaned_width(width_info_tobeprinted)
1660
- widths = get_widths_bb_format(cleaned_width, kelma)
1661
- secondary_printed_clean = get_secondary_tobeprinted_clean(selected_columns_combined, secondary_tobeprinted, secondary_info)
1662
- all_print = mix_width_secondary(widths, secondary_printed_clean)
1663
-
1664
- #Single page annotation
1665
- all_widths.append(all_print)
1666
-
1667
- flat_list_new_data = [item for sublist in all_new_data for item in sublist]
1668
- flat_list_widths = [item for sublist in all_widths for item in sublist]
1669
-
1670
-
1671
- final_pdf_bytes = process_pdf_secondary(p, "final_output_multiple_input_new2.pdf", flat_list_new_data, flat_list_widths, main_info, secondary_info)
1672
- pdfs.append(final_pdf_bytes)
1673
 
 
 
1674
 
1675
- else:
1676
- for p in plan:
1677
- print(f" p in plan is {type(p)}")
1678
- print(p)
1679
- plan_texts = read_text(p)
1680
- locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
1681
- new_data = get_cleaned_data(locations)
1682
- #Single page annotation
1683
- all_new_data.append(new_data)
1684
- repeated_labels = get_repeated_labels(locations)
1685
- if kelma == None:
1686
- widths = get_width_info_tobeprinted(new_data)
1687
- else:
1688
- width_info_tobeprinted = get_width_info_tobeprinted(new_data)
1689
- cleaned_width = get_cleaned_width(width_info_tobeprinted)
1690
- widths = get_widths_bb_format(cleaned_width, kelma)
1691
- #Single page annotation
1692
- all_widths.append(widths)
1693
-
1694
- flat_list_new_data = [item for sublist in all_new_data for item in sublist]
1695
- flat_list_widths = [item for sublist in all_widths for item in sublist]
1696
-
1697
- final_pdf_bytes = process_pdf(p, "final_output_width_trial.pdf", flat_list_new_data, flat_list_widths)
1698
- #final_pdf_bytes = process_pdf(p, "final_output_width.pdf", new_data, widths)
1699
- pdfs.append(final_pdf_bytes)
1700
 
1701
- #Handling schedules without dimensions (width and height)
1702
- if selected_columns_combined.shape[1] == 2:
1703
- widths = []
1704
-
1705
- merged_pdf = merge_pdf_bytes_list(pdfs)
1706
- print(f"number of pges of merged_pdf is {len(merged_pdf)} and its type is {type(merged_pdf)}")
1707
 
 
 
 
 
1708
  not_found = []
1709
  doc2 =fitz.open('pdf',merged_pdf)
1710
  len_doc2 = len(doc2)
@@ -1735,6 +1648,4 @@ def mainRun(schedule, plan, searcharray):
1735
  v='stroke'
1736
  x,y,z=int(annot_color.get(v)[0]*255),int(annot_color.get(v)[1]*255),int(annot_color.get(v)[2]*255)
1737
  list1.loc[len(list1)] =[annot.info['content'],annot.info['id'],annot.info['subject'],[x,y,z]]
1738
- return annotatedimg, doc2 , list1, repeated_labels , not_found
1739
-
1740
-
 
385
  # TODO: make this search the column names of selected_columns themselves
386
  # We then also need to know how the name is spelled in the actual df (presumably exactly as the user entered it)
387
  def get_st_op_pattern(selected_columns, user_input):
388
+ target = 'structural_opening'
389
  if target in selected_columns.columns:
390
  name = user_input[2]
391
  return name
 
538
  return new_data
539
 
540
 
541
# A fractional dimension (e.g. 0.5) is printed as written; a whole-number
# dimension is printed without its decimal point.
def _format_dimension(raw_value):
    """Normalise one dimension cell for printing.

    Commas are removed from ``raw_value``. If the remaining text parses as a
    whole number it is returned as an ``int`` (so "1,200.0" becomes 1200);
    otherwise the comma-free text is returned unchanged (so "0.5" stays "0.5").

    Raises:
        ValueError: if the comma-free text is not a valid float literal.
    """
    text = re.sub(r",", "", raw_value)
    number = float(text)
    return int(number) if number.is_integer() else text


def get_width_info_tobeprinted(new_data):
    """Build the printable dimension text for every row of ``new_data``.

    The layout of all rows is decided by the length of the first row:

    * 4 items per row -> the 4th item is returned untouched.
    * 5 items per row -> the 4th and 5th items are treated as width and
      height and rendered as "<w> mm wide x <h> mm high".
    * any other length -> nothing is produced.

    Args:
        new_data: sequence of equally sized rows (tuples/lists); only the
            items from index 3 onwards are read here.

    Returns:
        A list with one entry per row, or an empty list when ``new_data`` is
        empty or its rows have an unsupported length.
    """
    # An empty result (e.g. nothing matched on this page) used to crash on
    # new_data[0]; there is simply nothing to print in that case.
    if not new_data:
        return []

    row_length = len(new_data[0])

    if row_length == 4:
        # Single dimension column: pass the value through as-is.
        return [w for _, _, _, w in new_data]

    if row_length == 5:
        return [
            f"{_format_dimension(w)} mm wide x {_format_dimension(h)} mm high"
            for _, _, _, w, h in new_data
        ]

    return []
563
+
564
  def clean_dimensions(text):
565
  # Remove commas and "mm"
566
  text = re.sub(r'[,\s]*mm', '', text) # Remove "mm" with optional spaces or commas before it
 
573
  cleaned_width.append(clean_dimensions(w))
574
  return cleaned_width
575
 
576
+
577
  def get_widths_bb_format(cleaned_width, kelma):
578
  pattern = r"\bW(?:idth)?\s*[×x]\s*H(?:eight)?\b"
579
  match = re.search(pattern, kelma)
 
585
  width_name = int(float(width_name))
586
  height_name = int(float(height_name))
587
  if match:
588
+ full_text = f"{width_name} mm wide x {height_name} mm high"
589
  else:
590
+ full_text = f"{height_name} mm wide x {width_name} mm high"
591
  widths.append(full_text)
592
  return widths
593
 
 
765
 
766
  if len(main_info) == 4 and len(secondary_info) == 2:
767
  for coords, label, width, height, acous, fire, color in new_data:
768
+ w = re.sub(r",", "", width)
769
+ h = re.sub(r",", "", height)
770
+ w = int(float(w))
771
+ h = int(float(h))
772
+ width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
773
  secondary_info_tobeprinted.append((acous, fire))
774
  return width_info_tobeprinted, secondary_info_tobeprinted
775
+
776
  def get_flattened_tuples_list_SECONDARY(col_dict):
777
  tuples_list = []
778
 
 
1520
 
1521
  dfs = extract_tables(schedule)
1522
 
1523
+ pdfs = []
1524
+ for p in plan:
1525
+ all_new_data = []
1526
+ all_widths = []
1527
+ pdf_outputs = []
1528
+
1529
  for j in range(len(searcharray)):
 
1530
  user_input = searcharray[j]
1531
+
1532
  secondary_presence = False
 
1533
  if user_input[4] or user_input[5]:
1534
  secondary_presence = True
 
1535
  main_info_, secondary_info_ = separate_main_secondary(user_input)
 
1536
  main_info = [item for item in main_info_ if item]
1537
+ secondary_info = [item for item in secondary_info_ if item]
 
1538
  print("feh secondary information")
1539
  if user_input[4]:
1540
  print("Fire rate mawgooda")
1541
  if user_input[5]:
1542
  print("Acoustic Rate mawgooda")
 
1543
  else:
1544
  print("mafeesh secondary information")
1545
 
 
1547
  kelma = get_st_op_pattern(selected_columns_combined, user_input)
1548
  col_dict = get_similar_colors_all(selected_columns_combined)
1549
  flattened_list = get_flattened_tuples_list_all(col_dict)
1550
+ plan_texts = read_text(p)
1551
+
1552
  if secondary_presence:
 
1553
  plan_texts = read_text(p)
1554
  locations, not_found = get_word_locations_plan_secondary(flattened_list,plan_texts, main_info, secondary_info)
1555
  new_data3 = get_cleaned_data_secondary(locations,main_info,secondary_info)
 
1563
  width_info_tobeprinted, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1564
  cleaned_width = get_cleaned_width(width_info_tobeprinted)
1565
  widths = get_widths_bb_format(cleaned_width, kelma)
1566
+ #Handling schedules without dimensions (width and height)
1567
+ if selected_columns_combined.shape[1] == 2:
1568
+ widths = []
1569
+
1570
  secondary_printed_clean = get_secondary_tobeprinted_clean(selected_columns_combined, secondary_tobeprinted, secondary_info)
1571
  all_print = mix_width_secondary(widths, secondary_printed_clean)
1572
+
1573
  #Single page annotation
1574
  all_widths.append(all_print)
1575
 
1576
+ #flat_list_new_data = [item for sublist in all_new_data for item in sublist]
1577
+ #flat_list_widths = [item for sublist in all_widths for item in sublist]
1578
 
1579
  if pdf_outputs:
1580
+ final_pdf_bytes = process_pdf_secondary(pdf_outputs[j-1], "final_output_multiple_input_new2.pdf", all_new_data[j], all_widths[j], main_info, secondary_info)
1581
  pdf_outputs.append(final_pdf_bytes)
1582
  else:
1583
+ final_pdf_bytes = process_pdf_secondary(p, "final_output_multiple_input_new2.pdf", all_new_data[j], all_widths[j], main_info, secondary_info)
1584
  pdf_outputs.append(final_pdf_bytes)
1585
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1586
  else:
1587
+ locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
1588
+ new_data = get_cleaned_data(locations)
1589
+ #Single page annotation
1590
+ all_new_data.append(new_data)
1591
+ repeated_labels = get_repeated_labels(locations)
1592
+ if kelma == None:
1593
+ widths = get_width_info_tobeprinted(new_data)
1594
+ else:
1595
+ width_info_tobeprinted = get_width_info_tobeprinted(new_data)
1596
+ cleaned_width = get_cleaned_width(width_info_tobeprinted)
1597
+ widths = get_widths_bb_format(cleaned_width, kelma)
1598
+
1599
+ #Handling schedules without dimensions (width and height)
1600
+ if selected_columns_combined.shape[1] == 2:
1601
+ widths = []
1602
+
1603
+ #Single page annotation
1604
+ all_widths.append(widths)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1605
 
1606
+ flat_list_new_data = [item for sublist in all_new_data for item in sublist]
1607
+ flat_list_widths = [item for sublist in all_widths for item in sublist]
1608
 
1609
+ if pdf_outputs:
1610
+ final_pdf_bytes = process_pdf(pdf_outputs[j-1], "final_output_width_trial.pdf", all_new_data[j], all_widths[j])
1611
+ pdf_outputs.append(final_pdf_bytes)
1612
+ else:
1613
+ final_pdf_bytes = process_pdf(p, "final_output_width_trial.pdf", all_new_data[j], all_widths[j])
1614
+ pdf_outputs.append(final_pdf_bytes)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1615
 
 
 
 
 
 
 
1616
 
1617
+ pdfs.append(final_pdf_bytes)
1618
+ merged_pdf = merge_pdf_bytes_list(pdfs)
1619
+ print(f"number of pges of merged_pdf is {len(merged_pdf)} and its type is {type(merged_pdf)}")
1620
+
1621
  not_found = []
1622
  doc2 =fitz.open('pdf',merged_pdf)
1623
  len_doc2 = len(doc2)
 
1648
  v='stroke'
1649
  x,y,z=int(annot_color.get(v)[0]*255),int(annot_color.get(v)[1]*255),int(annot_color.get(v)[2]*255)
1650
  list1.loc[len(list1)] =[annot.info['content'],annot.info['id'],annot.info['subject'],[x,y,z]]
1651
+ return annotatedimg, doc2 , list1, repeated_labels , not_found