Marthee committed on
Commit
c1745f4
·
verified ·
1 Parent(s): 253abe7

Update Doors_Schedule.py

Browse files
Files changed (1) hide show
  1. Doors_Schedule.py +324 -44
Doors_Schedule.py CHANGED
@@ -433,6 +433,91 @@ def get_selected_columns_all(dfs, user_patterns):
433
  #break
434
  return selected_columns_new
435
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
436
 
437
  # 3ayz akhaleehaa te search fel selected_columns column names nafsaha
438
  # 7ab2a 3ayz a3raf bardo maktooba ezay fel df el 7a2e2ya (akeed za ma el user medakhalha bezabt)
@@ -1308,6 +1393,179 @@ def create_bb_bax(new_data, widthat, heightat, CountStyles, page_number, height_
1308
  return bax_annotations
1309
 
1310
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1311
  #Handle missing widths or heights in some rows
1312
  def generate_separate_dimensions(widths):
1313
  widthat = []
@@ -1409,6 +1667,7 @@ def pick_approach(schedule, plan, searcharray, flag):
1409
  if value == len(plan):
1410
  not_found_any_plan.append(key)
1411
  not_found_any_plan = [item for item in not_found_any_plan if item != "N/A"]
 
1412
  return no_tables, not_found_any_plan
1413
 
1414
  def mainRun(schedule, plan, searcharray):
@@ -1434,7 +1693,7 @@ def mainRun(schedule, plan, searcharray):
1434
  elif len(not_found_any_plan_model) < len(not_found_any_plan_normal):
1435
  pick_model = True
1436
  #print("choose not_found_any_plan_model")
1437
- else:
1438
  pick_normal = True
1439
  #print("choose any")
1440
 
@@ -1524,50 +1783,66 @@ def mainRun(schedule, plan, searcharray):
1524
  if secondary_presence:
1525
  main_info = main_info + [""]
1526
  flattened_list = get_flattened_tuples_list_no_doortype(selected_columns_combined)
 
 
 
1527
  plan_texts = read_text(p)
1528
 
1529
- if secondary_presence:
1530
- locations, not_found = get_word_locations_plan_secondary(flattened_list,plan_texts, main_info, secondary_info)
1531
- not_found_list.append(not_found)
1532
- new_data3 = get_cleaned_data_secondary(locations,main_info,secondary_info)
1533
-
1534
- repeated_labels = get_repeated_labels(locations)
1535
- repeated_labels = list(repeated_labels)
1536
- repeated_labels_list.append(repeated_labels)
1537
- if kelma == None:
1538
- widths, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1539
- else:
1540
- width_info_tobeprinted, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1541
- cleaned_width = get_cleaned_width(width_info_tobeprinted)
1542
- widths = get_widths_bb_format(cleaned_width, kelma)
1543
-
1544
-
1545
- #Count type annotation
1546
- widht_count, height_count = generate_separate_dimensions(widths)
1547
- bax = create_bb_bax_secondary(new_data3, widht_count, height_count, secondary_tobeprinted, CountStyles, user_input, page_number, page)
1548
- bax_annotations_all_inputs.append(bax)
1549
-
1550
-
 
 
 
 
 
 
 
 
 
1551
  else:
1552
- locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
1553
- not_found_list.append(not_found)
1554
- new_data = get_cleaned_data(locations)
1555
- if len(new_data) == 0:
1556
- continue
1557
- repeated_labels = get_repeated_labels(locations)
1558
- repeated_labels = list(repeated_labels)
1559
- repeated_labels_list.append(repeated_labels)
1560
- if kelma == None:
1561
- widths = get_width_info_tobeprinted(new_data)
1562
- else:
1563
- width_info_tobeprinted = get_width_info_tobeprinted(new_data)
1564
- cleaned_width = get_cleaned_width(width_info_tobeprinted)
1565
- widths = get_widths_bb_format(cleaned_width, kelma)
1566
- #count type annotation
1567
- widht_count, height_count = generate_separate_dimensions(widths)
1568
- bax = create_bb_bax(new_data, widht_count, height_count, CountStyles, page_number, page)
1569
- bax_annotations_all_inputs.append(bax)
1570
-
 
 
 
 
1571
  # if it is not byte type
1572
  #pdfs_count_type.append(convert_to_bytes(p))
1573
  pdfs_count_type.append(p)
@@ -1580,8 +1855,13 @@ def mainRun(schedule, plan, searcharray):
1580
  for bax_ann in bax_annotations_all_inputs:
1581
  bax_annotation.extend(bax_ann)
1582
 
1583
- column_order = ['FireRating', 'AcousticRating', 'Height_', 'Width_']
1584
-
 
 
 
 
 
1585
  ## Getting the not found in all plans
1586
  flattened_not_found_list = [item for sublist in not_found_list for item in sublist]
1587
  counts_not_found = Counter(flattened_not_found_list)
 
433
  #break
434
  return selected_columns_new
435
 
436
+ #for new dictionary logic
437
def get_selected_columns_all(dfs, user_patterns):
    """Select the schedule columns matching ``user_patterns`` from the extracted tables.

    Each table in ``dfs`` is searched (``flexible_search``) for the non-empty
    main and secondary patterns. The search result is split by
    ``analyse_cell_columns`` into matches found in cells and matches found in
    column headers.

    * All patterns matched as column headers: tables with fewer than 10 rows
      go through ``details_in_another_table``; tables with more than 10 rows
      go through ``generate_current_table_without_cropping``.
    * All patterns matched inside cells: the result of
      ``details_in_another_table`` is concatenated with the result of
      ``crop_rename_table`` and the scan over ``dfs`` stops.

    Args:
        dfs: sequence of tables; each is indexed as ``dfs[i]`` and measured
            with ``len``.  # presumably pandas DataFrames - confirm with caller
        user_patterns: list of user-entered search patterns. Entries from
            index 6 onwards are appended to the column names as extra info.

    Returns:
        The last selection assigned while scanning, or ``None`` when no table
        produced one.
    """
    selected_columns = []
    selected_columns_new = None # Stays None when no table yields a selection

    for i in range(len(dfs)):

        # Everything from index 6 onwards is treated as extra column names.
        extra_info = user_patterns[6:]

        main_info, secondary_info = separate_main_secondary(user_patterns)
        clmn_name_main = get_column_name(main_info)
        # Only the non-empty patterns take part in the search.
        non_empty_main_info = [item for item in main_info if item]

        clmn_name_secondary = get_column_name_secondary(secondary_info)

        non_empty_secondary_info = [item for item in secondary_info if item]

        #clmn_name = clmn_name_main + clmn_name_secondary
        clmn_name = clmn_name_main + clmn_name_secondary + extra_info

        non_empty_info = non_empty_main_info + non_empty_secondary_info

        #print(f"main info: {main_info}")
        print(f"clmn name: {clmn_name}")
        print(f"non-empty info: {non_empty_info}")
        #print(f"length of non-empty info: {len(non_empty_main_info)}")

        cell_columns_appearance = flexible_search(dfs[i], non_empty_info)
        cell_matches, col_matches = analyse_cell_columns(cell_columns_appearance)

        print(f"length of cell_matches: {len(cell_matches)}")
        print(f"cell_matches: {cell_matches}")
        print(f"col_matches: {col_matches}")
        #clmn_name = map_user_input_to_standard_labels(user_patterns)
        #if len(clmn_name) < len(user_patterns):

        print(clmn_name)

        if len(cell_matches) == 0 and len(col_matches) == 0:
            # Nothing matched in this table; move on to the next one.
            print(f"this is df {i}, SEARCH IN ANOTHER DF")

        else:
            # Patterns found in the COLUMN HEADERS
            if len(col_matches) == len(non_empty_info):
                column_index_list = get_column_index(col_matches)
                print(f"this is df {i} mawgooda fel columns, check el df length 3ashan law el details fe table tany")
                #print(len(clm_idx))
                # Short table: the details are taken from another table.
                print(column_index_list)
                if len(dfs[i]) <10:
                    selected_columns_new = details_in_another_table(clmn_name, column_index_list, dfs[i], dfs)
                    #break
                    #other_matches = details_in_another_table_mod(clmn_name, clmn_idx, dfs[i], dfs)
                # Longer table: the details are in this same table.
                # NOTE(review): a table with exactly 10 rows satisfies neither
                # `<10` nor `>10`, so it produces no selection - confirm intent.
                if len(dfs[i]) >10:
                    selected_columns_new = generate_current_table_without_cropping(column_index_list,dfs[i])
                    #break

            # Patterns found inside the table CELLS
            if len(cell_matches) == len(non_empty_info):
                row_index_list, column_index_list = get_row_column_indices(cell_matches)
                print(f"this is df {i} mawgooda fel cells, check el df length 3ashan law el details fe table tany")

                # Details from another table (the old row-count check is disabled)
                #if len(dfs[i]) <2:
                #selected_columns_new = details_in_another_table(clmn_name, clmn_idx, dfs[i], dfs)
                print(f"column names: {clmn_name}")
                print(f"column index list: {column_index_list}")
                selected_columns_new = details_in_another_table(clmn_name, column_index_list, dfs[i], dfs)
                selected_columns_new2 = crop_rename_table(row_index_list, clmn_name, column_index_list,dfs[i])
                # Combine the other-table details with the cropped current table.
                selected_columns_new = pd.concat([selected_columns_new, selected_columns_new2], ignore_index=True)
                # A cell-level match ends the scan over the remaining tables.
                break
                #other_matches = details_in_another_table_mod(clmn_name, clmn_idx, dfs[i], dfs)
                ## details in the same table
                #if len(dfs[i]) >2:
                # #print(f"this is df {i} call crop_rename_table(indices, clmn_name, clmn_idx,df)")
                #break
    return selected_columns_new
521
 
522
  # 3ayz akhaleehaa te search fel selected_columns column names nafsaha
523
  # 7ab2a 3ayz a3raf bardo maktooba ezay fel df el 7a2e2ya (akeed za ma el user medakhalha bezabt)
 
1393
  return bax_annotations
1394
 
1395
 
1396
def add_location(col_dict, plan_texts):
    """Attach plan locations to every group of ``col_dict``.

    Each value of ``col_dict`` is a group dictionary with a ``'door_id'``
    list. Every door id is looked up with ``find_text_in_plan`` and the
    per-door results are stored, in the same order as the ids, under the
    group's ``'location'`` key (the groups are mutated in place).

    Args:
        col_dict: mapping of group key -> group dictionary.
        plan_texts: text data of the plan, passed straight to
            ``find_text_in_plan``.

    Returns:
        ``(col_dict, not_found)`` where ``not_found`` lists the door ids whose
        lookup returned an empty result.
    """
    missing_ids = []
    for group in col_dict.values():
        per_door_hits = []
        for door_label in group['door_id']:
            # Only the first of the three returned values is used here.
            hits, _second, _third = find_text_in_plan(door_label, plan_texts)
            per_door_hits.append(hits)
            if not len(hits):
                missing_ids.append(door_label)
        group['location'] = per_door_hits
    return col_dict, missing_ids
1407
+
1408
+ import pandas as pd
1409
+
1410
+ def _ensure_color_tuple(x):
1411
+ if x is None or isinstance(x, tuple):
1412
+ return x
1413
+ try:
1414
+ return tuple(x)
1415
+ except Exception:
1416
+ return x
1417
+
1418
+ def _ensure_list_of_tuples(val):
1419
+ if val is None:
1420
+ return []
1421
+ if isinstance(val, tuple):
1422
+ return [val]
1423
+ if isinstance(val, list):
1424
+ out = []
1425
+ for item in val:
1426
+ if item is None:
1427
+ continue
1428
+ if isinstance(item, tuple):
1429
+ out.append(item)
1430
+ elif isinstance(item, list):
1431
+ out.append(tuple(item))
1432
+ else:
1433
+ try:
1434
+ out.append(tuple(item))
1435
+ except Exception:
1436
+ pass
1437
+ return out
1438
+ try:
1439
+ return [tuple(val)]
1440
+ except Exception:
1441
+ return []
1442
+
1443
def grouped_to_dataframe_dynamic(grouped, keep_group=False,
                                 explode_locations=False,
                                 drop_empty_locations=False):
    """Flatten a ``{group_key: block}`` mapping into one DataFrame row per door.

    Every ``block`` is a dictionary of column name -> value. List values are
    read per row (index ``i``); non-list values are repeated on every row.
    The number of rows of a group is the length of its longest list.

    Door ids come from ``block['door_id']`` or, when that is missing or
    empty, from ``block['values']``. Rows beyond the end of the id list get
    the synthetic id ``"<group_key>:<i>"``. The ``'values'`` key itself is
    never emitted as a column.

    Args:
        grouped: mapping of group key -> block dictionary.
        keep_group: when true, add a ``'source_group'`` column holding the
            group key.
        explode_locations: when true (and a ``'location'`` column exists),
            explode it so every row carries a single location entry.
        drop_empty_locations: when true (and a ``'location'`` column exists),
            drop rows whose location list is empty before exploding.

    Returns:
        A ``pandas.DataFrame`` whose columns are the union of the keys seen in
        all blocks (empty when no group produced a row).
    """
    rows = []

    for group_key, block in grouped.items():
        ids = block.get('door_id') or block.get('values') or []
        list_lengths = [len(v) for v in block.values() if isinstance(v, list)]
        n = max(list_lengths + [len(ids)]) if (list_lengths or ids) else 0
        if n == 0:
            continue

        for i in range(n):
            row = {'door_id': ids[i] if i < len(ids) else f"{group_key}:{i}"}

            for k, v in block.items():
                if k == 'values':
                    continue
                if isinstance(v, list):
                    val = v[i] if i < len(v) else None
                else:
                    val = v
                if k == 'door_id' and val is None:
                    # Keep the id resolved above (from 'values' or the
                    # synthetic fallback) instead of overwriting it with None
                    # when the 'door_id' list is shorter than the group.
                    continue
                if k == 'color':
                    val = _ensure_color_tuple(val)
                elif k == 'location':
                    val = _ensure_list_of_tuples(val)
                row[k] = val

            if keep_group:
                row['source_group'] = group_key
            rows.append(row)

    df = pd.DataFrame(rows)  # columns are the dynamic union of all row keys

    # Normalise the 'location' column, then optionally drop empties / explode.
    if 'location' in df.columns:
        # Also turns missing values (groups without 'location') into [].
        df['location'] = df['location'].apply(_ensure_list_of_tuples)

        if drop_empty_locations:
            df = df[df['location'].map(lambda xs: len(xs) > 0)].reset_index(drop=True)

        if explode_locations:
            # One row per location entry; empty lists left in place become NaN.
            df = df.explode('location', ignore_index=True)

    return df
1489
+
1490
+ # Modify it to return widths and height from width, height columns
1491
def get_width_clean_width_height(width_list, height_list):
    """Clean the raw width and height strings of a schedule.

    For every entry the commas are removed. If the remaining text is a number
    (according to ``is_not_number``) with an integral value, it is rewritten
    without a fractional part (``"900.0"`` -> ``"900"``); any other text is
    kept as it is. All results are strings.

    Args:
        width_list: iterable of width strings.
        height_list: iterable of height strings.

    Returns:
        ``(widths, heights)`` - two lists of cleaned strings, in input order.
    """
    def _clean(raw):
        # Drop thousands separators before looking at the value.
        text = re.sub(r",", "", raw)
        if not is_not_number(text) and float(text).is_integer():
            return str(int(float(text)))
        return str(text)

    cleaned_widths = [_clean(entry) for entry in width_list]
    cleaned_heights = [_clean(entry) for entry in height_list]
    return cleaned_widths, cleaned_heights
1517
+
1518
def get_widths_bb_format_st_op(cleaned_width, kelma):
    """Split structural-opening strings such as ``"900x2100"`` into widths and heights.

    ``kelma`` is the header text of the structural-opening column. When it
    reads like "W x H" / "Width x Height", the first number of each entry is
    the width and the second the height; for any other header the two are
    swapped.

    Args:
        cleaned_width: iterable of strings holding two numbers separated by
            ``x``, ``X`` or ``×``.
        kelma: header text used to decide the order of the two numbers.

    Returns:
        ``(widths, heights)`` - two lists of ints, in input order.

    Raises:
        ValueError: if an entry has no separator or one of its two parts is
            not a number.
    """
    width_first = re.search(r"\bW(?:idth)?\s*[×x]\s*H(?:eight)?\b", kelma) is not None
    widths = []
    heights = []
    for dimension in cleaned_width:
        # Right-most position among the first occurrences of each separator.
        split_at = max(dimension.find(sep) for sep in ("x", "×", "X"))
        if split_at == -1:
            # Without this guard the -1 index would slice off the last digit
            # and silently return wrong numbers (e.g. "900" -> 90 and 900).
            raise ValueError(
                f"no 'x' separator in structural opening value: {dimension!r}"
            )
        first = int(float(dimension[:split_at]))
        second = int(float(dimension[split_at + 1:]))
        if width_first:
            widths.append(first)
            heights.append(second)
        else:
            widths.append(second)
            heights.append(first)
    return widths, heights
1539
+
1540
+ # New for new dictionary logic
1541
def create_bb_bax_new(df_points, CountStyles, page_number, height_plan):
    """Build one count-markup annotation per row of ``df_points``.

    Args:
        df_points: DataFrame with one row per door occurrence. Each row must
            provide ``'door_id'``, ``'color'`` (an ``(r, g, b)`` triple on the
            0-255 scale) and ``'location'`` (an ``(x, y)`` pair). Every other
            column is copied into the annotation's custom data.
        CountStyles: mapping of count-style names; ``CountStyles['Circle']``
            is used for every annotation.
        page_number: page value stored on every annotation.
        height_plan: passed to ``point_mupdf_to_pdf`` together with the
            location to obtain the annotation vertices.

    Returns:
        ``(bax_annotations, customDta)`` - the list of annotation
        dictionaries and the custom-data dictionary of the last row. For an
        empty ``df_points`` the list is empty and ``customDta`` maps every
        non-excluded column name to ``None``, so callers can still read the
        column names from its keys.
    """
    excluded = ["location", "color"]
    # Bind a fallback up front: with no rows the loop body never runs, and
    # returning an unbound name would raise UnboundLocalError.
    customDta = {col: None for col in df_points.columns if col not in excluded}
    bax_annotations = []
    for _, row in df_points.iterrows():
        customDta = row.drop(labels=excluded, errors="ignore").to_dict()
        r, g, b = row['color']
        # Colour is written normalised to the 0-1 range (R/255 G/255 B/255).
        R = str(float(r/255))
        G = str(float(g/255))
        B = str(float(b/255))
        x, y = row['location']
        vertix = point_mupdf_to_pdf(x, y, height_plan)
        bax_annotations.append({
            'vertices': vertix,
            'text': '1',  # each markup counts as a single item
            'author': 'ADR',
            'custom_data': customDta,  # custom columns as {column name: text}
            'label': row['door_id'],
            'page' : page_number,
            'color':R+ ' '+G + ' '+B,
            'countstyle': CountStyles['Circle'],
            'countsize':'0.8'  # size of the count icon
        })

    return bax_annotations, customDta
1567
+
1568
+
1569
  #Handle missing widths or heights in some rows
1570
  def generate_separate_dimensions(widths):
1571
  widthat = []
 
1667
  if value == len(plan):
1668
  not_found_any_plan.append(key)
1669
  not_found_any_plan = [item for item in not_found_any_plan if item != "N/A"]
1670
+
1671
  return no_tables, not_found_any_plan
1672
 
1673
  def mainRun(schedule, plan, searcharray):
 
1693
  elif len(not_found_any_plan_model) < len(not_found_any_plan_normal):
1694
  pick_model = True
1695
  #print("choose not_found_any_plan_model")
1696
+ else: # law ad ba3d choose the older approach (fitz)
1697
  pick_normal = True
1698
  #print("choose any")
1699
 
 
1783
  if secondary_presence:
1784
  main_info = main_info + [""]
1785
  flattened_list = get_flattened_tuples_list_no_doortype(selected_columns_combined)
1786
+
1787
+
1788
+
1789
  plan_texts = read_text(p)
1790
 
1791
+
1792
+ #locations, not_found = get_word_locations_plan_secondary(flattened_list,plan_texts, main_info, secondary_info)
1793
+ #not_found_list.append(not_found)
1794
+ #new_data3 = get_cleaned_data_secondary(locations,main_info,secondary_info)
1795
+
1796
+ #repeated_labels = get_repeated_labels(locations)
1797
+ #repeated_labels = list(repeated_labels)
1798
+ #repeated_labels_list.append(repeated_labels)
1799
+ col_dict, not_found = add_location(col_dict, plan_texts)
1800
+ not_found_list.append(not_found)
1801
+
1802
+ df_points = grouped_to_dataframe_dynamic(col_dict,
1803
+ drop_empty_locations=True,
1804
+ explode_locations=True)
1805
+ # handling no door type in the new dictionary logic
1806
+ if 'color' not in df_points:
1807
+ df_points['color'] = (0, 0, 255)
1808
+ dupes = df_points['door_id'].value_counts()
1809
+ repeated_ids = dupes[dupes > 1].index.to_list()
1810
+ repeated_labels_list.append(repeated_ids)
1811
+
1812
+ if kelma:
1813
+ lst_st_op = df_points["structural_opening"].tolist()
1814
+ cleaned_st_op = get_cleaned_width(lst_st_op)
1815
+ widths, heights = get_widths_bb_format_st_op(cleaned_st_op, kelma)
1816
+ # remove a column (returns a new df)
1817
+ df_points = df_points.drop(columns=['structural_opening'])
1818
+
1819
+ # add two columns (scalars, lists/arrays/Series of length len(df), or expressions)
1820
+ df_points['width'] = widths # e.g., a list/Series/np.array or a scalar
1821
+ df_points['height'] = heights
1822
  else:
1823
+ lst_width = df_points["width"].tolist()
1824
+ lst_height = df_points["height"].tolist()
1825
+ clean_widths, clean_height = get_width_clean_width_height(lst_width, lst_height)
1826
+ df_points["width"] = clean_widths
1827
+ df_points["height"] = clean_height
1828
+ df_points = df_points.rename(columns={'width': 'Width_', 'height':'Height_'})
1829
+
1830
+ #if kelma == None:
1831
+ #widths, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1832
+ #else:
1833
+ #width_info_tobeprinted, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
1834
+ #cleaned_width = get_cleaned_width(width_info_tobeprinted)
1835
+ #widths = get_widths_bb_format(cleaned_width, kelma)
1836
+
1837
+
1838
+ #Count type annotation
1839
+ #widht_count, height_count = generate_separate_dimensions(widths)
1840
+ #bax = create_bb_bax_secondary(new_data3, widht_count, height_count, secondary_tobeprinted, CountStyles, user_input, page_number, page)
1841
+ #bax_annotations_all_inputs.append(bax)
1842
+ bax, customDta = create_bb_bax_new(df_points, CountStyles, page_number, page)
1843
+ bax_annotations_all_inputs.append(bax)
1844
+
1845
+
1846
  # if it is not byte type
1847
  #pdfs_count_type.append(convert_to_bytes(p))
1848
  pdfs_count_type.append(p)
 
1855
  for bax_ann in bax_annotations_all_inputs:
1856
  bax_annotation.extend(bax_ann)
1857
 
1858
+ #column_order = ['FireRating', 'AcousticRating', 'Height_', 'Width_']
1859
+ column_order = []
1860
+ for key in customDta.keys():
1861
+ column_order.append(key)
1862
+
1863
+
1864
+
1865
  ## Getting the not found in all plans
1866
  flattened_not_found_list = [item for sublist in not_found_list for item in sublist]
1867
  counts_not_found = Counter(flattened_not_found_list)