Marthee committed on
Commit
86f6cb5
·
verified ·
1 Parent(s): 13f62df

Update Counting_Columns_2_1.py

Browse files
Files changed (1) hide show
  1. Counting_Columns_2_1.py +4 -36
Counting_Columns_2_1.py CHANGED
@@ -10,7 +10,7 @@ import fitz # PyMuPDF
10
  import os
11
 
12
  def get_text_from_pdf(input_pdf_path):
13
- pdf_document = fitz.open(input_pdf_path)
14
 
15
  for page_num in range(pdf_document.page_count):
16
  page = pdf_document[page_num]
@@ -120,14 +120,12 @@ def getNearestText(point_list, p):
120
 
121
  def getColumnsTypes(nearbyy, x):
122
  found_tuple = []
123
- selected_clms_points = []
124
  # Loop through the list of tuples
125
  for i in range(len(nearbyy)):
126
  for tpl in x:
127
  if (tpl[2] == nearbyy[i][0] and tpl[3] == nearbyy[i][1]) and tpl[4].startswith("C"):
128
  found_tuple.append(tpl[4])
129
- selected_clms_points.append(nearbyy[i])
130
- return found_tuple, selected_clms_points
131
 
132
  def generate_legend(found_tuple):
133
  word_freq = {}
@@ -140,34 +138,6 @@ def generate_legend(found_tuple):
140
  df = pd.DataFrame(data.items(), columns=['Column Type', 'Count'])
141
  return df
142
 
143
- def add_annotations_to_pdf(image, pdf_name, columns_types, slctd_clms_pts):
144
- image_width, image_height = image.size
145
-
146
- # Create a new PDF document
147
- pdf_document = fitz.open('pdf',pdf_name)
148
- page=pdf_document[0]
149
- rotationOld=page.rotation
150
- derotationMatrix=page.derotation_matrix
151
- print('rotationOld',rotationOld)
152
- if page.rotation!=0:
153
- rotationangle = page.rotation
154
- page.set_rotation(0)
155
- print('rotationnew',page.rotation)
156
-
157
-
158
- #Annotation for drawin lines as in the markups
159
- for i in range(len(columns_types)):
160
- x, y = slctd_clms_pts[i]
161
- text = columns_types[i]
162
- # Create an annotation (sticky note)
163
- annot = page.add_text_annot(fitz.Point(x, y), text)
164
- annot.set_border(width=0.2, dashes=(1, 2)) # Optional border styling
165
- annot.set_colors(stroke=(1, 0, 0), fill=None) # Set the stroke color to red
166
- annot.update()
167
-
168
- page.set_rotation(rotationOld)
169
- return pdf_document
170
-
171
  def mainfun(plan):
172
  texts_from_pdf = get_text_from_pdf(plan)
173
  img = convert2img(plan)
@@ -178,9 +148,8 @@ def mainfun(plan):
178
  # BROWN COLUMNS
179
  text_points = getTextsPoints(texts_from_pdf)
180
  nearby = getNearestText(text_points, column_points)
181
- columns_types, slctd_clms_pts = getColumnsTypes(nearby, texts_from_pdf)
182
  legend = generate_legend(columns_types)
183
- add_annotations_to_pdf(image, pdf_name, columns_types, slctd_clms_pts)
184
  else:
185
  # BLUE COLUMNS
186
  img_blue = changeGrayModify(img)
@@ -189,7 +158,6 @@ def mainfun(plan):
189
  column_points,mask_clmns, mask_walls = get_columns_info(outsu, img)
190
  text_points = getTextsPoints(texts_from_pdf)
191
  nearby = getNearestText(text_points, column_points)
192
- columns_types, slctd_clms_pts = getColumnsTypes(nearby, texts_from_pdf)
193
  legend = generate_legend(columns_types)
194
- add_annotations_to_pdf(image, pdf_name, columns_types, slctd_clms_pts)
195
  return legend
 
10
  import os
11
 
12
  def get_text_from_pdf(input_pdf_path):
13
+ pdf_document = fitz.open('pdf',input_pdf_path)
14
 
15
  for page_num in range(pdf_document.page_count):
16
  page = pdf_document[page_num]
 
120
 
121
  def getColumnsTypes(nearbyy, x):
122
  found_tuple = []
 
123
  # Loop through the list of tuples
124
  for i in range(len(nearbyy)):
125
  for tpl in x:
126
  if (tpl[2] == nearbyy[i][0] and tpl[3] == nearbyy[i][1]) and tpl[4].startswith("C"):
127
  found_tuple.append(tpl[4])
128
+ return found_tuple
 
129
 
130
  def generate_legend(found_tuple):
131
  word_freq = {}
 
138
  df = pd.DataFrame(data.items(), columns=['Column Type', 'Count'])
139
  return df
140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  def mainfun(plan):
142
  texts_from_pdf = get_text_from_pdf(plan)
143
  img = convert2img(plan)
 
148
  # BROWN COLUMNS
149
  text_points = getTextsPoints(texts_from_pdf)
150
  nearby = getNearestText(text_points, column_points)
151
+ columns_types = getColumnsTypes(nearby, texts_from_pdf)
152
  legend = generate_legend(columns_types)
 
153
  else:
154
  # BLUE COLUMNS
155
  img_blue = changeGrayModify(img)
 
158
  column_points,mask_clmns, mask_walls = get_columns_info(outsu, img)
159
  text_points = getTextsPoints(texts_from_pdf)
160
  nearby = getNearestText(text_points, column_points)
161
+ columns_types = getColumnsTypes(nearby, texts_from_pdf)
162
  legend = generate_legend(columns_types)
 
163
  return legend