Spaces:
Sleeping
Sleeping
Update Counting_Columns_2_1.py
Browse files- Counting_Columns_2_1.py +4 -36
Counting_Columns_2_1.py
CHANGED
|
@@ -10,7 +10,7 @@ import fitz # PyMuPDF
|
|
| 10 |
import os
|
| 11 |
|
| 12 |
def get_text_from_pdf(input_pdf_path):
|
| 13 |
-
pdf_document = fitz.open(input_pdf_path)
|
| 14 |
|
| 15 |
for page_num in range(pdf_document.page_count):
|
| 16 |
page = pdf_document[page_num]
|
|
@@ -120,14 +120,12 @@ def getNearestText(point_list, p):
|
|
| 120 |
|
| 121 |
def getColumnsTypes(nearbyy, x):
|
| 122 |
found_tuple = []
|
| 123 |
-
selected_clms_points = []
|
| 124 |
# Loop through the list of tuples
|
| 125 |
for i in range(len(nearbyy)):
|
| 126 |
for tpl in x:
|
| 127 |
if (tpl[2] == nearbyy[i][0] and tpl[3] == nearbyy[i][1]) and tpl[4].startswith("C"):
|
| 128 |
found_tuple.append(tpl[4])
|
| 129 |
-
|
| 130 |
-
return found_tuple, selected_clms_points
|
| 131 |
|
| 132 |
def generate_legend(found_tuple):
|
| 133 |
word_freq = {}
|
|
@@ -140,34 +138,6 @@ def generate_legend(found_tuple):
|
|
| 140 |
df = pd.DataFrame(data.items(), columns=['Column Type', 'Count'])
|
| 141 |
return df
|
| 142 |
|
| 143 |
-
def add_annotations_to_pdf(image, pdf_name, columns_types, slctd_clms_pts):
|
| 144 |
-
image_width, image_height = image.size
|
| 145 |
-
|
| 146 |
-
# Create a new PDF document
|
| 147 |
-
pdf_document = fitz.open('pdf',pdf_name)
|
| 148 |
-
page=pdf_document[0]
|
| 149 |
-
rotationOld=page.rotation
|
| 150 |
-
derotationMatrix=page.derotation_matrix
|
| 151 |
-
print('rotationOld',rotationOld)
|
| 152 |
-
if page.rotation!=0:
|
| 153 |
-
rotationangle = page.rotation
|
| 154 |
-
page.set_rotation(0)
|
| 155 |
-
print('rotationnew',page.rotation)
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
#Annotation for drawin lines as in the markups
|
| 159 |
-
for i in range(len(columns_types)):
|
| 160 |
-
x, y = slctd_clms_pts[i]
|
| 161 |
-
text = columns_types[i]
|
| 162 |
-
# Create an annotation (sticky note)
|
| 163 |
-
annot = page.add_text_annot(fitz.Point(x, y), text)
|
| 164 |
-
annot.set_border(width=0.2, dashes=(1, 2)) # Optional border styling
|
| 165 |
-
annot.set_colors(stroke=(1, 0, 0), fill=None) # Set the stroke color to red
|
| 166 |
-
annot.update()
|
| 167 |
-
|
| 168 |
-
page.set_rotation(rotationOld)
|
| 169 |
-
return pdf_document
|
| 170 |
-
|
| 171 |
def mainfun(plan):
|
| 172 |
texts_from_pdf = get_text_from_pdf(plan)
|
| 173 |
img = convert2img(plan)
|
|
@@ -178,9 +148,8 @@ def mainfun(plan):
|
|
| 178 |
# BROWN COLUMNS
|
| 179 |
text_points = getTextsPoints(texts_from_pdf)
|
| 180 |
nearby = getNearestText(text_points, column_points)
|
| 181 |
-
columns_types
|
| 182 |
legend = generate_legend(columns_types)
|
| 183 |
-
add_annotations_to_pdf(image, pdf_name, columns_types, slctd_clms_pts)
|
| 184 |
else:
|
| 185 |
# BLUE COLUMNS
|
| 186 |
img_blue = changeGrayModify(img)
|
|
@@ -189,7 +158,6 @@ def mainfun(plan):
|
|
| 189 |
column_points,mask_clmns, mask_walls = get_columns_info(outsu, img)
|
| 190 |
text_points = getTextsPoints(texts_from_pdf)
|
| 191 |
nearby = getNearestText(text_points, column_points)
|
| 192 |
-
columns_types
|
| 193 |
legend = generate_legend(columns_types)
|
| 194 |
-
add_annotations_to_pdf(image, pdf_name, columns_types, slctd_clms_pts)
|
| 195 |
return legend
|
|
|
|
| 10 |
import os
|
| 11 |
|
| 12 |
def get_text_from_pdf(input_pdf_path):
|
| 13 |
+
pdf_document = fitz.open('pdf',input_pdf_path)
|
| 14 |
|
| 15 |
for page_num in range(pdf_document.page_count):
|
| 16 |
page = pdf_document[page_num]
|
|
|
|
| 120 |
|
| 121 |
def getColumnsTypes(nearbyy, x):
    """Collect the "C"-prefixed labels located at the given points.

    Every point in ``nearbyy`` is visited in order; for each one, every
    entry of ``x`` is scanned in order and its label (item 4) is kept when
    item 2 and item 3 of the entry equal the point's first and second
    coordinate and the label starts with ``"C"``.

    Args:
        nearbyy: Iterable of points; only items 0 and 1 of each point are
            read.
        x: Re-iterable collection of tuples whose items 2 and 3 are compared
            against a point and whose item 4 is a string label.
            NOTE(review): presumably PyMuPDF "words" tuples
            (x0, y0, x1, y1, word, ...) -- confirm against
            get_text_from_pdf. ``x`` is scanned once per point, so it must
            not be a one-shot iterator.

    Returns:
        list: The matching labels, grouped by point in ``nearbyy`` order and
        then in ``x`` order. Duplicates are kept so callers can count
        occurrences; the list is empty when nothing matches.
    """
    found_tuple = []
    # Iterate the points directly instead of indexing through range(len(...)).
    for point in nearbyy:
        for tpl in x:
            # Exact coordinate equality is intentional: the points are
            # expected to originate from these same tuples.
            if tpl[2] == point[0] and tpl[3] == point[1] and tpl[4].startswith("C"):
                found_tuple.append(tpl[4])
    return found_tuple
|
|
|
|
| 129 |
|
| 130 |
def generate_legend(found_tuple):
|
| 131 |
word_freq = {}
|
|
|
|
| 138 |
df = pd.DataFrame(data.items(), columns=['Column Type', 'Count'])
|
| 139 |
return df
|
| 140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
def mainfun(plan):
|
| 142 |
texts_from_pdf = get_text_from_pdf(plan)
|
| 143 |
img = convert2img(plan)
|
|
|
|
| 148 |
# BROWN COLUMNS
|
| 149 |
text_points = getTextsPoints(texts_from_pdf)
|
| 150 |
nearby = getNearestText(text_points, column_points)
|
| 151 |
+
columns_types = getColumnsTypes(nearby, texts_from_pdf)
|
| 152 |
legend = generate_legend(columns_types)
|
|
|
|
| 153 |
else:
|
| 154 |
# BLUE COLUMNS
|
| 155 |
img_blue = changeGrayModify(img)
|
|
|
|
| 158 |
column_points,mask_clmns, mask_walls = get_columns_info(outsu, img)
|
| 159 |
text_points = getTextsPoints(texts_from_pdf)
|
| 160 |
nearby = getNearestText(text_points, column_points)
|
| 161 |
+
columns_types = getColumnsTypes(nearby, texts_from_pdf)
|
| 162 |
legend = generate_legend(columns_types)
|
|
|
|
| 163 |
return legend
|