Marthee commited on
Commit
a455705
·
verified ·
1 Parent(s): 4a0f3d6

Upload doors_fasterrcnn.py

Browse files
Files changed (1) hide show
  1. doors_fasterrcnn.py +206 -0
doors_fasterrcnn.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """MartheDeployment_Doors_fasterRCNN.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1kgEtpfNt0jxSwPRhOzODIC6P_prg-c4L
8
+
9
+ ## Libraries
10
+ """
11
+
12
+ # from google.colab.patches import cv2_imshow
13
+ import cv2
14
+ import numpy as np
15
+ import pandas as pd
16
+
17
+ import statistics
18
+ from statistics import mode
19
+
20
+ from PIL import Image
21
+
22
+ # pip install PyPDF2
23
+
24
+ # pip install PyMuPDF
25
+
26
+ # pip install pip install PyMuPDF==1.19.0
27
+
28
+ import io
29
+
30
+ # !pip install pypdfium2
31
+ import pypdfium2 as pdfium
32
+
33
+ import fitz # PyMuPDF
34
+
35
+ import os
36
+
37
+ #drive.mount("/content/drive", force_remount=True)
38
+
39
+ import torch
40
+ from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
41
+ from PIL import Image, ImageDraw
42
+ import torchvision.transforms.functional as F
43
+ import matplotlib.pyplot as plt
44
+
45
+ """# updated for (fullpath, pdf_name)
46
+
47
+
48
+
49
+ """
50
+
51
def convert2pillow(path, page_index=0):
    """Render one page of a PDF file to a PIL image.

    Args:
        path: Filesystem path to the PDF document.
        page_index: Zero-based page number to render (default 0, which
            preserves the original first-page-only behavior).

    Returns:
        PIL.Image.Image: The rendered page.
    """
    pdf = pdfium.PdfDocument(path)
    try:
        page = pdf.get_page(page_index)
        try:
            pil_image = page.render().to_pil()
        finally:
            # Release native page/document handles; the original leaked both.
            page.close()
    finally:
        pdf.close()
    return pil_image
56
+
57
+ import torch
58
+ import torchvision
59
+ from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
60
+
61
+ # Function to get the model
62
def get_model(num_classes):
    """Build a Faster R-CNN detector with a custom classification head.

    A ResNet-50-FPN backbone pre-trained on COCO is loaded, then its box
    predictor is swapped for a fresh one sized to ``num_classes`` outputs
    (background class included).

    Args:
        num_classes: Total number of classes, counting the background.

    Returns:
        The torchvision detection model, ready for fine-tuning or eval.
    """
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # Size the replacement head to the existing classifier's input width.
    head_in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)
    return model
73
+
74
def ev_model(img, model, device, threshold):
    """Run detection on one image and split kept boxes by predicted label.

    Args:
        img: PIL image to evaluate.
        model: torchvision-style detection model returning boxes/labels/scores.
        device: Torch device the model lives on.
        threshold: Minimum confidence score for a detection to be kept.

    Returns:
        tuple[list, list]: ``(single_boxes, double_boxes)`` where each box
        is ``[x1, y1, x2, y2]``; label 1 goes to the first list, every
        other label to the second.
    """
    batch = F.to_tensor(img).unsqueeze(0).to(device)
    model.eval()

    with torch.no_grad():
        result = model(batch)[0]

    single_boxes, double_boxes = [], []
    for box, label, score in zip(result['boxes'], result['labels'], result['scores']):
        if score.item() <= threshold:
            continue
        # Label 1 is treated as "single door"; anything else as "double".
        bucket = single_boxes if label.item() == 1 else double_boxes
        bucket.append(box.tolist())

    return single_boxes, double_boxes
93
+
94
def calculate_width(bbox):
    """Return the door width for one detection box.

    Doors can face left/right or up/down, so the door's span is whichever
    side of the box is larger.

    Args:
        bbox: Sequence ``[x1, y1, x2, y2]``.

    Returns:
        The larger of the box width (x2 - x1) and height (y2 - y1).
    """
    return max(bbox[2] - bbox[0], bbox[3] - bbox[1])
103
+
104
def calculate_midpoint(top_left, bottom_right):
    """Return the integer midpoint of the rectangle spanned by two corners.

    Args:
        top_left: ``(x1, y1)`` corner.
        bottom_right: ``(x2, y2)`` corner.

    Returns:
        ``(xm, ym)`` with each coordinate truncated to int.
    """
    (x1, y1), (x2, y2) = top_left, bottom_right
    return (int((x1 + x2) / 2), int((y1 + y2) / 2))
111
+
112
def mid_points_bbox(bbox):
    """Compute ``(midpoint, width)`` pairs for a list of detection boxes.

    Args:
        bbox: Iterable of boxes, each ``[x1, y1, x2, y2]``.

    Returns:
        list: One ``((xm, ym), door_width)`` tuple per box; midpoint
        coordinates are ints (via ``calculate_midpoint``) and the width
        comes from ``calculate_width``.
    """
    midpoints = []
    for box in bbox:
        corner_a = (int(box[0]), int(box[1]))
        corner_b = (int(box[2]), int(box[3]))
        midpoints.append((calculate_midpoint(corner_a, corner_b),
                          calculate_width(box)))
    return midpoints
125
+
126
def create_annotations(door_kind, midpoints):
    """Turn ``(midpoint, width)`` pairs into text-annotation triples.

    Args:
        door_kind: Label prefix, e.g. ``"single door"``.
        midpoints: List of ``((x, y), width)`` tuples.

    Returns:
        list: ``(x, y, text)`` triples where text reads
        ``"<door_kind> with <width> pixels width"``.
    """
    return [
        (point[0], point[1], door_kind + f" with {width} pixels width")
        for point, width in midpoints
    ]
132
+
133
def add_annotations_to_pdf(image, pdf_name, annotation_s, annotation_d):
    """Build an in-memory one-page PDF of *image* with sticky-note annotations.

    Args:
        image: PIL image used as the page background (page matches its size).
        pdf_name: Base name of the source PDF. Kept for interface
            compatibility; saving to ``pdf_name + "_annotated.pdf"`` is
            left to the caller.
        annotation_s: ``(x, y, text)`` triples for single doors (red notes).
        annotation_d: ``(x, y, text)`` triples for double doors (green notes).

    Returns:
        fitz.Document: The annotated PDF (not saved to disk).
    """
    image_width, image_height = image.size

    # New PDF with a single page the same size as the image.
    pdf_document = fitz.open()
    page = pdf_document.new_page(width=image_width, height=image_height)

    # Paint the rendered floor plan as the page background.
    image_stream = io.BytesIO()
    image.save(image_stream, format="PNG")
    page.insert_image(page.rect, stream=image_stream.getvalue())

    def _add_notes(annotations, stroke_color):
        # One sticky note per (x, y, text) triple, stroke-colored per door kind.
        for x, y, text in annotations:
            annot = page.add_text_annot(fitz.Point(x, y), text)
            annot.set_border(width=0.2, dashes=(1, 2))  # optional border styling
            annot.set_colors(stroke=stroke_color, fill=None)
            annot.update()

    _add_notes(annotation_s, (1, 0, 0))  # single doors: red
    _add_notes(annotation_d, (0, 1, 0))  # double doors: green

    return pdf_document
168
+
169
def main_run(pdf_fullpath, weights_path, pdf_name, threshold=0.6, page_size=(2384, 1684)):
    """End-to-end door detection on the first page of a PDF floor plan.

    Args:
        pdf_fullpath: Path to the input PDF.
        weights_path: Path to the saved Faster R-CNN state dict (.pth).
        pdf_name: Base name passed through to the annotation step.
        threshold: Minimum detection confidence (default 0.6, unchanged).
        page_size: ``(width, height)`` the page is resized to before
            inference. Default matches the previously hard-coded value —
            presumably the training resolution; TODO confirm.

    Returns:
        tuple: ``(annotated BGR numpy image, fitz.Document with annotations)``.
    """
    img_pillow = convert2pillow(pdf_fullpath)
    new_image = img_pillow.resize(page_size)

    # Must match the number of classes the checkpoint was trained with.
    num_classes = 6
    model = get_model(num_classes)

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # NOTE(review): torch.load unpickles arbitrary objects — only load
    # checkpoints from a trusted source.
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model.eval()
    model.to(device)

    # Inference: split detections into single- and double-door boxes.
    sbox, dbox = ev_model(new_image, model, device, threshold)

    single_annotations = create_annotations("single door", mid_points_bbox(sbox))
    double_annotations = create_annotations("double door", mid_points_bbox(dbox))

    pdf_document = add_annotations_to_pdf(new_image, pdf_name,
                                          single_annotations, double_annotations)

    # Rasterize the annotated page back to an OpenCV-style BGR array.
    page = pdf_document[0]
    pix = page.get_pixmap()
    pil_out = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
    annotatedimg = cv2.cvtColor(np.array(pil_out), cv2.COLOR_RGB2BGR)

    return annotatedimg, pdf_document
202
+
203
+ # model_path = '/content/drive/MyDrive/combined.pth'
204
+ # #pdf_name = data
205
+ # for i in range(len(fullpath)):
206
+ # main_run(fullpath[i], model_path, pdf_name[i])