kamcio1989 committed on
Commit
82c6a9d
·
verified ·
1 Parent(s): d196433

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. models.py +269 -0
  2. requirements.txt +1 -0
  3. utils.py +239 -0
models.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ from typing import List, Dict, Tuple, Any
4
+ import logging
5
+
6
+ # Module-level logger used by the detector classes defined below.
+ logger = logging.getLogger(__name__)
7
+
8
class FaceDetector:
    """Face detection using OpenCV Haar Cascade classifiers.

    Three cascades bundled with OpenCV are used: frontal face, eye and smile.
    Any cascade that cannot be loaded is stored as ``None`` and the
    corresponding feature is skipped.
    """

    def __init__(self):
        self.face_cascade = None
        self.eye_cascade = None
        self.smile_cascade = None
        self.load_models()

    @staticmethod
    def _load_cascade(filename: str):
        """Load one bundled cascade file; return ``None`` if it is unusable."""
        cascade = cv2.CascadeClassifier(cv2.data.haarcascades + filename)
        # CascadeClassifier does not raise when the XML file is missing or
        # corrupt; it yields an empty classifier, so check explicitly.
        if cascade.empty():
            logger.error("Failed to load Haar cascade file: %s", filename)
            return None
        return cascade

    def load_models(self):
        """Load the face, eye and smile Haar Cascade models."""
        try:
            self.face_cascade = self._load_cascade(
                'haarcascade_frontalface_default.xml'
            )
            self.eye_cascade = self._load_cascade('haarcascade_eye.xml')
            self.smile_cascade = self._load_cascade('haarcascade_smile.xml')
        except Exception as e:
            logger.error(f"Failed to load face detection models: {e}")
            return

        if self.face_cascade is not None:
            logger.info("Face detection models loaded successfully")

    def detect_faces(self, image: np.ndarray, confidence_threshold: float = 0.7) -> List[Dict]:
        """
        Detect faces in the input image.

        Args:
            image: Input image in BGR format. Grayscale (2-D or single
                channel) and BGRA images are also accepted.
            confidence_threshold: Not used for Haar cascade (always returns high confidence)

        Returns:
            List of face detection results. Each entry holds ``id``, ``bbox``
            as ``[x, y, w, h]``, a fixed ``confidence`` of 1.0, ``label`` and
            a ``features`` dict (eye count, smile flag). Empty when the face
            cascade is unavailable or the image is missing/empty.
        """
        if self.face_cascade is None or image is None or image.size == 0:
            return []

        # Pick the grayscale conversion that matches the channel layout;
        # BGR2GRAY raises on anything that is not 3-channel.
        if image.ndim == 2:
            gray = image
        elif image.shape[2] == 1:
            gray = np.ascontiguousarray(image[:, :, 0])
        elif image.shape[2] == 4:
            gray = cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
        else:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        faces = self.face_cascade.detectMultiScale(
            gray,
            scaleFactor=1.1,
            minNeighbors=5,
            minSize=(30, 30),
            flags=cv2.CASCADE_SCALE_IMAGE
        )

        results = []
        for i, (x, y, w, h) in enumerate(faces):
            # Eyes and smiles are searched only inside the face region.
            roi_gray = gray[y:y+h, x:x+w]

            eyes = []
            if self.eye_cascade is not None:
                eyes = self.eye_cascade.detectMultiScale(roi_gray)

            smiles = []
            if self.smile_cascade is not None:
                smiles = self.smile_cascade.detectMultiScale(
                    roi_gray,
                    scaleFactor=1.7,
                    minNeighbors=22,
                    minSize=(25, 25)
                )

            results.append({
                "id": i,
                "bbox": [int(x), int(y), int(w), int(h)],
                "confidence": 1.0,  # Haar cascade doesn't provide confidence scores
                "label": "face",
                "features": {
                    "eyes_detected": len(eyes),
                    "smile_detected": len(smiles) > 0
                }
            })

        return results
82
+
83
class ObjectDetector:
    """Object detection using a Caffe MobileNet SSD model.

    When the model files cannot be loaded, ``detect_objects`` falls back to
    fabricated placeholder detections (each marked ``"placeholder": True``)
    so a demo UI still has something to show.
    """

    # Label list; a label's position is the class id stored in results.
    CLASSES = (
        "background", "aeroplane", "bicycle", "bird", "boat", "bottle",
        "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse",
        "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"
    )

    def __init__(self):
        self.net = None
        self.classes = list(self.CLASSES)
        self.load_model()

    def load_model(self):
        """Load the MobileNet SSD model; leave ``self.net`` as None on failure."""
        # Paths are relative to the working directory; the files may not
        # exist in all environments.
        model_path = "MobileNetSSD_deploy.prototxt"
        weights_path = "MobileNetSSD_deploy.caffemodel"
        try:
            self.net = cv2.dnn.readNetFromCaffe(model_path, weights_path)
            logger.info("Object detection model loaded successfully")
        except Exception as e:
            # Exception (not a bare except) so KeyboardInterrupt/SystemExit
            # are not swallowed.
            logger.warning("Object detection model files not found. Using placeholder.")
            logger.debug("Object detection model load error: %s", e)
            self.net = None

    def detect_objects(self, image: np.ndarray, confidence_threshold: float = 0.5) -> List[Dict]:
        """
        Detect objects in the input image.

        Args:
            image: Input image in BGR format
            confidence_threshold: Minimum confidence for detection (exclusive)

        Returns:
            List of object detection results, each with ``id``, ``bbox`` as
            ``[x, y, w, h]`` clamped to the image, ``confidence``, ``label``
            and ``class_id``. Placeholder results are returned when no model
            is loaded; an empty list on a missing/empty image or on failure.
        """
        if image is None or image.size == 0:
            return []

        if self.net is None:
            # Return placeholder detections for demo purposes
            return self._placeholder_detections(image, confidence_threshold)

        try:
            h, w = image.shape[:2]

            blob = cv2.dnn.blobFromImage(
                image, 0.007843, (300, 300), 127.5
            )
            self.net.setInput(blob)
            detections = self.net.forward()

            results = []
            for i in range(detections.shape[2]):
                confidence = float(detections[0, 0, i, 2])
                if confidence <= confidence_threshold:
                    continue

                idx = int(detections[0, 0, i, 1])
                # Reject negative ids too: they would index from the end.
                if not 0 <= idx < len(self.classes):
                    continue

                # Coordinates are scaled by the image size and clamped so
                # boxes never extend outside the frame.
                x1 = min(max(int(detections[0, 0, i, 3] * w), 0), w)
                y1 = min(max(int(detections[0, 0, i, 4] * h), 0), h)
                x2 = min(max(int(detections[0, 0, i, 5] * w), 0), w)
                y2 = min(max(int(detections[0, 0, i, 6] * h), 0), h)
                if x2 <= x1 or y2 <= y1:
                    continue

                results.append({
                    "id": i,
                    "bbox": [x1, y1, x2 - x1, y2 - y1],
                    "confidence": confidence,
                    "label": self.classes[idx],
                    "class_id": idx
                })

            return results

        except Exception as e:
            logger.error(f"Object detection failed: {e}")
            return []

    def _placeholder_detections(self, image: np.ndarray, confidence_threshold: float = 0.0) -> List[Dict]:
        """
        Generate placeholder detections for demo when model is not available.

        Args:
            image: Input image (only its height and width are used)
            confidence_threshold: Placeholders whose fixed confidence does not
                exceed this value are omitted; the default keeps all of them

        Returns:
            Placeholder detection results at random positions, each flagged
            with ``"placeholder": True``
        """
        h, w = image.shape[:2]

        placeholder_objects = [
            {"label": "person", "confidence": 0.85, "size_factor": 0.3},
            {"label": "car", "confidence": 0.75, "size_factor": 0.2},
            {"label": "bottle", "confidence": 0.65, "size_factor": 0.1}
        ]

        results = []
        for i, obj in enumerate(placeholder_objects):
            if obj["confidence"] <= confidence_threshold:
                continue

            # Square box sized from the shorter image side, placed so that
            # it stays inside the image.
            size = int(min(h, w) * obj["size_factor"])
            x = int(np.random.randint(0, max(1, w - size)))
            y = int(np.random.randint(0, max(1, h - size)))

            results.append({
                "id": i,
                "bbox": [x, y, size, size],
                "confidence": obj["confidence"],
                "label": obj["label"],
                # Use the label's real index so class_id agrees with label.
                "class_id": self.classes.index(obj["label"]),
                "placeholder": True
            })

        return results
197
+
198
+ # Module-level detector instances. Both start as None and are created on
+ # first use by get_face_detector() / get_object_detector().
199
+ _face_detector = None
200
+ _object_detector = None
201
+
202
def get_face_detector() -> FaceDetector:
    """Return the shared FaceDetector, building it on the first call."""
    global _face_detector
    if _face_detector is not None:
        return _face_detector
    _face_detector = FaceDetector()
    return _face_detector
208
+
209
def get_object_detector() -> ObjectDetector:
    """Return the shared ObjectDetector, building it on the first call."""
    global _object_detector
    if _object_detector is not None:
        return _object_detector
    _object_detector = ObjectDetector()
    return _object_detector
215
+
216
def detect_faces(image: np.ndarray, confidence_threshold: float = 0.7) -> List[Dict]:
    """
    Run face detection through the module's shared face detector.

    Args:
        image: Input image
        confidence_threshold: Confidence threshold forwarded to the detector

    Returns:
        Face detection results produced by the detector
    """
    return get_face_detector().detect_faces(image, confidence_threshold)
229
+
230
def detect_objects(image: np.ndarray, confidence_threshold: float = 0.5) -> List[Dict]:
    """
    Run object detection through the module's shared object detector.

    Args:
        image: Input image
        confidence_threshold: Confidence threshold forwarded to the detector

    Returns:
        Object detection results produced by the detector
    """
    return get_object_detector().detect_objects(image, confidence_threshold)
243
+
244
def get_model_info() -> Dict[str, Any]:
    """
    Get information about the loaded models.

    Creates the shared detectors if they do not exist yet.

    Returns:
        Dictionary with one entry per detector describing its model type,
        whether it is actually usable, and its input/output conventions
    """
    face_detector = get_face_detector()
    object_detector = get_object_detector()

    # A CascadeClassifier object exists even when its XML file failed to
    # load, so "not None" alone would report a broken cascade as loaded.
    cascade = face_detector.face_cascade
    face_loaded = cascade is not None and not cascade.empty()

    return {
        "face_detector": {
            "model_type": "Haar Cascade",
            "loaded": face_loaded,
            "features": ["face", "eyes", "smile"],
            "input_format": "BGR",
            "output_format": "bounding boxes"
        },
        "object_detector": {
            "model_type": "MobileNet-SSD",
            "loaded": object_detector.net is not None,
            "num_classes": len(object_detector.classes),
            "input_size": "300x300",
            "output_format": "bounding boxes with confidence"
        }
    }
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ gradio>=4.0.0
utils.py ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import json
4
+ from typing import Tuple, List, Dict, Any
5
+ import logging
6
+
7
+ # Configure logging: basicConfig runs when this module is imported and sets
+ # the root logger level to INFO; `logger` is this module's own logger.
8
+ logging.basicConfig(level=logging.INFO)
9
+ logger = logging.getLogger(__name__)
10
+
11
def load_detection_models() -> Tuple[Any, Any, List[str]]:
    """
    Load face detection and object detection models.

    Returns:
        Tuple of (face_cascade, object_net, class_names). ``face_cascade`` is
        None when the cascade file could not be loaded, ``object_net`` is
        None when the MobileNet SSD files are unavailable, and
        ``(None, None, [])`` is returned on any unexpected error.
    """
    try:
        # Load face detection cascade
        face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
        # CascadeClassifier does not raise on a missing file; it yields an
        # empty classifier, so report that case as "not loaded".
        if face_cascade.empty():
            logger.error("Face detection cascade could not be loaded")
            face_cascade = None

        # Load object detection model (MobileNet SSD); paths are relative to
        # the working directory.
        model_path = "MobileNetSSD_deploy.prototxt"
        weights_path = "MobileNetSSD_deploy.caffemodel"

        try:
            object_net = cv2.dnn.readNetFromCaffe(model_path, weights_path)
        except Exception:
            # Model files are optional: callers treat None as "no detector".
            logger.warning("Object detection model files not found. Using placeholder.")
            object_net = None

        # PASCAL VOC class names (20 classes plus background)
        class_names = [
            "background", "aeroplane", "bicycle", "bird", "boat", "bottle",
            "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse",
            "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"
        ]

        return face_cascade, object_net, class_names

    except Exception as e:
        logger.error(f"Error loading models: {e}")
        return None, None, []
45
+
46
def _ensure_bgr(image: np.ndarray) -> np.ndarray:
    """Return a 3-channel BGR version of an RGB, RGBA or grayscale image."""
    if image.ndim == 2 or image.shape[2] == 1:
        return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    if image.shape[2] == 4:
        return cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
    return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)


def process_image(
    image: np.ndarray,
    face_cascade: Any,
    object_net: Any,
    class_names: List[str],
    enable_face_detection: bool,
    enable_object_detection: bool,
    face_confidence: float,
    object_confidence: float
) -> Tuple[np.ndarray, List[Dict], List[Dict]]:
    """
    Process the input image for face and object detection.

    Args:
        image: Input image in RGB channel order (RGBA and grayscale are
            also accepted)
        face_cascade: Face detection cascade, or None to skip faces
        object_net: Object detection network, or None to skip objects
        class_names: List of class names indexed by the network's class id
        enable_face_detection: Whether to detect faces
        enable_object_detection: Whether to detect objects
        face_confidence: Face detection confidence threshold (unused: the
            Haar cascade does not provide confidence scores)
        object_confidence: Object detection confidence threshold (exclusive)

    Returns:
        Tuple of (processed_image, face_results, object_results). The image
        is returned unchanged; boxes are ``[x, y, w, h]`` in pixels.
    """
    face_results = []
    object_results = []

    run_faces = enable_face_detection and face_cascade is not None
    run_objects = enable_object_detection and object_net is not None

    # Nothing to run (or nothing to run it on): skip the colour conversion.
    if image is None or image.size == 0 or not (run_faces or run_objects):
        return image, face_results, object_results

    # OpenCV models expect BGR
    image_bgr = _ensure_bgr(image)

    # Face detection
    if run_faces:
        gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(
            gray,
            scaleFactor=1.1,
            minNeighbors=5,
            minSize=(30, 30)
        )

        for i, (x, y, w, h) in enumerate(faces):
            face_results.append({
                "id": i,
                "bbox": [int(x), int(y), int(w), int(h)],
                "confidence": 1.0,  # Haar cascade doesn't provide confidence
                "label": "face"
            })

    # Object detection
    if run_objects:
        try:
            h, w = image_bgr.shape[:2]
            blob = cv2.dnn.blobFromImage(
                image_bgr, 0.007843, (300, 300), 127.5
            )
            object_net.setInput(blob)
            detections = object_net.forward()

            for i in range(detections.shape[2]):
                confidence = float(detections[0, 0, i, 2])
                if confidence <= object_confidence:
                    continue

                idx = int(detections[0, 0, i, 1])
                # Reject negative ids too: they would index from the end.
                if not 0 <= idx < len(class_names):
                    continue

                # Coordinates are scaled by the image size and clamped so
                # boxes never extend outside the frame.
                x1 = min(max(int(detections[0, 0, i, 3] * w), 0), w)
                y1 = min(max(int(detections[0, 0, i, 4] * h), 0), h)
                x2 = min(max(int(detections[0, 0, i, 5] * w), 0), w)
                y2 = min(max(int(detections[0, 0, i, 6] * h), 0), h)
                if x2 <= x1 or y2 <= y1:
                    continue

                object_results.append({
                    "id": i,
                    "bbox": [x1, y1, x2 - x1, y2 - y1],
                    "confidence": confidence,
                    "label": class_names[idx]
                })
        except Exception as e:
            logger.warning(f"Object detection failed: {e}")

    return image, face_results, object_results
131
+
132
def _draw_labeled_box(canvas: np.ndarray, bbox: List[int], label: Any, color: Tuple[int, int, int]) -> None:
    """Draw one bounding box, and its caption if given, onto canvas in place."""
    x, y, w, h = (int(v) for v in bbox)
    cv2.rectangle(canvas, (x, y), (x + w, y + h), color, 2)

    if label is None:
        return

    text_w, text_h = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
    if y - text_h - 10 >= 0:
        # Usual case: caption strip sits directly above the box.
        strip_top, strip_bottom, baseline = y - text_h - 10, y, y - 5
    else:
        # Box touches the top of the image: put the caption inside the box
        # instead of drawing it off-screen.
        strip_top, strip_bottom, baseline = y, y + text_h + 10, y + text_h + 5

    cv2.rectangle(canvas, (x, strip_top), (x + text_w, strip_bottom), color, -1)
    cv2.putText(
        canvas, label, (x, baseline),
        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2
    )


def draw_detections(
    image: np.ndarray,
    face_results: List[Dict],
    object_results: List[Dict],
    show_labels: bool,
    box_color: str
) -> np.ndarray:
    """
    Draw bounding boxes and labels on the image.

    Args:
        image: Input image in RGB channel order (RGBA and grayscale are
            also accepted); it is not modified
        face_results: Face detection results
        object_results: Object detection results
        show_labels: Whether to show labels
        box_color: Color name for bounding boxes; unknown names fall back
            to red

    Returns:
        A new 3-channel RGB image with the detections drawn on it
    """
    # Build a contiguous 3-channel RGB canvas that OpenCV can draw on.
    if image.ndim == 2:
        canvas = np.repeat(image[:, :, np.newaxis], 3, axis=2)
    elif image.shape[2] == 1:
        canvas = np.repeat(image, 3, axis=2)
    elif image.shape[2] == 4:
        canvas = image[:, :, :3].copy()
    else:
        canvas = image.copy()
    canvas = np.ascontiguousarray(canvas)

    # Color mapping (BGR order)
    color_map = {
        "red": (0, 0, 255),
        "green": (0, 255, 0),
        "blue": (255, 0, 0),
        "yellow": (0, 255, 255),
        "purple": (255, 0, 255),
        "orange": (0, 165, 255)
    }

    # The canvas is RGB, so reverse the BGR triple. Drawing directly in RGB
    # gives the same pixels as converting to BGR and back, without two
    # full-image conversions.
    blue, green, red = color_map.get(box_color, (0, 0, 255))
    color = (red, green, blue)

    # Draw face detections
    for face in face_results:
        label = f"Face {face['id']}" if show_labels else None
        _draw_labeled_box(canvas, face["bbox"], label, color)

    # Draw object detections
    for obj in object_results:
        label = f"{obj['label']}: {obj['confidence']:.2f}" if show_labels else None
        _draw_labeled_box(canvas, obj["bbox"], label, color)

    return canvas
212
+
213
def format_results(results: List[Dict], result_type: str) -> str:
    """
    Format detection results as a readable string.

    Args:
        results: Detection results; each entry needs ``id`` and ``bbox``
            (``[x, y, w, h]``) and may carry ``confidence`` and ``label``
        result_type: Type of results (face/object), used as the noun

    Returns:
        Formatted multi-line string, or a "No ...s detected" message when
        results is empty
    """
    if not results:
        return f"No {result_type}s detected"

    # Only pluralize the noun when there is more than one result.
    noun = result_type if len(results) == 1 else f"{result_type}s"
    title = result_type.capitalize()

    output = [f"Detected {len(results)} {noun}:"]
    for result in results:
        bbox = result["bbox"]
        output.append(
            f" - {title} {result['id']}: "
            f"Position({bbox[0]}, {bbox[1]}), Size({bbox[2]}x{bbox[3]})"
        )
        if "confidence" in result:
            output.append(f" Confidence: {result['confidence']:.2f}")
        # The label is only worth printing when it adds information.
        if "label" in result and result["label"] != result_type:
            output.append(f" Label: {result['label']}")

    return "\n".join(output)