Spaces:

AffordableAI
/

Real_Time_Safety_Monitoring

Sleeping

App Files Files Community

capradeepgujaran committed on Oct 23, 2024

Commit

519704e

verified ·

1 Parent(s): 771e08a

Update app.py

Browse files

Files changed (1) hide show

app.py +172 -173

app.py CHANGED Viewed

@@ -10,13 +10,14 @@ import base64
 class SafetyMonitor:
     def __init__(self):
         self.client = Groq()
         self.model_name = "llama-3.2-90b-vision-preview"
         self.max_image_size = (800, 800)
         self.colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0), (255, 0, 255)]
     def preprocess_image(self, frame):
-        """Prepare image for analysis."""
         if len(frame.shape) == 2:
             frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
         elif len(frame.shape) == 3 and frame.shape[2] == 4:
@@ -46,8 +47,8 @@ class SafetyMonitor:
         img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
         return f"data:image/jpeg;base64,{img_base64}"
-    def get_scene_context(self, image: np.ndarray) -> str:
-        """Get scene understanding to determine context."""
         try:
             image_url = self.encode_image(image)
             completion = self.client.chat.completions.create(
@@ -58,15 +59,15 @@ class SafetyMonitor:
                         "content": [
                             {
                                 "type": "text",
-                                "text": """Describe the key areas and elements visible in this construction/workplace image. Include:
                                 1. Worker locations and activities
-                                2. Equipment and machinery positions
-                                3. Material storage or work areas
-                                4. Environmental features
-                                5. Access ways and pathways
-                                Format as:
-                                - Element: precise location description"""
                             },
                             {
                                 "type": "image_url",
@@ -86,174 +87,172 @@ class SafetyMonitor:
             print(f"Scene analysis error: {str(e)}")
             return ""
-    def analyze_frame(self, frame: np.ndarray) -> tuple[str, dict]:
-            """Analyze frame and return both safety analysis and scene context."""
-            if frame is None:
-                return "No frame received", {}
-            # First get scene understanding
-            scene_context = self.get_scene_context(frame)
-            scene_regions = self.parse_scene_context(scene_context)
-            # Then perform safety analysis with context
-            frame = self.preprocess_image(frame)
-            image_url = self.encode_image(frame)
-            try:
-                completion = self.client.chat.completions.create(
-                    model=self.model_name,
-                    messages=[
-                        {
-                            "role": "user",
-                            "content": [
-                                {
-                                    "type": "text",
-                                    "text": """Analyze this workplace image for safety concerns. For each identified hazard:
-                                    1. Specify the exact location where the hazard exists
-                                    2. Describe the specific safety issue
-                                    3. Note any violations or risks
-                                    Format each observation exactly as:
-                                    - <location>area:detailed hazard description</location>
-                                    Consider all safety aspects:
-                                    - PPE compliance
-                                    - Ergonomic risks
-                                    - Equipment safety
-                                    - Environmental hazards
-                                    - Material handling
-                                    - Access/egress
-                                    - Work procedures
-                                    """
-                                },
-                                {
-                                    "type": "image_url",
-                                    "image_url": {
-                                        "url": image_url
-                                    }
                                 }
-                            ]
-                        }
-                    ],
-                    temperature=0.5,
-                    max_tokens=500,
-                    stream=False
-                )
-                return completion.choices[0].message.content, scene_regions
-            except Exception as e:
-                print(f"Analysis error: {str(e)}")
-                return f"Analysis Error: {str(e)}", scene_regions
-        def parse_scene_context(self, context: str) -> dict:
-            """Parse scene context to get region mapping."""
-            regions = {}
-            for line in context.split('\n'):
-                if line.strip().startswith('-'):
-                    parts = line.strip('- ').split(':')
-                    if len(parts) == 2:
-                        element_type = parts[0].strip()
-                        location = parts[1].strip()
-                        regions[element_type] = location
-            return regions
-        def get_region_coordinates(self, location: str, image_shape: tuple) -> tuple:
-            """Convert location description to coordinates."""
-            height, width = image_shape[:2]
-            # Parse location description for spatial information
-            location = location.lower()
-            x1, y1, x2, y2 = 0, 0, width, height  # Default to full image
-            # Horizontal position
-            if 'left' in location:
-                x2 = width // 2
-            elif 'right' in location:
-                x1 = width // 2
-            elif 'center' in location:
-                x1 = width // 4
-                x2 = 3 * width // 4
-            # Vertical position
-            if 'top' in location:
-                y2 = height // 2
-            elif 'bottom' in location:
-                y1 = height // 2
-            elif 'middle' in location or 'center' in location:
-                y1 = height // 4
-                y2 = 3 * height // 4
-            return (x1, y1, x2, y2)
-        def draw_observations(self, image: np.ndarray, observations: list, scene_regions: dict) -> np.ndarray:
-            """Draw safety observations using scene context."""
-            height, width = image.shape[:2]
-            font = cv2.FONT_HERSHEY_SIMPLEX
-            font_scale = 0.5
-            thickness = 2
-            padding = 10
-            for idx, obs in enumerate(observations):
-                color = self.colors[idx % len(self.colors)]
-                # Find best matching region from scene context or parse location directly
-                location = obs['location'].lower()
-                x1, y1, x2, y2 = self.get_region_coordinates(location, image.shape)
-                # Draw observation box
-                cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
-                # Add label
-                label = obs['description'][:50] + "..." if len(obs['description']) > 50 else obs['description']
-                label_size, _ = cv2.getTextSize(label, font, font_scale, thickness)
-                # Position text above the box
-                text_x = max(0, x1)
-                text_y = max(label_size[1] + padding, y1 - padding)
-                # Draw text background
-                cv2.rectangle(image,
-                             (text_x, text_y - label_size[1] - padding),
-                             (text_x + label_size[0] + padding, text_y),
-                             color, -1)
-                # Draw text
-                cv2.putText(image, label,
-                           (text_x + padding//2, text_y - padding//2),
-                           font, font_scale, (255, 255, 255), thickness)
-            return image
-        def process_frame(self, frame: np.ndarray) -> tuple[np.ndarray, str]:
-                """Process frame with safety analysis and visualization."""
-                if frame is None:
-                    return None, "No image provided"
-                # Get analysis and scene context
-                analysis, scene_regions = self.analyze_frame(frame)
-                display_frame = frame.copy()
-                # Parse observations
-                observations = []
-                for line in analysis.split('\n'):
-                    line = line.strip()
-                    if line.startswith('-') and '<location>' in line and '</location>' in line:
-                        start = line.find('<location>') + len('<location>')
-                        end = line.find('</location>')
-                        location_description = line[start:end].strip()
-                        if ':' in location_description:
-                            location, description = location_description.split(':', 1)
-                            observations.append({
-                                'location': location.strip(),
-                                'description': description.strip()
-                            })
-                # Draw observations if any were found
-                if observations:
-                    annotated_frame = self.draw_observations(display_frame, observations, scene_regions)
-                    return annotated_frame, analysis
-                return display_frame, analysis
 def create_monitor_interface():
     monitor = SafetyMonitor()

 class SafetyMonitor:
     def __init__(self):
+        """Initialize Safety Monitor with configuration."""
         self.client = Groq()
         self.model_name = "llama-3.2-90b-vision-preview"
         self.max_image_size = (800, 800)
         self.colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0), (255, 0, 255)]
     def preprocess_image(self, frame):
+        """Process image for analysis."""
         if len(frame.shape) == 2:
             frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
         elif len(frame.shape) == 3 and frame.shape[2] == 4:
         img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
         return f"data:image/jpeg;base64,{img_base64}"
+    def get_scene_context(self, image):
+        """Analyze the scene context."""
         try:
             image_url = self.encode_image(image)
             completion = self.client.chat.completions.create(
                         "content": [
                             {
                                 "type": "text",
+                                "text": """Analyze this workplace image and identify key areas and elements. Include:
                                 1. Worker locations and activities
+                                2. Equipment and machinery
+                                3. Materials and storage
+                                4. Access routes and paths
+                                5. Hazardous areas
+                                Format each observation as:
+                                - Element: specific location in image"""
                             },
                             {
                                 "type": "image_url",
             print(f"Scene analysis error: {str(e)}")
             return ""
+    def analyze_frame(self, frame):
+        """Ask the vision model to list safety hazards visible in a frame.
+
+        The frame is passed through ``self.preprocess_image`` and
+        ``self.encode_image`` and sent, together with a fixed prompt, as a
+        single user message to ``self.client.chat.completions.create``.
+
+        Returns:
+            A ``(text, regions)`` tuple. ``text`` is the model's reply,
+            ``"No frame received"`` when ``frame`` is None, or an
+            ``"Analysis Error: ..."`` message when the request raises.
+            ``regions`` is always an empty dict in this implementation.
+        """
+        if frame is None:
+            return "No frame received", {}
+        prepared = self.preprocess_image(frame)
+        image_url = self.encode_image(prepared)
+        # The continuation lines of the prompt are part of the string literal,
+        # so their indentation is kept exactly as sent to the model.
+        prompt = """Analyze this image for safety hazards. For each hazard:
+                                1. Specify the precise location in the image
+                                2. Describe the safety concern or violation
+                                3. Indicate the potential risk
+                                Format each finding as:
+                                - <location>position:detailed safety concern</location>
+                                Look for all types of safety issues:
+                                - PPE compliance
+                                - Ergonomic risks
+                                - Equipment safety
+                                - Environmental hazards
+                                - Material handling
+                                - Work procedures
+                                - Access and egress
+                                - Housekeeping"""
+        user_message = {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": prompt},
+                {"type": "image_url", "image_url": {"url": image_url}},
+            ],
+        }
+        try:
+            completion = self.client.chat.completions.create(
+                model=self.model_name,
+                messages=[user_message],
+                temperature=0.5,
+                max_tokens=500,
+                stream=False,
+            )
+            return completion.choices[0].message.content, {}
+        except Exception as e:
+            # Failures are reported in the returned text rather than raised.
+            print(f"Analysis error: {str(e)}")
+            return f"Analysis Error: {str(e)}", {}
+    def get_region_coordinates(self, position, image_shape):
+        """Map a textual position such as "top-left" to a pixel box.
+
+        The image is split into a 3x3 grid of named cells ("top-left",
+        "center", "bottom-right", ...), plus three full-width bands:
+        "upper" (top half), "lower" (bottom half) and "middle" (central
+        third of the height).
+
+        Args:
+            position: Free-text location; matching is case-insensitive.
+            image_shape: Shape whose first two entries are (height, width).
+
+        Returns:
+            An ``(x1, y1, x2, y2)`` tuple for the most specific region named
+            in ``position``, or the full image when no region name is found.
+        """
+        # Local import so this method does not rely on the file-level imports.
+        import re
+        height, width = image_shape[:2]
+        regions = {
+            'center': (width//3, height//3, 2*width//3, 2*height//3),
+            'top': (width//3, 0, 2*width//3, height//3),
+            'bottom': (width//3, 2*height//3, 2*width//3, height),
+            'left': (0, height//3, width//3, 2*height//3),
+            'right': (2*width//3, height//3, width, 2*height//3),
+            'top-left': (0, 0, width//3, height//3),
+            'top-right': (2*width//3, 0, width, height//3),
+            'bottom-left': (0, 2*height//3, width//3, height),
+            'bottom-right': (2*width//3, 2*height//3, width, height),
+            'upper': (0, 0, width, height//2),
+            'lower': (0, height//2, width, height),
+            'middle': (0, height//3, width, 2*height//3)
+        }
+        # Underscores count as word characters for \b, so turn them into
+        # spaces: "top_left" then reads like "top left".
+        text = position.lower().replace('_', ' ')
+        best_match = None
+        max_match = 0
+        for region, coords in regions.items():
+            words = region.split('-')
+            # A region qualifies only when each of its words begins a word in
+            # the text. "top left" and "left, top" both select "top-left",
+            # while "rooftop" or "upright" no longer trigger "top" / "right".
+            if not all(re.search(r'\b' + word, text) for word in words):
+                continue
+            # More words means a more specific region; ties keep the earlier
+            # dictionary entry.
+            if len(words) > max_match:
+                max_match = len(words)
+                best_match = coords
+        if best_match is None:
+            return (0, 0, width, height)
+        return best_match
+    def draw_observations(self, image, observations):
+        """Outline each observation's region on ``image`` and caption it.
+
+        Each observation is a dict with ``'location'`` and ``'description'``
+        keys. The location is turned into a box by
+        ``self.get_region_coordinates``; the description, cut to 50
+        characters plus "..." when longer, is written on a filled strip in
+        the same color. Colors cycle through ``self.colors``.
+
+        Returns:
+            The same ``image`` object that was passed in, after drawing.
+        """
+        # Unused below; the unpack still fails fast on an image whose shape
+        # has fewer than two dimensions.
+        height, width = image.shape[:2]
+        text_font = cv2.FONT_HERSHEY_SIMPLEX
+        text_scale = 0.5
+        stroke = 2
+        pad = 10
+        for index, observation in enumerate(observations):
+            color = self.colors[index % len(self.colors)]
+            left, top, right, bottom = self.get_region_coordinates(
+                observation['location'], image.shape)
+            cv2.rectangle(image, (left, top), (right, bottom), color, 2)
+            if len(observation['description']) > 50:
+                label = observation['description'][:50] + "..."
+            else:
+                label = observation['description']
+            (label_w, label_h), _ = cv2.getTextSize(label, text_font, text_scale, stroke)
+            # Caption sits above the box, clamped so it stays inside the
+            # image at the top and left edges.
+            text_x = max(0, left)
+            text_y = max(label_h + pad, top - pad)
+            strip_top_left = (text_x, text_y - label_h - pad)
+            strip_bottom_right = (text_x + label_w + pad, text_y)
+            cv2.rectangle(image, strip_top_left, strip_bottom_right, color, -1)
+            text_origin = (text_x + pad//2, text_y - pad//2)
+            cv2.putText(image, label, text_origin,
+                        text_font, text_scale, (255, 255, 255), stroke)
+        return image
+    def process_frame(self, frame):
+        """Run the safety analysis on a frame and annotate a copy of it.
+
+        The analysis text from ``self.analyze_frame`` is scanned for lines
+        of the form ``- <location>position:description</location>``; each
+        becomes an observation that ``self.draw_observations`` draws on a
+        copy of ``frame``.
+
+        Returns:
+            ``(image, text)``: the copied frame (annotated when any
+            observation was parsed) with the raw analysis text;
+            ``(None, "No image provided")`` when ``frame`` is None; or
+            ``(None, "Error processing image: ...")`` when anything raises.
+        """
+        if frame is None:
+            return None, "No image provided"
+        open_tag = '<location>'
+        close_tag = '</location>'
+        try:
+            analysis, _ = self.analyze_frame(frame)
+            display_frame = frame.copy()
+            observations = []
+            for raw_line in analysis.split('\n'):
+                line = raw_line.strip()
+                if not line.startswith('-'):
+                    continue
+                if open_tag not in line or close_tag not in line:
+                    continue
+                # Slice between the first opening and the first closing tag;
+                # a closing tag that comes first yields an empty slice.
+                inner_start = line.find(open_tag) + len(open_tag)
+                inner_end = line.find(close_tag)
+                inner = line[inner_start:inner_end].strip()
+                location, colon, description = inner.partition(':')
+                if not colon:
+                    continue
+                observations.append({
+                    'location': location.strip(),
+                    'description': description.strip()
+                })
+            if not observations:
+                return display_frame, analysis
+            annotated_frame = self.draw_observations(display_frame, observations)
+            return annotated_frame, analysis
+        except Exception as e:
+            # Any failure is reported in the returned text instead of raised.
+            print(f"Processing error: {str(e)}")
+            return None, f"Error processing image: {str(e)}"
 def create_monitor_interface():
     monitor = SafetyMonitor()