Spaces:

AffordableAI
/

Real_Time_Safety_Monitoring

Sleeping

App Files Community

capradeepgujaran committed on Oct 24, 2024

Commit

64b8a91

verified ·

1 Parent(s): 32acaa8

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -381

app.py CHANGED Viewed

@@ -2,277 +2,24 @@ import gradio as gr
 import cv2
 import numpy as np
 from groq import Groq
 from PIL import Image as PILImage
 import io
-import base64
-import torch
-import warnings
-from typing import Tuple, List, Dict, Optional
 import os
-# Suppress warnings
-warnings.filterwarnings('ignore', category=FutureWarning)
-warnings.filterwarnings('ignore', category=UserWarning)
-class RobustSafetyMonitor:
-    def __init__(self):
-        """Initialize the safety detection tool with improved configuration."""
-        self.client = Groq()
-        self.model_name = "llama-3.2-11b-vision-preview"
-        self.max_image_size = (800, 800)
-        self.colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0), (255, 0, 255)]
-        # Load YOLOv5 with optimized settings
-        self.yolo_model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
-        self.yolo_model.conf = 0.25  # Lower confidence threshold
-        self.yolo_model.iou = 0.45   # Adjusted IOU threshold
-        self.yolo_model.classes = None  # Detect all classes
-        self.yolo_model.max_det = 50  # Increased maximum detections
-        self.yolo_model.cpu()
-        self.yolo_model.eval()
-        # Construction-specific keywords
-        self.construction_keywords = [
-            'person', 'worker', 'helmet', 'tool', 'machine', 'equipment',
-            'brick', 'block', 'pile', 'stack', 'surface', 'floor', 'ground',
-            'construction', 'building', 'structure'
-        ]
-    def preprocess_image(self, frame: np.ndarray) -> np.ndarray:
-        """Process image for analysis."""
-        if frame is None:
-            raise ValueError("No image provided")
-        if len(frame.shape) == 2:
-            frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
-        elif len(frame.shape) == 3 and frame.shape[2] == 4:
-            frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)
-        return self.resize_image(frame)
-    def resize_image(self, image: np.ndarray) -> np.ndarray:
-        """Resize image while maintaining aspect ratio."""
-        height, width = image.shape[:2]
-        if height > self.max_image_size[1] or width > self.max_image_size[0]:
-            aspect = width / height
-            if width > height:
-                new_width = self.max_image_size[0]
-                new_height = int(new_width / aspect)
-            else:
-                new_height = self.max_image_size[1]
-                new_width = int(new_height * aspect)
-            return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
-        return image
-    def encode_image(self, frame: np.ndarray) -> str:
-        """Convert image to base64 encoding."""
-        try:
-            frame_pil = PILImage.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-            buffered = io.BytesIO()
-            frame_pil.save(buffered, format="JPEG", quality=95)
-            img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
-            return f"data:image/jpeg;base64,{img_base64}"
-        except Exception as e:
-            raise ValueError(f"Error encoding image: {str(e)}")
-    def detect_objects(self, frame: np.ndarray) -> Tuple[np.ndarray, Dict]:
-        """Enhanced object detection using YOLOv5."""
-        try:
-            # Ensure proper image format
-            if len(frame.shape) == 2:
-                frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
-            elif frame.shape[2] == 4:
-                frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)
-            # Run inference with augmentation
-            with torch.no_grad():
-                results = self.yolo_model(frame, augment=True)
-            # Get detections
-            bbox_data = results.xyxy[0].cpu().numpy()
-            labels = results.names
-            # Filter and process detections
-            processed_boxes = []
-            for box in bbox_data:
-                x1, y1, x2, y2, conf, cls = box
-                if conf > 0.25:  # Keep lower confidence threshold
-                    processed_boxes.append(box)
-            return np.array(processed_boxes), labels
-        except Exception as e:
-            print(f"Error in object detection: {str(e)}")
-            return np.array([]), {}
-    def analyze_frame(self, frame: np.ndarray) -> Tuple[List[Dict], str]:
-        """Perform safety analysis using Llama Vision."""
-        if frame is None:
-            return [], "No frame received"
-        try:
-            frame = self.preprocess_image(frame)
-            image_base64 = self.encode_image(frame)
-            completion = self.client.chat.completions.create(
-                model=self.model_name,
-                messages=[
-                    {
-                        "role": "user",
-                        "content": [
-                            {
-                                "type": "text",
-                                "text": """Analyze this workplace image for safety risks. Focus on:
-                                1. Worker posture and positioning
-                                2. Equipment and tool safety
-                                3. Environmental hazards
-                                4. PPE compliance
-                                5. Material handling
-                                List each risk on a new line starting with 'Risk:'.
-                                Format: Risk: [Object/Area] - [Detailed description of hazard]"""
-                            },
-                            {
-                                "type": "image_url",
-                                "image_url": {
-                                    "url": image_base64
-                                }
-                            }
-                        ]
-                    }
-                ],
-                temperature=0.7,
-                max_tokens=1024,
-                stream=False
-            )
-            try:
-                response = completion.choices[0].message.content
-            except AttributeError:
-                response = str(completion.choices[0].message)
-            safety_issues = self.parse_safety_analysis(response)
-            return safety_issues, response
-        except Exception as e:
-            print(f"Analysis error: {str(e)}")
-            return [], f"Analysis Error: {str(e)}"
-    def draw_bounding_boxes(self, image: np.ndarray, bboxes: np.ndarray,
-                          labels: Dict, safety_issues: List[Dict]) -> np.ndarray:
-        """Improved bounding box visualization."""
-        image_copy = image.copy()
-        font = cv2.FONT_HERSHEY_SIMPLEX
-        font_scale = 0.5
-        thickness = 2
-        for idx, bbox in enumerate(bboxes):
-            try:
-                x1, y1, x2, y2, conf, class_id = bbox
-                label = labels[int(class_id)]
-                # Check if object is construction-related
-                is_relevant = any(keyword in label.lower() for keyword in self.construction_keywords)
-                if is_relevant or conf > 0.35:
-                    color = self.colors[idx % len(self.colors)]
-                    # Convert coordinates to integers
-                    x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
-                    # Draw bounding box
-                    cv2.rectangle(image_copy, (x1, y1), (x2, y2), color, thickness)
-                    # Check for associated safety issues
-                    risk_found = False
-                    for safety_issue in safety_issues:
-                        issue_keywords = safety_issue.get('object', '').lower().split()
-                        if any(keyword in label.lower() for keyword in issue_keywords):
-                            label_text = f"Risk: {safety_issue.get('description', '')}"
-                            y_pos = max(y1 - 10, 20)
-                            cv2.putText(image_copy, label_text, (x1, y_pos), font,
-                                      font_scale, (0, 0, 255), thickness)
-                            risk_found = True
-                            break
-                    if not risk_found:
-                        label_text = f"{label} {conf:.2f}"
-                        y_pos = max(y1 - 10, 20)
-                        cv2.putText(image_copy, label_text, (x1, y_pos), font,
-                                  font_scale, color, thickness)
-                    # Mark high-risk areas
-                    if conf > 0.5 and any(risk_word in label.lower() for risk_word in
-                                        ['worker', 'person', 'equipment', 'machine']):
-                        cv2.circle(image_copy, (int((x1 + x2)/2), int((y1 + y2)/2)),
-                                 5, (0, 0, 255), -1)
-            except Exception as e:
-                print(f"Error drawing box: {str(e)}")
-                continue
-        return image_copy
-    def process_frame(self, frame: np.ndarray) -> Tuple[Optional[np.ndarray], str]:
-        """Main processing pipeline for safety analysis."""
-        if frame is None:
-            return None, "No image provided"
-        try:
-            # Detect objects
-            bbox_data, labels = self.detect_objects(frame)
-            # Get safety analysis
-            safety_issues, analysis = self.analyze_frame(frame)
-            # Draw annotations
-            annotated_frame = self.draw_bounding_boxes(frame, bbox_data, labels, safety_issues)
-            return annotated_frame, analysis
-        except Exception as e:
-            print(f"Processing error: {str(e)}")
-            return None, f"Error processing image: {str(e)}"
-    def parse_safety_analysis(self, analysis: str) -> List[Dict]:
-        """Parse the safety analysis text."""
-        safety_issues = []
-        if not isinstance(analysis, str):
-            return safety_issues
-        for line in analysis.split('\n'):
-            if "risk:" in line.lower():
-                try:
-                    parts = line.lower().split('risk:', 1)[1].strip()
-                    if '-' in parts:
-                        obj, desc = parts.split('-', 1)
-                    else:
-                        obj, desc = parts, parts
-                    safety_issues.append({
-                        "object": obj.strip(),
-                        "description": desc.strip()
-                    })
-                except Exception as e:
-                    print(f"Error parsing line: {line}, Error: {str(e)}")
-                    continue
-        return safety_issues
 def create_monitor_interface():
     api_key = os.getenv("GROQ_API_KEY")
     class SafetyMonitor:
         def __init__(self):
-            """Initialize Safety Monitor with configuration."""
             self.client = Groq()
             self.model_name = "llama-3.2-90b-vision-preview"
-            self.max_image_size = (800, 800)
-            self.colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0), (255, 0, 255)]
         def resize_image(self, image):
-            """Resize image while maintaining aspect ratio."""
             height, width = image.shape[:2]
             aspect = width / height
@@ -286,7 +33,6 @@ def create_monitor_interface():
             return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
         def analyze_frame(self, frame: np.ndarray) -> str:
-            """Analyze frame for safety concerns."""
             if frame is None:
                 return "No frame received"
@@ -299,11 +45,11 @@ def create_monitor_interface():
             frame = self.resize_image(frame)
             frame_pil = PILImage.fromarray(frame)
-            # Convert to base64
             buffered = io.BytesIO()
             frame_pil.save(buffered,
                          format="JPEG",
-                         quality=95,  # High quality for better analysis
                          optimize=True)
             img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
             image_url = f"data:image/jpeg;base64,{img_base64}"
@@ -317,21 +63,9 @@ def create_monitor_interface():
                             "content": [
                                 {
                                     "type": "text",
-                                    "text": """Analyze this workplace image for safety hazards. For each hazard:
-                                    1. Specify the exact location (e.g., center, top-left, bottom-right)
-                                    2. Describe the safety concern in detail
-                                    Format each finding as:
-                                    - <location>position:detailed safety description</location>
-                                    Consider:
-                                    - PPE usage and compliance
-                                    - Ergonomic risks
-                                    - Equipment safety
-                                    - Environmental hazards
-                                    - Work procedures
-                                    - Material handling
-                                    """
                                 },
                                 {
                                     "type": "image_url",
@@ -340,139 +74,78 @@ def create_monitor_interface():
                                     }
                                 }
                             ]
                         }
                     ],
-                    temperature=0.5,
-                    max_tokens=500,
-                    stream=False
                 )
                 return completion.choices[0].message.content
             except Exception as e:
-                print(f"Analysis error: {str(e)}")
                 return f"Analysis Error: {str(e)}"
         def draw_observations(self, image, observations):
-            """Draw safety observations with accurate locations."""
             height, width = image.shape[:2]
             font = cv2.FONT_HERSHEY_SIMPLEX
             font_scale = 0.5
             thickness = 2
-            def get_region_coordinates(location_text):
-                """Get coordinates based on location description."""
-                location_text = location_text.lower()
-                regions = {
-                    # Basic positions
-                    'center': (width//3, height//3, 2*width//3, 2*height//3),
-                    'top': (width//4, 0, 3*width//4, height//3),
-                    'bottom': (width//4, 2*height//3, 3*width//4, height),
-                    'left': (0, height//4, width//3, 3*height//4),
-                    'right': (2*width//3, height//4, width, 3*height//4),
-                    'top-left': (0, 0, width//3, height//3),
-                    'top-right': (2*width//3, 0, width, height//3),
-                    'bottom-left': (0, 2*height//3, width//3, height),
-                    'bottom-right': (2*width//3, 2*height//3, width, height),
-                    # Work areas
-                    'workspace': (width//4, height//4, 3*width//4, 3*height//4),
-                    'machine': (2*width//3, 0, width, height),
-                    'equipment': (2*width//3, height//3, width, 2*height//3),
-                    'material': (0, 2*height//3, width//3, height),
-                    'ground': (0, 2*height//3, width, height),
-                    'floor': (0, 3*height//4, width, height),
-                    # Body regions
-                    'body': (width//3, height//3, 2*width//3, 2*height//3),
-                    'hands': (width//2, height//2, 3*width//4, 2*height//3),
-                    'head': (width//3, 0, 2*width//3, height//4),
-                    'feet': (width//3, 3*height//4, 2*width//3, height),
-                    'back': (width//3, height//3, 2*width//3, 2*height//3),
-                    'knees': (width//3, 2*height//3, 2*width//3, height),
-                    # Special areas
-                    'workspace': (width//4, height//4, 3*width//4, 3*height//4),
-                    'working-area': (width//4, height//4, 3*width//4, 3*height//4),
-                    'surrounding': (0, 0, width, height),
-                    'background': (0, 0, width, height)
-                }
-                # Find best matching region
-                best_match = 'center'  # default
-                max_match_length = 0
-                for region_name in regions.keys():
-                    if region_name in location_text and len(region_name) > max_match_length:
-                        best_match = region_name
-                        max_match_length = len(region_name)
-                return regions[best_match]
             for idx, obs in enumerate(observations):
                 color = self.colors[idx % len(self.colors)]
-                # Split location and description if available
-                parts = obs.split(':')
-                if len(parts) >= 2:
-                    location = parts[0]
-                    description = ':'.join(parts[1:])
-                else:
-                    location = 'center'
-                    description = obs
-                # Get region coordinates
-                x1, y1, x2, y2 = get_region_coordinates(location)
                 # Draw rectangle
-                cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
-                # Add label
-                label = description[:50] + "..." if len(description) > 50 else description
                 label_size = cv2.getTextSize(label, font, font_scale, thickness)[0]
-                # Position text above box
-                text_x = max(0, x1)
-                text_y = max(20, y1 - 5)
-                # Draw text background
-                cv2.rectangle(image,
-                            (text_x, text_y - label_size[1] - 5),
-                            (text_x + label_size[0], text_y),
-                            color, -1)
-                # Draw text
-                cv2.putText(image, label, (text_x, text_y - 5),
-                           font, font_scale, (255, 255, 255), thickness)
             return image
         def process_frame(self, frame: np.ndarray) -> tuple[np.ndarray, str]:
-            """Process frame and generate safety analysis."""
             if frame is None:
                 return None, "No image provided"
             analysis = self.analyze_frame(frame)
             display_frame = self.resize_image(frame.copy())
-            # Parse observations
             observations = []
             for line in analysis.split('\n'):
                 line = line.strip()
                 if line.startswith('-'):
                     if '<location>' in line and '</location>' in line:
                         start = line.find('<location>') + len('<location>')
                         end = line.find('</location>')
-                        observation = line[start:end].strip()
-                        if observation:
-                            observations.append(observation)
-            # Draw observations
-            if observations:
-                annotated_frame = self.draw_observations(display_frame, observations)
-                return annotated_frame, analysis
-            return display_frame, analysis
-    # Create interface
     monitor = SafetyMonitor()
     with gr.Blocks() as demo:
@@ -480,7 +153,7 @@ def create_monitor_interface():
         with gr.Row():
             input_image = gr.Image(label="Upload Image")
-            output_image = gr.Image(label="Safety Analysis")
         analysis_text = gr.Textbox(label="Detailed Analysis", lines=5)
@@ -500,15 +173,7 @@ def create_monitor_interface():
             outputs=[output_image, analysis_text]
         )
-        gr.Markdown("""
-        ## Instructions:
-        1. Upload a workplace image
-        2. View detected safety concerns
-        3. Check detailed analysis
-        """)
     return demo
-if __name__ == "__main__":
-    demo = create_monitor_interface()
-    demo.launch()

 import cv2
 import numpy as np
 from groq import Groq
+import time
 from PIL import Image as PILImage
 import io
 import os
+import base64
+import random
 def create_monitor_interface():
     api_key = os.getenv("GROQ_API_KEY")
     class SafetyMonitor:
         def __init__(self):
             self.client = Groq()
             self.model_name = "llama-3.2-90b-vision-preview"
+            self.max_image_size = (800, 800)  # Increased size for better visibility
+            self.colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255)]
         def resize_image(self, image):
             height, width = image.shape[:2]
             aspect = width / height
             return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
         def analyze_frame(self, frame: np.ndarray) -> str:
             if frame is None:
                 return "No frame received"
             frame = self.resize_image(frame)
             frame_pil = PILImage.fromarray(frame)
+            # Convert to base64 with minimal quality
             buffered = io.BytesIO()
             frame_pil.save(buffered,
                          format="JPEG",
+                         quality=30,
                          optimize=True)
             img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
             image_url = f"data:image/jpeg;base64,{img_base64}"
                             "content": [
                                 {
                                     "type": "text",
+                                    "text": """Analyze this workplace image and describe each safety concern in this format:
+                                    - <location>Description</location>
+                                    Use one line per issue, starting with a dash and location in tags."""
                                 },
                                 {
                                     "type": "image_url",
                                     }
                                 }
                             ]
+                        },
+                        {
+                            "role": "assistant",
+                            "content": ""
                         }
                     ],
+                    temperature=0.1,
+                    max_tokens=150,
+                    top_p=1,
+                    stream=False,
+                    stop=None
                 )
                 return completion.choices[0].message.content
             except Exception as e:
+                print(f"Detailed error: {str(e)}")
                 return f"Analysis Error: {str(e)}"
         def draw_observations(self, image, observations):
             height, width = image.shape[:2]
             font = cv2.FONT_HERSHEY_SIMPLEX
             font_scale = 0.5
             thickness = 2
+            # Generate random positions for each observation
             for idx, obs in enumerate(observations):
                 color = self.colors[idx % len(self.colors)]
+                # Generate random box position
+                box_width = width // 3
+                box_height = height // 3
+                x = random.randint(0, width - box_width)
+                y = random.randint(0, height - box_height)
                 # Draw rectangle
+                cv2.rectangle(image, (x, y), (x + box_width, y + box_height), color, 2)
+                # Add label with background
+                label = obs[:40] + "..." if len(obs) > 40 else obs
                 label_size = cv2.getTextSize(label, font, font_scale, thickness)[0]
+                cv2.rectangle(image, (x, y - 20), (x + label_size[0], y), color, -1)
+                cv2.putText(image, label, (x, y - 5), font, font_scale, (255, 255, 255), thickness)
             return image
         def process_frame(self, frame: np.ndarray) -> tuple[np.ndarray, str]:
             if frame is None:
                 return None, "No image provided"
             analysis = self.analyze_frame(frame)
             display_frame = self.resize_image(frame.copy())
+            # Parse observations from the analysis
             observations = []
             for line in analysis.split('\n'):
                 line = line.strip()
                 if line.startswith('-'):
+                    # Extract text between <location> tags if present
                     if '<location>' in line and '</location>' in line:
                         start = line.find('<location>') + len('<location>')
                         end = line.find('</location>')
+                        observation = line[end + len('</location>'):].strip()
+                    else:
+                        observation = line[1:].strip()  # Remove the dash
+                    if observation:
+                        observations.append(observation)
+            # Draw observations on the image
+            annotated_frame = self.draw_observations(display_frame, observations)
+            return annotated_frame, analysis
+    # Create the main interface
     monitor = SafetyMonitor()
     with gr.Blocks() as demo:
         with gr.Row():
             input_image = gr.Image(label="Upload Image")
+            output_image = gr.Image(label="Annotated Results")
         analysis_text = gr.Textbox(label="Detailed Analysis", lines=5)
             outputs=[output_image, analysis_text]
         )
     return demo
+demo = create_monitor_interface()
+demo.launch()