Spaces:

AffordableAI
/

Real_Time_Safety_Monitoring

Sleeping

App Files Community

capradeepgujaran committed on Oct 23, 2024

Commit

bda20be

verified ·

1 Parent(s): bd1163f

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -79

app.py CHANGED Viewed

@@ -7,7 +7,6 @@ from PIL import Image as PILImage
 import io
 import os
 import base64
-import random
 def create_monitor_interface():
     api_key = os.getenv("GROQ_API_KEY")
@@ -16,26 +15,26 @@ def create_monitor_interface():
         def __init__(self):
             self.client = Groq()
             self.model_name = "llama-3.2-90b-vision-preview"
-            self.max_image_size = (800, 800)  # Increased size for better visibility
-            self.colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255)]
         def resize_image(self, image):
             height, width = image.shape[:2]
-            aspect = width / height
-            if width > height:
-                new_width = min(self.max_image_size[0], width)
-                new_height = int(new_width / aspect)
-            else:
-                new_height = min(self.max_image_size[1], height)
-                new_width = int(new_height * aspect)
-            return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
         def analyze_frame(self, frame: np.ndarray) -> str:
             if frame is None:
                 return "No frame received"
             # Convert and resize image
             if len(frame.shape) == 2:
                 frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
@@ -48,9 +47,9 @@ def create_monitor_interface():
             # High quality image for better analysis
             buffered = io.BytesIO()
             frame_pil.save(buffered,
-                          format="JPEG",
-                          quality=95,
-                          optimize=True)
             img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
             image_url = f"data:image/jpeg;base64,{img_base64}"
@@ -63,24 +62,24 @@ def create_monitor_interface():
                             "content": [
                                 {
                                     "type": "text",
-                                    "text": """Analyze this workplace image for safety conditions and hazards. Focus only on safety aspects such as:
-        1. Work posture and ergonomics at the shown position
-        2. Use of PPE and safety equipment
-        3. Tool handling and work techniques
-        4. Environmental conditions and surroundings
-        5. Equipment and machinery safety
-        6. Ground conditions and trip hazards
-        Do not identify or describe any individuals. Instead, describe the safety conditions and actions observed.
-        Format each safety observation as:
-        - <location>position:safety condition description</location>
-        Examples:
-        - <location>center:Improper kneeling posture without knee protection, risking joint injury</location>
-        - <location>left:Heavy machinery operating in close proximity to work area</location>
-        - <location>bottom:Uneven ground surface creating trip hazard near work zone</location>"""
                                 },
                                 {
                                     "type": "image_url",
@@ -91,15 +90,48 @@ def create_monitor_interface():
                             ]
                         }
                     ],
-                    temperature=0.7,
                     max_tokens=500,
                     stream=False
                 )
                 return completion.choices[0].message.content
             except Exception as e:
-                print(f"Detailed error: {str(e)}")
                 return f"Analysis Error: {str(e)}"
         def draw_observations(self, image, observations):
             """Draw accurate bounding boxes based on safety issue locations."""
             height, width = image.shape[:2]
@@ -110,7 +142,6 @@ def create_monitor_interface():
             def get_region_coordinates(position: str) -> tuple:
                 """Get coordinates based on position description."""
-                # Basic regions
                 regions = {
                     'center': (width//3, height//3, 2*width//3, 2*height//3),
                     'background': (0, 0, width, height),
@@ -122,7 +153,9 @@ def create_monitor_interface():
                     'bottom-left': (0, 2*height//3, width//3, height),
                     'bottom': (width//3, 2*height//3, 2*width//3, height),
                     'bottom-right': (2*width//3, 2*height//3, width, height),
-                    'ground': (0, 2*height//3, width, height)
                 }
                 # Find best matching region
@@ -131,7 +164,7 @@ def create_monitor_interface():
                     if key in position:
                         return regions[key]
-                return regions['center']  # Default to center if no match
             for idx, obs in enumerate(observations):
                 color = self.colors[idx % len(self.colors)]
@@ -152,51 +185,17 @@ def create_monitor_interface():
                 # Draw text background
                 cv2.rectangle(image,
-                             (text_x, text_y - label_size[1] - padding),
-                             (text_x + label_size[0] + padding, text_y),
-                             color, -1)
                 # Draw text
                 cv2.putText(image, label,
-                            (text_x + padding//2, text_y - padding//2),
-                            font, font_scale, (255, 255, 255), thickness)
             return image
-        def process_frame(self, frame: np.ndarray) -> tuple[np.ndarray, str]:
-            if frame is None:
-                return None, "No image provided"
-            analysis = self.analyze_frame(frame)
-            display_frame = frame.copy()
-            # Parse observations from the formatted response
-            observations = []
-            lines = analysis.split('\n')
-            for line in lines:
-                # Look for location tags in the line
-                if '<location>' in line and '</location>' in line:
-                    start = line.find('<location>') + len('<location>')
-                    end = line.find('</location>')
-                    location = line[start:end].strip()
-                    # Get the description that follows the location tag
-                    desc_start = line.find('</location>') + len('</location>:')
-                    description = line[desc_start:].strip()
-                    if location and description:
-                        observations.append({
-                            'location': location,
-                            'description': description
-                        })
-            # Draw observations if we found any
-            if observations:
-                annotated_frame = self.draw_observations(display_frame, observations)
-                return annotated_frame, analysis
-            return display_frame, analysis
     # Create the main interface
     monitor = SafetyMonitor()
@@ -225,6 +224,13 @@ def create_monitor_interface():
             outputs=[output_image, analysis_text]
         )
     return demo
 demo = create_monitor_interface()

 import io
 import os
 import base64
 def create_monitor_interface():
     api_key = os.getenv("GROQ_API_KEY")
         def __init__(self):
             self.client = Groq()
             self.model_name = "llama-3.2-90b-vision-preview"
+            self.max_image_size = (800, 800)
+            self.colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0), (255, 0, 255)]
         def resize_image(self, image):
             height, width = image.shape[:2]
+            if height > self.max_image_size[1] or width > self.max_image_size[0]:
+                aspect = width / height
+                if width > height:
+                    new_width = self.max_image_size[0]
+                    new_height = int(new_width / aspect)
+                else:
+                    new_height = self.max_image_size[1]
+                    new_width = int(new_height * aspect)
+                return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
+            return image
         def analyze_frame(self, frame: np.ndarray) -> str:
             if frame is None:
                 return "No frame received"
             # Convert and resize image
             if len(frame.shape) == 2:
                 frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
             # High quality image for better analysis
             buffered = io.BytesIO()
             frame_pil.save(buffered,
+                         format="JPEG",
+                         quality=95,
+                         optimize=True)
             img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
             image_url = f"data:image/jpeg;base64,{img_base64}"
                             "content": [
                                 {
                                     "type": "text",
+                                    "text": """Analyze this workplace image for safety conditions and hazards. Focus on:
+1. Work posture and ergonomics
+2. PPE and safety equipment usage
+3. Tool handling and techniques
+4. Environmental conditions
+5. Equipment and machinery safety
+6. Ground conditions and hazards
+Describe each safety condition observed, using this exact format:
+- <location>position</location>: detailed safety observation
+Examples:
+- <location>center</location>: Improper kneeling posture without knee protection, risking joint injury
+- <location>background</location>: Heavy machinery operating in close proximity creating hazard zone
+- <location>ground</location>: Uneven surface and debris creating trip hazards
+Be specific about locations and safety concerns."""
                                 },
                                 {
                                     "type": "image_url",
                             ]
                         }
                     ],
+                    temperature=0.5,
                     max_tokens=500,
                     stream=False
                 )
                 return completion.choices[0].message.content
             except Exception as e:
+                print(f"Analysis error: {str(e)}")
                 return f"Analysis Error: {str(e)}"
+        def process_frame(self, frame: np.ndarray) -> tuple[np.ndarray, str]:
+            if frame is None:
+                return None, "No image provided"
+            analysis = self.analyze_frame(frame)
+            display_frame = frame.copy()
+            # Parse observations from the formatted response
+            observations = []
+            lines = analysis.split('\n')
+            for line in lines:
+                if '<location>' in line and '</location>' in line:
+                    start = line.find('<location>') + len('<location>')
+                    end = line.find('</location>')
+                    location = line[start:end].strip()
+                    # Get the description that follows the location tags
+                    desc_start = line.find('</location>') + len('</location>:')
+                    description = line[desc_start:].strip()
+                    if location and description:
+                        observations.append({
+                            'location': location,
+                            'description': description
+                        })
+            # Draw observations if we found any
+            if observations:
+                annotated_frame = self.draw_observations(display_frame, observations)
+                return annotated_frame, analysis
+            return display_frame, analysis
         def draw_observations(self, image, observations):
             """Draw accurate bounding boxes based on safety issue locations."""
             height, width = image.shape[:2]
             def get_region_coordinates(position: str) -> tuple:
                 """Get coordinates based on position description."""
                 regions = {
                     'center': (width//3, height//3, 2*width//3, 2*height//3),
                     'background': (0, 0, width, height),
                     'bottom-left': (0, 2*height//3, width//3, height),
                     'bottom': (width//3, 2*height//3, 2*width//3, height),
                     'bottom-right': (2*width//3, 2*height//3, width, height),
+                    'ground': (0, 2*height//3, width, height),
+                    'machinery': (0, 0, width//2, height),
+                    'work-area': (width//4, height//4, 3*width//4, 3*height//4)
                 }
                 # Find best matching region
                     if key in position:
                         return regions[key]
+                return regions['center']
             for idx, obs in enumerate(observations):
                 color = self.colors[idx % len(self.colors)]
                 # Draw text background
                 cv2.rectangle(image,
+                            (text_x, text_y - label_size[1] - padding),
+                            (text_x + label_size[0] + padding, text_y),
+                            color, -1)
                 # Draw text
                 cv2.putText(image, label,
+                           (text_x + padding//2, text_y - padding//2),
+                           font, font_scale, (255, 255, 255), thickness)
             return image
     # Create the main interface
     monitor = SafetyMonitor()
             outputs=[output_image, analysis_text]
         )
+        gr.Markdown("""
+        ## Instructions:
+        1. Upload an image to analyze safety conditions
+        2. View annotated results showing safety concerns
+        3. Read detailed analysis of identified issues
+        """)
     return demo
 demo = create_monitor_interface()