| import gradio as gr |
| import cv2 |
| import numpy as np |
| from groq import Groq |
| import time |
| from PIL import Image as PILImage |
| import io |
| import os |
| import base64 |
| import random |
|
|
def create_monitor_interface():
    """Build the Gradio Blocks UI for the image safety analysis demo.

    A single ``SafetyMonitor`` is created and shared by the UI callback. For
    each uploaded image the monitor sends a JPEG-encoded, downscaled copy to
    the Groq chat-completions endpoint, parses the dash-prefixed lines of the
    reply into observations, and draws one labelled box per observation on a
    resized copy of the image.

    Returns:
        The ``gr.Blocks`` app. The caller is responsible for launching it.
    """
    # Handed to the client explicitly instead of being read and discarded.
    # NOTE(review): when this is None the Groq client is expected to perform
    # its own GROQ_API_KEY lookup and raise if nothing is found - confirm
    # against the SDK version in use.
    api_key = os.getenv("GROQ_API_KEY")

    class SafetyMonitor:
        """Sends images to a vision model and annotates them with its reply."""

        LOCATION_OPEN = "<location>"
        LOCATION_CLOSE = "</location>"
        LABEL_HEIGHT = 20      # pixel height of the filled label background
        MAX_LABEL_CHARS = 40   # longer observations are truncated with "..."

        ANALYSIS_PROMPT = (
            "Analyze this workplace image and describe each safety concern in this format:\n"
            "- <location>Description</location>\n"
            "Use one line per issue, starting with a dash and location in tags."
        )

        def __init__(self):
            self.client = Groq(api_key=api_key)
            self.model_name = "llama-3.2-90b-vision-preview"
            self.max_image_size = (800, 800)  # (max width, max height)
            self.colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255)]

        @staticmethod
        def _to_rgb(frame):
            """Return *frame* as a 3-channel array (grayscale / RGBA converted)."""
            if frame.ndim == 2:
                return cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
            if frame.ndim == 3 and frame.shape[2] == 4:
                return cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)
            return frame

        @classmethod
        def _parse_observations(cls, analysis):
            """Extract one observation string per dash-prefixed line of *analysis*.

            The prompt asks for ``- <location>Description</location>``, so the
            text inside the tags is kept. Any text outside the tags on the
            same line is appended after a colon. Lines without a well-formed
            tag pair are used verbatim (minus the leading dash).
            """
            observations = []
            open_tag, close_tag = cls.LOCATION_OPEN, cls.LOCATION_CLOSE
            for raw_line in (analysis or "").splitlines():
                line = raw_line.strip()
                if not line.startswith("-"):
                    continue
                body = line[1:].strip()

                start = body.find(open_tag)
                # Only accept a closing tag that comes after the opening one.
                end = body.find(close_tag, start + len(open_tag)) if start != -1 else -1

                if end != -1:
                    inside = body[start + len(open_tag):end].strip()
                    outside = (body[:start] + " " + body[end + len(close_tag):]).strip(" :-")
                    if inside and outside:
                        observation = f"{inside}: {outside}"
                    else:
                        observation = inside or outside
                else:
                    observation = body

                if observation:
                    observations.append(observation)
            return observations

        def resize_image(self, image):
            """Return a copy of *image* scaled to fit ``self.max_image_size``.

            The longer side is capped at the configured maximum (never
            enlarged) and the other side follows the aspect ratio. Each
            dimension is clamped to at least one pixel so extreme aspect
            ratios cannot produce an invalid zero-sized target.
            """
            height, width = image.shape[:2]
            aspect = width / height
            max_width, max_height = self.max_image_size

            if width > height:
                new_width = min(max_width, width)
                new_height = max(1, int(new_width / aspect))
            else:
                new_height = min(max_height, height)
                new_width = max(1, int(new_height * aspect))

            return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)

        def analyze_frame(self, frame: np.ndarray) -> str:
            """Ask the vision model to list safety concerns visible in *frame*.

            Args:
                frame: Image array (2-D grayscale, RGB or RGBA), or ``None``.

            Returns:
                The model's reply text (empty string if the reply has no
                content), ``"No frame received"`` for a ``None`` frame, or an
                ``"Analysis Error: ..."`` message if the API call raises.
            """
            if frame is None:
                return "No frame received"

            frame = self.resize_image(self._to_rgb(frame))

            # Encode as a moderately compressed JPEG data URL to keep the
            # request payload small.
            buffered = io.BytesIO()
            PILImage.fromarray(frame).save(buffered, format="JPEG", quality=50, optimize=True)
            img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
            image_url = f"data:image/jpeg;base64,{img_base64}"

            try:
                completion = self.client.chat.completions.create(
                    model=self.model_name,
                    messages=[
                        {
                            "role": "user",
                            "content": [
                                {"type": "text", "text": self.ANALYSIS_PROMPT},
                                {"type": "image_url", "image_url": {"url": image_url}},
                            ],
                        },
                        {
                            "role": "assistant",
                            "content": "",
                        },
                    ],
                    temperature=0.1,
                    max_tokens=500,
                    top_p=1,
                    stream=False,
                    stop=None,
                )
            except Exception as e:
                print(f"Detailed error: {str(e)}")
                return f"Analysis Error: {str(e)}"

            # The content field may be None; callers split the result as text.
            return completion.choices[0].message.content or ""

        def draw_observations(self, image, observations):
            """Draw one labelled box per observation onto *image* (in place).

            The model reply carries no pixel coordinates, so each box (one
            third of the image in each dimension) is placed at a random
            position. Labels are kept inside the image: they sit above the
            box when there is room, otherwise just inside its top edge, and
            are shifted left if they would run past the right border.

            Returns:
                The same *image* array, after drawing.
            """
            height, width = image.shape[:2]
            font = cv2.FONT_HERSHEY_SIMPLEX
            font_scale = 0.5
            thickness = 2
            box_width = width // 3
            box_height = height // 3
            label_height = self.LABEL_HEIGHT
            max_chars = self.MAX_LABEL_CHARS

            for idx, obs in enumerate(observations):
                color = self.colors[idx % len(self.colors)]

                x = random.randint(0, width - box_width)
                y = random.randint(0, height - box_height)
                cv2.rectangle(image, (x, y), (x + box_width, y + box_height), color, 2)

                label = obs[:max_chars] + "..." if len(obs) > max_chars else obs
                label_width = cv2.getTextSize(label, font, font_scale, thickness)[0][0]

                # Clamp the label so it is not drawn off-canvas.
                label_x = max(0, min(x, width - label_width))
                label_top = y - label_height if y >= label_height else y

                cv2.rectangle(
                    image,
                    (label_x, label_top),
                    (label_x + label_width, label_top + label_height),
                    color,
                    -1,
                )
                # Baseline 5 px above the bottom of the label background.
                cv2.putText(
                    image,
                    label,
                    (label_x, label_top + label_height - 5),
                    font,
                    font_scale,
                    (255, 255, 255),
                    thickness,
                )

            return image

        def process_frame(self, frame: np.ndarray) -> tuple[np.ndarray, str]:
            """Analyze *frame* and return ``(annotated_image, analysis_text)``.

            Returns ``(None, "No image provided")`` when *frame* is ``None``.
            The input array itself is never drawn on.
            """
            if frame is None:
                return None, "No image provided"

            # Normalise once so the annotation colours behave the same for
            # grayscale and RGBA uploads as for RGB ones.
            rgb_frame = self._to_rgb(frame)

            analysis = self.analyze_frame(rgb_frame)
            display_frame = self.resize_image(rgb_frame.copy())

            observations = self._parse_observations(analysis)
            annotated_frame = self.draw_observations(display_frame, observations)

            return annotated_frame, analysis

    monitor = SafetyMonitor()

    with gr.Blocks() as demo:
        gr.Markdown("# Safety Analysis System powered by Llama 3.2 90b vision")

        with gr.Row():
            input_image = gr.Image(label="Upload Image")
            output_image = gr.Image(label="Annotated Results")

        analysis_text = gr.Textbox(label="Detailed Analysis", lines=5)

        def analyze_image(image):
            """UI callback: run the monitor and surface any failure as text."""
            if image is None:
                return None, "No image provided"
            try:
                processed_frame, analysis = monitor.process_frame(image)
                return processed_frame, analysis
            except Exception as e:
                # UI boundary: report the failure in the textbox instead of
                # letting the callback raise.
                print(f"Processing error: {str(e)}")
                return None, f"Error processing image: {str(e)}"

        input_image.change(
            fn=analyze_image,
            inputs=input_image,
            outputs=[output_image, analysis_text],
        )

    return demo
|
|
# Kept at module level so the name `demo` stays importable from this file.
demo = create_monitor_interface()

if __name__ == "__main__":
    # Start the web server only when executed as a script, so importing this
    # module does not launch it as a side effect.
    demo.launch()