First_agent_template

Sleeping

App Files Files Community

HayLahav committed on Feb 26, 2025

Commit

7fdeaa3

verified ·

1 Parent(s): 6d66207

Update app.py

Browse files

Files changed (1) hide show

app.py +167 -102

app.py CHANGED Viewed

@@ -1,32 +1,85 @@
 from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool
 import datetime
 import requests
 import pytz
 import yaml
-from tools.final_answer import FinalAnswerTool
-from ultralytics import YOLO  # YOLOv8 model
-import cv2
-import numpy as np
-import os
 import tempfile
 import gradio as gr
-from Gradio_UI import GradioUI
 @tool
-def get_yolov8_coco_detections(video_path: str) -> str:
     """Detects objects in an MP4 video file using YOLOv8.
     Args:
         video_path: Path to the input video.
     Returns:
-        Processed video file path with detections.
     """
     model = YOLO("yolov8s.pt")  # Load pre-trained YOLOv8 model
     cap = cv2.VideoCapture(video_path)  # Load video
     if not cap.isOpened():
-        return f"Error: Could not open video file at {video_path}"
     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
@@ -37,12 +90,14 @@ def get_yolov8_coco_detections(video_path: str) -> str:
     out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
     unique_detections = set()
     while cap.isOpened():
         ret, frame = cap.read()
         if not ret:
             break  # End of video
         results = model(frame)  # Run YOLOv8 inference
         for r in results:
@@ -71,13 +126,14 @@ def get_yolov8_coco_detections(video_path: str) -> str:
     return {
         "output_path": output_path,
-        "detected_objects": [{"object": obj} for obj in detections_list]
     }
 @tool
 def detect_road_lanes(video_path: str) -> dict:
-    """Detects lane markings in an MP4 video using YOLOv8-seg.
     Args:
         video_path: Path to the input video.
@@ -109,94 +165,96 @@ def detect_road_lanes(video_path: str) -> dict:
     # For lane detection specifically
     lane_count = 0
     detected_lanes = []
     while cap.isOpened():
         ret, frame = cap.read()
         if not ret:
             break
-        # Run segmentation model for lane detection
-        # YOLOv8-seg can identify roads and potentially lane markings
-        results = model(frame, classes=[0, 1, 2, 3, 7])  # Focus on relevant classes like road, person, car
         # Create a visualization frame
         vis_frame = frame.copy()
-        # Use the segmentation masks to help identify lanes
-        if hasattr(results[0], 'masks') and results[0].masks is not None:
-            masks = results[0].masks
-            # Enhance lane detection with traditional computer vision
-            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-            blur = cv2.GaussianBlur(gray, (5, 5), 0)
-            edges = cv2.Canny(blur, 50, 150)
-            # Create a mask focused on the lower portion of the image (where lanes typically are)
-            mask = np.zeros_like(edges)
-            height, width = edges.shape
-            polygon = np.array([[(0, height), (width, height), (width, height//2), (0, height//2)]], dtype=np.int32)
-            cv2.fillPoly(mask, polygon, 255)
-            masked_edges = cv2.bitwise_and(edges, mask)
-            # Apply Hough transform to detect lines
-            lines = cv2.HoughLinesP(masked_edges, 1, np.pi/180, 50, minLineLength=100, maxLineGap=50)
-            current_lane_count = 0
-            lane_lines = []
-            if lines is not None:
-                for line in lines:
-                    x1, y1, x2, y2 = line[0]
-                    # Filter out horizontal lines (not lanes)
-                    if abs(x2 - x1) > 0 and abs(y2 - y1) / abs(x2 - x1) > 0.5:  # Slope threshold
-                        cv2.line(vis_frame, (x1, y1), (x2, y2), (0, 0, 255), 2)  # Red lane markings
-                        lane_lines.append(((x1, y1), (x2, y2)))
-                # Count lanes by clustering similar lines
-                if lane_lines:
-                    # Simple clustering: group lines with similar slopes
-                    slopes = []
-                    for ((x1, y1), (x2, y2)) in lane_lines:
-                        # Avoid division by zero
-                        if x2 != x1:
-                            slope = (y2 - y1) / (x2 - x1)
-                            slopes.append(slope)
-                    # Cluster slopes to identify unique lanes
-                    unique_slopes = []
-                    for slope in slopes:
-                        is_new = True
-                        for us in unique_slopes:
-                            if abs(slope - us) < 0.2:  # Threshold for considering slopes similar
-                                is_new = False
-                                break
-                        if is_new:
-                            unique_slopes.append(slope)
-                    current_lane_count = len(unique_slopes)
-                    lane_count = max(lane_count, current_lane_count)
-                    # Update detected lanes information
-                    detected_lanes = [{"lane_id": i, "slope": s} for i, s in enumerate(unique_slopes)]
         # Add lane count text
         cv2.putText(vis_frame, f"Detected lanes: {current_lane_count}", (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
-        # Add segmentation visualization
-        if hasattr(results[0], 'masks') and results[0].masks is not None:
-            masks = results[0].masks
-            for mask in masks:
-                # Convert mask to binary image
-                seg_mask = mask.data.cpu().numpy()[0].astype(np.uint8) * 255
-                # Resize mask to frame size
-                seg_mask = cv2.resize(seg_mask, (width, height))
-                # Create colored overlay for the mask
-                color_mask = np.zeros_like(vis_frame)
-                color_mask[seg_mask > 0] = [0, 255, 255]  # Yellow color for segmentation
-                # Add the mask as semi-transparent overlay
-                vis_frame = cv2.addWeighted(vis_frame, 1, color_mask, 0.3, 0)
         out.write(vis_frame)
@@ -206,7 +264,8 @@ def detect_road_lanes(video_path: str) -> dict:
     return {
         "output_path": output_path,
         "detected_lanes": detected_lanes,
-        "lane_count": lane_count
     }
@@ -485,6 +544,16 @@ def get_current_time_in_timezone(timezone: str) -> str:
 # Setup FinalAnswerTool
 final_answer = FinalAnswerTool()
 # Setup model
 model = HfApiModel(
     max_tokens=2096,
@@ -493,18 +562,9 @@ model = HfApiModel(
     custom_role_conversions=None,
 )
-# Create or load prompts.yaml
-if not os.path.exists("prompts.yaml"):
-    prompts = {
-        "default": "You are an autonomous driving assistant that helps analyze road scenes and make driving decisions.",
-        "prefix": "Analyze the following driving scenario: ",
-        "suffix": "Provide a detailed analysis with safety recommendations."
-    }
-    with open("prompts.yaml", 'w') as file:
-        yaml.dump(prompts, file)
-else:
-    with open("prompts.yaml", 'r') as stream:
-        prompt_templates = yaml.safe_load(stream)
 # Define agent
 agent = CodeAgent(
@@ -620,11 +680,16 @@ def create_gradio_interface():
     return demo
-# Try to use the GradioUI wrapper if it's available, otherwise use our custom interface
 try:
-    # Launch using the GradioUI wrapper from the original code
-    GradioUI(agent).launch()
 except Exception as e:
-    print(f"Error using GradioUI wrapper: {e}")
     print("Launching custom Gradio interface instead")
     create_gradio_interface().launch()

+# Install required packages first
+import os
+import sys
+import subprocess
+# Ensure every required package can be imported, pip-installing any that cannot
+def install_packages():
+    required_packages = [  # pip distribution names; each one is also passed verbatim to __import__ below
+        'ultralytics',
+        'smolagents',
+        'pytz',
+        'pyyaml',  # NOTE(review): this file imports the module as `yaml`, so __import__('pyyaml') presumably fails on every run - confirm
+        'opencv-python',  # NOTE(review): not an importable module name (this file imports `cv2`), so this entry always takes the install path
+        'numpy',
+        'gradio'
+    ]
+    for package in required_packages:
+        try:
+            __import__(package)  # probe importability using the list entry as the module name
+            print(f"{package} is already installed.")
+        except ImportError:
+            print(f"Installing {package}...")
+            subprocess.check_call([sys.executable, "-m", "pip", "install", package])  # raises CalledProcessError if pip exits non-zero
+            print(f"{package} has been installed.")
+# Install required packages
+print("Checking and installing required packages...")
+install_packages()
+# Now import the required modules
 from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool
 import datetime
 import requests
 import pytz
 import yaml
 import tempfile
+import numpy as np
+import cv2
 import gradio as gr
+from ultralytics import YOLO  # YOLOv8 model
+# Create tools directory and FinalAnswerTool if they don't exist
+os.makedirs("tools", exist_ok=True)
+if not os.path.exists("tools/final_answer.py"):
+    with open("tools/final_answer.py", "w") as f:
+        f.write("""
+class FinalAnswerTool:
+    def __call__(self, answer):
+        return {"answer": answer}
+""")
+# Import FinalAnswerTool
+sys.path.append(os.getcwd())
+from tools.final_answer import FinalAnswerTool
+# Create prompts.yaml if it doesn't exist
+if not os.path.exists("prompts.yaml"):
+    prompts = {
+        "default": "You are an autonomous driving assistant that helps analyze road scenes and make driving decisions.",
+        "prefix": "Analyze the following driving scenario: ",
+        "suffix": "Provide a detailed analysis with safety recommendations."
+    }
+    with open("prompts.yaml", 'w') as file:
+        yaml.dump(prompts, file)
 @tool
+def get_yolov8_coco_detections(video_path: str) -> dict:
     """Detects objects in an MP4 video file using YOLOv8.
     Args:
         video_path: Path to the input video.
     Returns:
+        Dictionary with processed video path and detection results.
     """
     model = YOLO("yolov8s.pt")  # Load pre-trained YOLOv8 model
     cap = cv2.VideoCapture(video_path)  # Load video
     if not cap.isOpened():
+        return {"error": f"Could not open video file at {video_path}"}
     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
     unique_detections = set()
+    frame_count = 0
     while cap.isOpened():
         ret, frame = cap.read()
         if not ret:
             break  # End of video
+        frame_count += 1
         results = model(frame)  # Run YOLOv8 inference
         for r in results:
     return {
         "output_path": output_path,
+        "detected_objects": [{"object": obj} for obj in detections_list],
+        "frames_processed": frame_count
     }
 @tool
 def detect_road_lanes(video_path: str) -> dict:
+    """Detects lane markings in an MP4 video using YOLOv8-seg and traditional CV techniques.
     Args:
         video_path: Path to the input video.
     # For lane detection specifically
     lane_count = 0
     detected_lanes = []
+    frame_count = 0
     while cap.isOpened():
         ret, frame = cap.read()
         if not ret:
             break
+        frame_count += 1
         # Create a visualization frame
         vis_frame = frame.copy()
+        # Enhance lane detection with traditional computer vision
+        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+        blur = cv2.GaussianBlur(gray, (5, 5), 0)
+        edges = cv2.Canny(blur, 50, 150)
+        # Create a mask focused on the lower portion of the image (where lanes typically are)
+        mask = np.zeros_like(edges)
+        height, width = edges.shape
+        polygon = np.array([[(0, height), (width, height), (width, height//2), (0, height//2)]], dtype=np.int32)
+        cv2.fillPoly(mask, polygon, 255)
+        masked_edges = cv2.bitwise_and(edges, mask)
+        # Apply Hough transform to detect lines
+        lines = cv2.HoughLinesP(masked_edges, 1, np.pi/180, 50, minLineLength=100, maxLineGap=50)
+        current_lane_count = 0
+        lane_lines = []
+        if lines is not None:
+            for line in lines:
+                x1, y1, x2, y2 = line[0]
+                # Filter out horizontal lines (not lanes)
+                if abs(x2 - x1) > 0 and abs(y2 - y1) / abs(x2 - x1) > 0.5:  # Slope threshold
+                    cv2.line(vis_frame, (x1, y1), (x2, y2), (0, 0, 255), 2)  # Red lane markings
+                    lane_lines.append(((x1, y1), (x2, y2)))
+            # Count lanes by clustering similar lines
+            if lane_lines:
+                # Simple clustering: group lines with similar slopes
+                slopes = []
+                for ((x1, y1), (x2, y2)) in lane_lines:
+                    # Avoid division by zero
+                    if x2 != x1:
+                        slope = (y2 - y1) / (x2 - x1)
+                        slopes.append(slope)
+                # Cluster slopes to identify unique lanes
+                unique_slopes = []
+                for slope in slopes:
+                    is_new = True
+                    for us in unique_slopes:
+                        if abs(slope - us) < 0.2:  # Threshold for considering slopes similar
+                            is_new = False
+                            break
+                    if is_new:
+                        unique_slopes.append(slope)
+                current_lane_count = len(unique_slopes)
+                lane_count = max(lane_count, current_lane_count)
+                # Update detected lanes information
+                detected_lanes = [{"lane_id": i, "slope": s} for i, s in enumerate(unique_slopes)]
         # Add lane count text
         cv2.putText(vis_frame, f"Detected lanes: {current_lane_count}", (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+        # Try running YOLOv8 segmentation if available
+        try:
+            # Run segmentation model for road detection
+            seg_results = model(frame, classes=[0, 1, 2, 3, 7])  # Focus on relevant classes
+            if hasattr(seg_results[0], 'masks') and seg_results[0].masks is not None:
+                masks = seg_results[0].masks
+                for seg_mask in masks:
+                    # Convert mask to binary image
+                    mask_data = seg_mask.data.cpu().numpy()[0].astype(np.uint8) * 255
+                    # Resize mask to frame size
+                    mask_data = cv2.resize(mask_data, (width, height))
+                    # Create colored overlay for the mask
+                    color_mask = np.zeros_like(vis_frame)
+                    color_mask[mask_data > 0] = [0, 255, 255]  # Yellow color for segmentation
+                    # Add the mask as semi-transparent overlay
+                    vis_frame = cv2.addWeighted(vis_frame, 1, color_mask, 0.3, 0)
+        except Exception as e:
+            print(f"Warning: YOLOv8 segmentation failed: {e}")
+            # Continue without segmentation - we still have traditional lane detection
         out.write(vis_frame)
     return {
         "output_path": output_path,
         "detected_lanes": detected_lanes,
+        "lane_count": lane_count,
+        "frames_processed": frame_count
     }
 # Setup FinalAnswerTool
 final_answer = FinalAnswerTool()
+# Stand-in exposing the same GradioUI(agent).launch() call shape; defined unconditionally
+class GradioUIPlaceholder:
+    def __init__(self, agent):
+        self.agent = agent  # stored on the instance; launch() below does not read it
+    def launch(self):
+        print("Using placeholder GradioUI implementation")
+        create_gradio_interface().launch()  # delegates to the custom Gradio interface defined elsewhere in this file
 # Setup model
 model = HfApiModel(
     max_tokens=2096,
     custom_role_conversions=None,
 )
+# Load prompts from YAML
+with open("prompts.yaml", 'r') as stream:
+    prompt_templates = yaml.safe_load(stream)
 # Define agent
 agent = CodeAgent(
     return demo
+# Main execution - use GradioUI when that name is defined at module level, otherwise fall back to the custom interface
 try:
+    # Check if GradioUI is available in the global namespace
+    if 'GradioUI' in globals():
+        print("Using original GradioUI")
+        GradioUI(agent).launch()
+    else:
+        # GradioUI is not defined: raise so the except branch below launches the custom interface
+        raise ImportError("Original GradioUI not found")
 except Exception as e:  # catches the ImportError raised above as well as any failure from GradioUI(agent).launch()
+    print(f"Error using original GradioUI: {e}")
     print("Launching custom Gradio interface instead")
     create_gradio_interface().launch()