Spaces:
Sleeping
Sleeping
Upload 8 files
Browse files- README.md +60 -14
- __init__.py +3 -0
- app.py +36 -0
- fix_plan.md +26 -0
- last.pt +3 -0
- requirements.txt +6 -0
- setup.py +18 -0
- yolo_agent.py +153 -0
README.md
CHANGED
|
@@ -1,14 +1,60 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Gun Detection Agent
|
| 2 |
+
|
| 3 |
+
This project wraps a YOLO object detection model as a LangChain tool. It processes video files (real-time streams are not supported yet; see Future Improvements), detects objects, and generates both image and text outputs.
|
| 4 |
+
|
| 5 |
+
## 📌 Features
|
| 6 |
+
- Detects objects using YOLO (class IDs: 0-5)
|
| 7 |
+
- Draws **red bounding boxes** and labels (e.g. "Handgun", "Knife")
|
| 8 |
+
- Saves detected frames as images in the `detections` directory
|
| 9 |
+
- Logs detection results in `detections/detections.txt`
|
| 10 |
+
- Can be used as a **LangChain Tool** for easy integration into other projects
|
| 11 |
+
|
| 12 |
+
## 🚀 Installation
|
| 13 |
+
### 1️⃣ Install dependencies:
|
| 14 |
+
```bash
|
| 15 |
+
pip install -r requirements.txt
|
| 16 |
+
```
|
| 17 |
+
### 2️⃣ Set up the project:
|
| 18 |
+
```bash
|
| 19 |
+
git clone https://github.com/your-repository/langchain-yolo-agent.git
|
| 20 |
+
cd langchain-yolo-agent
|
| 21 |
+
```
|
| 22 |
+
### 3️⃣ Run the detection script:
|
| 23 |
+
```bash
|
| 24 |
+
python app.py
|
| 25 |
+
```
|
| 26 |
+
|
| 27 |
+
## 🔧 Usage
|
| 28 |
+
### As a Standalone Script
|
| 29 |
+
```python
|
| 30 |
+
from yolo_agent import video_detection_tool
|
| 31 |
+
|
| 32 |
+
response = video_detection_tool("path/to/video.mp4")
|
| 33 |
+
print(response)
|
| 34 |
+
```
|
| 35 |
+
### As a LangChain Tool
|
| 36 |
+
```python
|
| 37 |
+
from langchain.agents import initialize_agent, AgentType
|
| 38 |
+
from langchain.llms import OpenAI
|
| 39 |
+
from yolo_agent import video_detection_tool
|
| 40 |
+
|
| 41 |
+
llm = OpenAI(temperature=0)
|
| 42 |
+
tools = [video_detection_tool]
|
| 43 |
+
|
| 44 |
+
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
|
| 45 |
+
|
| 46 |
+
user_input = "Analyze the objects in the video: path/to/video.mp4"
|
| 47 |
+
response = agent.run(user_input)
|
| 48 |
+
print(response)
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
## 📂 Output Files
|
| 52 |
+
- **Detected frames**: Saved in `detections/` (e.g., `detections/frame_1240.jpg`)
|
| 53 |
+
- **Detection log**: `detections/detections.txt` (text-based log of detected objects)
|
| 54 |
+
|
| 55 |
+
## 🔥 Future Improvements
|
| 56 |
+
- Add support for real-time video detection
|
| 57 |
+
- Deploy as an API for remote processing
|
| 58 |
+
|
| 59 |
+
## 📝 License
|
| 60 |
+
This project is open-source and available under the MIT license.
|
__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .yolo_agent import video_detection_tool
|
| 2 |
+
|
| 3 |
+
__all__ = ["video_detection_tool"]
|
app.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from yolo_agent import video_detection_tool
|
| 3 |
+
import os
|
| 4 |
+
import time
|
| 5 |
+
|
| 6 |
+
def detect_objects(video):
    """Run YOLO detection on an uploaded video and report the outcome.

    The function is a generator (it is wired to a Gradio click handler that
    consumes yielded updates), so every result must be delivered with
    ``yield``; a ``return`` value would never reach the UI.

    Args:
        video: Value produced by the upload widget. It is handed unchanged to
            ``video_detection_tool.invoke``. A falsy value (nothing uploaded)
            short-circuits with a message instead of calling the tool.

    Yields:
        A single ``(result, image_paths)`` tuple: the text returned by the
        tool and the ``.jpg`` files found in the ``detections`` folder,
        ordered by modification time (oldest first). The list is empty when
        the folder does not exist.
    """
    if not video:
        yield "No video uploaded.", []
        return

    # The tool function only takes the video argument, so no confidence
    # override is passed here. The result is available only once detection
    # has finished, which is why the folder is listed once afterwards
    # instead of being polled.
    result = video_detection_tool.invoke(video)

    detected_images = "detections"  # Folder where detected images are stored
    image_paths = []
    if os.path.isdir(detected_images):
        image_paths = sorted(
            (
                os.path.join(detected_images, name)
                for name in os.listdir(detected_images)
                if name.endswith(".jpg")
            ),
            key=os.path.getmtime,
        )

    yield result, image_paths
|
| 25 |
+
|
| 26 |
+
# Gradio UI: an upload widget, a text box and a gallery for the results,
# and a button that triggers detection.
with gr.Blocks() as demo:
    gr.Markdown("# 🎥 YOLO Object Detection with LangChain - Real-time Display")
    video_input = gr.File(label="📤 Upload a Video", type="filepath")
    output_text = gr.Textbox(label="📄 Detection Results")
    output_gallery = gr.Gallery(
        label="📸 Detected Objects",
        show_label=True,
        interactive=False,
        columns=4,
    )
    detect_button = gr.Button("🚀 Run Detection")
    # detect_objects feeds the text box and the gallery, in that order.
    detect_button.click(
        fn=detect_objects,
        inputs=video_input,
        outputs=[output_text, output_gallery],
    )

# Start the app when this script runs.
demo.launch(share=True)
|
fix_plan.md
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Proje Düzeltme Planı
|
| 2 |
+
|
| 3 |
+
## 1. Model Yolu Güncellemesi
|
| 4 |
+
- `yolo_agent.py` dosyasındaki `MODEL_PATH` değişkenini güncelleyerek "last.pt" dosyasını kullanacak şekilde ayarlayacağım:
|
| 5 |
+
```python
|
| 6 |
+
MODEL_PATH = os.path.join(os.path.dirname(__file__), "last.pt")
|
| 7 |
+
```
|
| 8 |
+
|
| 9 |
+
## 2. Test Videosu Ekleme
|
| 10 |
+
- Projeye örnek bir test videosu ekleyeceğim. Bu video `test_videos` dizinine eklenecek ve README.md'de belirtilecek.
|
| 11 |
+
|
| 12 |
+
## 3. LangChain Entegrasyonu Testi
|
| 13 |
+
- LangChain entegrasyonunu test etmek için `test_langchain.py` adında yeni bir dosya oluşturacağım. Bu dosya README.md'de belirtilen örnek kullanımı içerecek.
|
| 14 |
+
|
| 15 |
+
## 4. README.md Güncellemesi
|
| 16 |
+
- README.md dosyasını güncelleyerek hem Gradio arayüzü hem de LangChain aracı kullanımını açıkça belirteceğim.
|
| 17 |
+
- Yeni test videosu ve LangChain test scripti hakkında bilgi ekleyeceğim.
|
| 18 |
+
|
| 19 |
+
## Uygulama Adımları
|
| 20 |
+
1. Model yolunu güncelle
|
| 21 |
+
2. Test videosu ekle
|
| 22 |
+
3. LangChain test scripti oluştur
|
| 23 |
+
4. README.md'yi güncelle
|
| 24 |
+
5. Tüm değişiklikleri test et
|
| 25 |
+
|
| 26 |
+
Bu planı onaylarsanız Code moduna geçerek uygulamaya başlayabilirim.
|
last.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c7f69ad83e71c4a19ffbcdff30fcabbbbee4ef335033c8fb24b35186d0f38ce
|
| 3 |
+
size 5470931
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
langchain
|
| 2 |
+
openai
|
| 3 |
+
ultralytics
|
| 4 |
+
opencv-python
|
| 5 |
+
opencv-contrib-python
|
| 6 |
+
gradio
|
setup.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from setuptools import setup, find_packages
|
| 2 |
+
|
| 3 |
+
# Package metadata for the YOLO/LangChain detection tool.
setup(
    name="yolo_agent",
    version="1.0",
    packages=find_packages(),
    install_requires=[
        "langchain",
        "openai",
        "ultralytics",
        "opencv-python",
        "opencv-contrib-python",
    ],
    include_package_data=True,
    package_data={
        # yolo_agent.py loads "last.pt" from its own directory, so that is
        # the weights file that has to ship with the package.
        "": ["last.pt"],
    },
)
|
yolo_agent.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
LangChain YOLO Agent
|
| 3 |
+
---------------------
|
| 4 |
+
|
| 5 |
+
This project provides a YOLO-based object detection tool integrated with LangChain.
|
| 6 |
+
Users can upload any video to analyze its contents, generate object detection logs,
|
| 7 |
+
and visualize detections with bounding boxes.
|
| 8 |
+
|
| 9 |
+
Steps:
|
| 10 |
+
1) Install dependencies: `pip install langchain openai ultralytics opencv-python`
|
| 11 |
+
2) Add this file (`yolo_agent.py`) to your project.
|
| 12 |
+
3) Ensure that the YOLO model file (`last.pt`) is located in the same directory as this file.
|
| 13 |
+
4) Use the provided functions to analyze uploaded videos dynamically.
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import os
|
| 17 |
+
import cv2
|
| 18 |
+
import shutil
|
| 19 |
+
from langchain.agents import Tool, tool
|
| 20 |
+
from ultralytics import YOLO
|
| 21 |
+
|
| 22 |
+
UPLOAD_FOLDER = "uploads"
|
| 23 |
+
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
|
| 24 |
+
# Path to the YOLO weights file ("last.pt"), located next to this module
|
| 25 |
+
MODEL_PATH = os.path.join(os.path.dirname(__file__), "last.pt")
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def detect_with_yolo(
    video_path: str,
    model_path: str = os.path.join(os.path.dirname(__file__), "last.pt"),
    output_dir: str = "detections",
    frame_skip: int = 10,
    conf: float = 0.7,
) -> str:
    """
    Runs YOLO detection on the given video.

    Every ``frame_skip``-th frame is passed to the model. Detections whose
    class id is 0..5 are drawn on the frame (red box plus label), logged to
    ``<output_dir>/detections.txt`` and the annotated frame is saved as
    ``<output_dir>/frame_<index>.jpg``. Class ids 0..2 are labelled
    "Danger"; ids 3..5 use the name stored in the model.

    Args:
        video_path: Path of the video file to analyse.
        model_path: Path of the YOLO weights file.
        output_dir: Directory receiving the log file and annotated frames;
            created if missing. The log file is overwritten on every call.
        frame_skip: Distance, in frames, between two analysed frames.
            Must be at least 1.
        conf: Confidence threshold forwarded to the model.

    Returns:
        A human-readable status string: either a completion message or a
        description of why processing could not start (missing video,
        invalid ``frame_skip``, model load failure, unreadable video).
    """
    if not os.path.exists(video_path):
        return f"Video not found: {video_path}"

    # The read position is advanced by frame_skip on every iteration, so a
    # value below 1 would never move past the current frame.
    if frame_skip < 1:
        return f"Invalid frame_skip: {frame_skip} (must be >= 1)"

    try:
        model = YOLO(model_path)
    except Exception as e:
        return f"Failed to load model: {e}"

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        return f"Cannot open video: {video_path}"

    os.makedirs(output_dir, exist_ok=True)
    output_txt = os.path.join(output_dir, "detections.txt")
    frame_count = 0

    danger_ids = (0, 1, 2)
    target_ids = (0, 1, 2, 3, 4, 5)

    try:
        with open(output_txt, "w", encoding="utf-8") as ftxt:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # Forward the caller's threshold to the model.
                results = model(frame, conf=conf)
                detections = (
                    results[0].boxes.data.cpu().numpy() if len(results) > 0 else []
                )
                valid_detections = [
                    det for det in detections if int(det[5]) in target_ids
                ]

                for det in valid_detections:
                    # The score gets its own name so it cannot shadow the
                    # `conf` parameter.
                    x1, y1, x2, y2, _score, cls_ = det
                    class_id = int(cls_)

                    if class_id in danger_ids:
                        class_label = "Danger"
                    else:
                        class_label = model.names.get(class_id, f"Class {class_id}")

                    _draw_labeled_box(
                        frame, int(x1), int(y1), int(x2), int(y2), class_label
                    )
                    ftxt.write(
                        f"Frame {frame_count}: {class_label} at ({int(x1)}, {int(y1)}, {int(x2)}, {int(y2)})\n"
                    )

                if valid_detections:
                    # One image per frame, written after all boxes are drawn.
                    output_frame_path = os.path.join(
                        output_dir, f"frame_{frame_count}.jpg"
                    )
                    cv2.imwrite(output_frame_path, frame)

                frame_count += frame_skip
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_count)
    finally:
        # Release the capture even if the model or a write raises.
        cap.release()

    cv2.destroyAllWindows()

    return f"Processing complete. Outputs saved in '{output_dir}' and '{output_txt}'."


def _draw_labeled_box(frame, x1, y1, x2, y2, label):
    """Draw a red box with a filled red label tag (white text) on ``frame`` in place."""
    red = (0, 0, 255)
    cv2.rectangle(frame, (x1, y1), (x2, y2), red, 3)

    # The tag sits directly above the box's top-left corner.
    (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_COMPLEX, 0.8, 2)
    label_x1 = x1
    label_y2 = y1
    label_y1 = label_y2 - h - 10
    label_x2 = label_x1 + w + 10

    cv2.rectangle(frame, (label_x1, label_y1), (label_x2, label_y2), red, cv2.FILLED)
    cv2.putText(
        frame,
        label,
        (label_x1 + 5, label_y1 + h + 5),
        cv2.FONT_HERSHEY_COMPLEX,
        0.85,
        (255, 255, 255),
        2,
        cv2.LINE_AA,
    )
|
| 131 |
+
|
| 132 |
+
@tool("video_detection_tool", return_direct=True)
def video_detection_tool(video) -> str:
    """
    Handles video uploads dynamically and runs YOLO detection.
    Saves results in the "detections" folder and logs to detections.txt.
    """
    # The docstring above doubles as the tool description, so it is kept
    # short; implementation notes live in comments instead.
    #
    # `video` may be a plain path string or an upload object exposing the
    # path as `.name`; both are reduced to a path here.
    source_path = getattr(video, "name", video)
    video_path = os.path.join(UPLOAD_FOLDER, os.path.basename(source_path))

    # Copy the upload into UPLOAD_FOLDER unless it already is that file
    # (shutil.copy refuses to copy a file onto itself).
    if os.path.abspath(source_path) != os.path.abspath(video_path):
        shutil.copy(source_path, video_path)

    try:
        result = detect_with_yolo(video_path)
    except Exception as e:
        # Report failures as text so the caller always receives a string.
        return f"Error during detection: {e}"

    return result
|
| 149 |
+
|
| 150 |
+
if __name__ == "__main__":
|
| 151 |
+
print("LangChain YOLO Agent Ready!")
|
| 152 |
+
|
| 153 |
+
__all__ = ["video_detection_tool"]
|