bahakizil committed on
Commit
282aec6
·
verified ·
1 Parent(s): 47deb51

Upload 8 files

Browse files
Files changed (8) hide show
  1. README.md +60 -14
  2. __init__.py +3 -0
  3. app.py +36 -0
  4. fix_plan.md +26 -0
  5. last.pt +3 -0
  6. requirements.txt +6 -0
  7. setup.py +18 -0
  8. yolo_agent.py +153 -0
README.md CHANGED
@@ -1,14 +1,60 @@
1
- ---
2
- title: Gun Detection Agent
3
- emoji: 👁
4
- colorFrom: green
5
- colorTo: yellow
6
- sdk: gradio
7
- sdk_version: 5.17.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- short_description: YOLO object detection model into a LangChain tool
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Gun Detection Agent
2
+
3
+ This project integrates a YOLO object detection model into a LangChain tool. It allows you to process video files (real-time streams are planned, see Future Improvements), detect objects, and generate both image and text outputs.
4
+
5
+ ## 📌 Features
6
+ - Detects objects using YOLO (class IDs: 0-5)
7
+ - Draws **red bounding boxes** and labels (e.g. "Handgun", "Knife")
8
+ - Saves detected frames as images in the `detections` directory
9
+ - Logs detection results in `detections/detections.txt`
10
+ - Can be used as a **LangChain Tool** for easy integration into other projects
11
+
12
+ ## 🚀 Installation
13
+ ### 1️⃣ Install dependencies:
14
+ ```bash
15
+ pip install -r requirements.txt
16
+ ```
17
+ ### 2️⃣ Set up the project:
18
+ ```bash
19
+ git clone https://github.com/your-repository/langchain-yolo-agent.git
20
+ cd langchain-yolo-agent
21
+ ```
22
+ ### 3️⃣ Run the detection script:
23
+ ```bash
24
+ python app.py
25
+ ```
26
+
27
+ ## 🔧 Usage
28
+ ### As a Standalone Script
29
+ ```python
30
+ from yolo_agent import video_detection_tool
31
+
32
+ response = video_detection_tool.invoke("path/to/video.mp4")
33
+ print(response)
34
+ ```
35
+ ### As a LangChain Tool
36
+ ```python
37
+ from langchain.agents import initialize_agent, AgentType
38
+ from langchain.llms import OpenAI
39
+ from yolo_agent import video_detection_tool
40
+
41
+ llm = OpenAI(temperature=0)
42
+ tools = [video_detection_tool]
43
+
44
+ agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
45
+
46
+ user_input = "Analyze the objects in the video: path/to/video.mp4"
47
+ response = agent.run(user_input)
48
+ print(response)
49
+ ```
50
+
51
+ ## 📂 Output Files
52
+ - **Detected frames**: Saved in `detections/` (e.g., `detections/frame_1240.jpg`)
53
+ - **Detection log**: `detections/detections.txt` (text-based log of detected objects)
54
+
55
+ ## 🔥 Future Improvements
56
+ - Add support for real-time video detection
57
+ - Deploy as an API for remote processing
58
+
59
+ ## 📝 License
60
+ This project is open-source and available under the MIT license.
__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .yolo_agent import video_detection_tool
2
+
3
+ __all__ = ["video_detection_tool"]
app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from yolo_agent import video_detection_tool
3
+ import os
4
+ import time
5
+
6
def detect_objects(video):
    """Run YOLO detection on an uploaded video and report the outcome to the UI.

    Args:
        video: Path of the uploaded file as delivered by the ``gr.File`` input
            (``type="filepath"``); ``None`` when nothing has been uploaded.

    Yields:
        A single ``(result, image_paths)`` pair, where ``result`` is the status
        text returned by the detection tool and ``image_paths`` lists the
        ``.jpg`` files found in the ``detections`` folder, oldest first.
    """
    if not video:
        yield "Please upload a video first.", []
        return

    # The tool call is synchronous: every annotated frame is already on disk
    # when it returns, so one directory scan is enough and no polling is needed.
    # The tool accepts only the video input; a confidence threshold cannot be
    # passed through ``invoke``.
    result = video_detection_tool.invoke(video)

    detected_images = "detections"  # Folder where detected images are stored
    image_paths = []
    if os.path.isdir(detected_images):
        image_paths = sorted(
            (
                os.path.join(detected_images, img)
                for img in os.listdir(detected_images)
                if img.endswith(".jpg")
            ),
            key=os.path.getmtime,  # Modification time gives detection order
        )

    # This function is a generator, so the result must be yielded: a value
    # passed to ``return`` would never reach the Gradio outputs.
    yield result, image_paths
25
+
26
# Gradio interface: one upload field, a text box for the status message,
# a gallery for the annotated frames and a button that triggers detection.
with gr.Blocks() as demo:
    gr.Markdown("# 🎥 YOLO Object Detection with LangChain - Real-time Display")
    video_input = gr.File(label="📤 Upload a Video", type="filepath")
    output_text = gr.Textbox(label="📄 Detection Results")
    output_gallery = gr.Gallery(
        label="📸 Detected Objects",
        show_label=True,
        interactive=False,
        columns=4,
    )
    detect_button = gr.Button("🚀 Run Detection")
    detect_button.click(
        fn=detect_objects,
        inputs=video_input,
        outputs=[output_text, output_gallery],
    )

demo.launch(share=True)
fix_plan.md ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Proje Düzeltme Planı
2
+
3
+ ## 1. Model Yolu Güncellemesi
4
+ - `yolo_agent.py` dosyasındaki `MODEL_PATH` değişkenini güncelleyerek "last.pt" dosyasını kullanacak şekilde ayarlayacağım:
5
+ ```python
6
+ MODEL_PATH = os.path.join(os.path.dirname(__file__), "last.pt")
7
+ ```
8
+
9
+ ## 2. Test Videosu Ekleme
10
+ - Projeye örnek bir test videosu ekleyeceğim. Bu video `test_videos` dizinine eklenecek ve README.md'de belirtilecek.
11
+
12
+ ## 3. LangChain Entegrasyonu Testi
13
+ - LangChain entegrasyonunu test etmek için `test_langchain.py` adında yeni bir dosya oluşturacağım. Bu dosya README.md'de belirtilen örnek kullanımı içerecek.
14
+
15
+ ## 4. README.md Güncellemesi
16
+ - README.md dosyasını güncelleyerek hem Gradio arayüzü hem de LangChain aracı kullanımını açıkça belirteceğim.
17
+ - Yeni test videosu ve LangChain test scripti hakkında bilgi ekleyeceğim.
18
+
19
+ ## Uygulama Adımları
20
+ 1. Model yolunu güncelle
21
+ 2. Test videosu ekle
22
+ 3. LangChain test scripti oluştur
23
+ 4. README.md'yi güncelle
24
+ 5. Tüm değişiklikleri test et
25
+
26
+ Bu planı onaylarsanız Code moduna geçerek uygulamaya başlayabilirim.
last.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c7f69ad83e71c4a19ffbcdff30fcabbbbee4ef335033c8fb24b35186d0f38ce
3
+ size 5470931
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ langchain
2
+ openai
3
+ ultralytics
4
+ opencv-python
5
+ opencv-contrib-python
6
+ gradio
setup.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from setuptools import setup, find_packages
2
+
3
setup(
    name="yolo_agent",
    version="1.0",
    packages=find_packages(),
    install_requires=[
        "langchain",
        "openai",
        "ultralytics",
        "opencv-python",
        "opencv-contrib-python",
    ],
    include_package_data=True,
    package_data={
        # Ship the YOLO weights with the package; yolo_agent.py loads
        # "last.pt" from the directory it is installed in.
        "": ["last.pt"],
    },
)
yolo_agent.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LangChain YOLO Agent
3
+ ---------------------
4
+
5
+ This project provides a YOLO-based object detection tool integrated with LangChain.
6
+ Users can upload any video to analyze its contents, generate object detection logs,
7
+ and visualize detections with bounding boxes.
8
+
9
+ Steps:
10
+ 1) Install dependencies: `pip install langchain openai ultralytics opencv-python`
11
+ 2) Add this file (`yolo_agent.py`) to your project.
12
+ 3) Ensure that the YOLO model file (`last.pt`) is located in the same directory as this file.
13
+ 4) Use the provided functions to analyze uploaded videos dynamically.
14
+ """
15
+
16
+ import os
17
+ import cv2
18
+ import shutil
19
+ from langchain.agents import Tool, tool
20
+ from ultralytics import YOLO
21
+
22
+ UPLOAD_FOLDER = "uploads"
23
+ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
24
+ # Path to the YOLO weights file (last.pt), stored next to this module
25
+ MODEL_PATH = os.path.join(os.path.dirname(__file__), "last.pt")
26
+
27
+
28
def detect_with_yolo(
    video_path: str,
    model_path: str = os.path.join(os.path.dirname(__file__), "last.pt"),
    output_dir: str = "detections",
    frame_skip: int = 10,
    conf: float = 0.7
) -> str:
    """
    Run YOLO detection on every ``frame_skip``-th frame of a video.

    Only detections with class IDs 0-5 are kept. IDs 0-2 are labelled
    "Danger"; IDs 3-5 use the name stored in the model. Each kept detection
    gets a red bounding box with a filled label and one line in
    ``<output_dir>/detections.txt``. Every frame that contains at least one
    kept detection is saved as ``<output_dir>/frame_<index>.jpg``.

    Args:
        video_path: Path of the video file to analyse.
        model_path: Path of the YOLO weights file.
        output_dir: Directory that receives the log and the annotated frames;
            created when missing.
        frame_skip: Distance, in frames, between two analysed frames. Values
            below 1 are treated as 1.
        conf: Minimum confidence a detection needs to be reported.

    Returns:
        A human-readable status message. Missing videos, model loading
        failures and unreadable videos are reported through this message
        instead of an exception.
    """
    if not os.path.exists(video_path):
        return f"Video not found: {video_path}"

    try:
        model = YOLO(model_path)
    except Exception as e:
        return f"Failed to load model: {e}"

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return f"Cannot open video: {video_path}"

    # A step below 1 would make the loop seek to the same frame forever.
    step = max(1, int(frame_skip))
    wanted_classes = (0, 1, 2, 3, 4, 5)
    red = (0, 0, 255)  # OpenCV colours are BGR
    font = cv2.FONT_HERSHEY_COMPLEX
    output_txt = os.path.join(output_dir, "detections.txt")
    frame_count = 0

    try:
        os.makedirs(output_dir, exist_ok=True)
        with open(output_txt, "w") as ftxt:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # Use the caller's threshold rather than a fixed value.
                results = model(frame, conf=conf)
                detections = (
                    results[0].boxes.data.cpu().numpy() if len(results) > 0 else []
                )
                valid_detections = [
                    det for det in detections if int(det[5]) in wanted_classes
                ]

                if len(valid_detections) > 0:
                    for det in valid_detections:
                        # Each row is (x1, y1, x2, y2, score, class); the score
                        # gets its own name so it cannot clobber ``conf``.
                        x1, y1, x2, y2, _score, cls_ = det
                        class_id = int(cls_)

                        if class_id in (0, 1, 2):
                            class_label = "Danger"
                        else:
                            class_label = model.names.get(class_id, f"Class {class_id}")

                        left, top = int(x1), int(y1)
                        right, bottom = int(x2), int(y2)

                        cv2.rectangle(frame, (left, top), (right, bottom), red, 3)

                        # Filled label background sitting on top of the box.
                        # NOTE(review): the text is measured at scale 0.8 but
                        # drawn at 0.85 - confirm which value is intended.
                        (w, h), _ = cv2.getTextSize(class_label, font, 0.8, 2)
                        label_x1 = left
                        label_y2 = top
                        label_y1 = label_y2 - h - 10
                        label_x2 = label_x1 + w + 10

                        cv2.rectangle(
                            frame,
                            (label_x1, label_y1),
                            (label_x2, label_y2),
                            red,
                            cv2.FILLED,
                        )
                        cv2.putText(
                            frame,
                            class_label,
                            (label_x1 + 5, label_y1 + h + 5),
                            font,
                            0.85,
                            (255, 255, 255),
                            2,
                            cv2.LINE_AA,
                        )

                        ftxt.write(
                            f"Frame {frame_count}: {class_label} at ({left}, {top}, {right}, {bottom})\n"
                        )

                    # One image per frame, written after all boxes are drawn.
                    output_frame_path = os.path.join(output_dir, f"frame_{frame_count}.jpg")
                    cv2.imwrite(output_frame_path, frame)

                # Jump ahead to the next frame that should be analysed.
                frame_count += step
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_count)
    finally:
        # Release the capture even when detection or file writing fails.
        cap.release()

    return f"Processing complete. Outputs saved in '{output_dir}' and '{output_txt}'."
131
+
132
@tool("video_detection_tool", return_direct=True)
def video_detection_tool(video) -> str:
    """
    Handles video uploads dynamically and runs YOLO detection.
    Saves results in the "detections" folder and logs to detections.txt.
    Input is the path to a video file.
    """
    # Gradio (gr.File with type="filepath") and LangChain agents pass a plain
    # path string; file-like upload objects expose their path as ``.name``.
    source_path = getattr(video, "name", video)
    if not isinstance(source_path, (str, os.PathLike)) or not source_path:
        return f"Invalid video input: {video!r}"

    video_path = os.path.join(UPLOAD_FOLDER, os.path.basename(source_path))

    try:
        # Copying a file onto itself raises shutil.SameFileError, so skip the
        # copy when the video already lives in the upload folder.
        if os.path.abspath(source_path) != os.path.abspath(video_path):
            shutil.copy(source_path, video_path)
        # Detection runs at a 0.8 confidence threshold, the value the Gradio
        # app asks for.
        result = detect_with_yolo(video_path, conf=0.8)
    except Exception as e:
        return f"Error during detection: {e}"

    return result
149
+
150
+ if __name__ == "__main__":
151
+ print("LangChain YOLO Agent Ready!")
152
+
153
+ __all__ = ["video_detection_tool"]