Nightjars / DistanceEstimation.py
brindhamanick's picture
Upload 98 files
563dad2 verified
import cv2 as cv
import numpy as np
# from tts import *
from ultralytics import YOLO
import streamlit as st
# --- Calibration geometry (all lengths in inches) ---
# Distance paired with the reference-image widths when focal lengths are computed.
KNOWN_DISTANCE = 45  # INCHES

# Real-world widths of the supported object classes.
PERSON_WIDTH = 16  # INCHES
MOBILE_WIDTH = 3.0  # INCHES
CHAIR_WIDTH = 20.0  # INCHES
LAPTOP_WIDTH = 12  # INCHES

# Module-level placeholders for the horizontal / vertical position labels.
text1 = text2 = ""

# --- Object detector constants ---
CONFIDENCE_THRESHOLD = 0.4
NMS_THRESHOLD = 0.3

# --- Drawing palette ---
# One colour per detected class; indexed by class id modulo the palette size.
COLORS = [
    (151, 157, 255),
    (56, 56, 255),
    (31, 112, 255),
    (29, 178, 255),
    (49, 210, 207),
    (10, 249, 72),
    (23, 204, 146),
    (134, 219, 61),
    (52, 147, 26),
    (187, 212, 0),
    (168, 153, 44),
    (255, 194, 0),
    (147, 69, 52),
    (255, 115, 100),
    (236, 24, 0),
    (255, 56, 132),
    (133, 0, 82),
    (255, 56, 203),
    (200, 149, 255),
    (199, 55, 255),
]
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
# Font face handed to every cv.putText call in this module
FONTS = cv.FONT_HERSHEY_PLAIN
@st.cache_resource
def load_model(model_path):
    """Load and return the YOLO model stored at ``model_path``.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse
    the loaded model instead of loading the weights again.

    Args:
        model_path: Path or name of the weights file to load.

    Returns:
        The ``YOLO`` model instance.
    """
    # NOTE: the second positional parameter of ``YOLO(...)`` is ``task``, not a
    # prediction option, so the former ``'conf=0.40'`` argument never set a
    # confidence threshold. Confidence is an inference-time setting and has
    # to be passed as ``conf=...`` when the model is called.
    return YOLO(model_path)
# Load the YOLOv8 model once at import time; `model` is the module-wide
# detector instance called by object_detector().
model_select = "yolov8xcdark.pt"  # weights file handed to load_model()
model = load_model(model_select)  # You can replace it with 'yolov8-tiny.pt' if you want a smaller version
# Class names exposed by the loaded model; object_detector() indexes this
# with int(class_id) to build labels and result rows.
class_names = model.names
# Object detector function
def object_detector(image):
    """Detect objects in ``image``, draw them on it, and report the measurable ones.

    Runs the module-level ``model`` on ``image``. Every detection gets a
    bounding box and a ``"<class> : <score>"`` label drawn directly onto
    ``image`` (the array is modified in place). A detection whose centre
    point was already seen during this call is skipped as a duplicate.

    Args:
        image: Image array to analyse. Only its first two ``shape`` entries
            (height, width) are read here, so 2-D and 3-D arrays both work.

    Returns:
        A list with one ``[class_name, pixel_width, (x1, y1 - 2),
        horizontal_position, vertical_position]`` entry for each detection
        whose class id is 0, 67, 56 or 72 (person, mobile, chair, laptop).
        ``horizontal_position`` is ``"left"``, ``"center"`` or ``"right"``;
        ``vertical_position`` is ``"top"``, ``"mid"`` or ``"bottom"``.

    Raises:
        ValueError: If ``image`` is ``None``, e.g. because ``cv.imread``
            could not read the file.
    """
    if image is None:
        raise ValueError("object_detector() received None instead of an image")

    # Class ids with a known real-world width: person, mobile, chair, laptop
    measurable_ids = (0, 67, 56, 72)

    # The frame size is identical for every detection, so read it once.
    # Slicing (instead of a 3-way unpack) also accepts single-channel images.
    height, width = image.shape[:2]

    # Apply the configured confidence threshold at inference time; without
    # it the model runs with the library's default threshold.
    results = model(image, conf=CONFIDENCE_THRESHOLD)

    data_list = []
    # Centre points already handled, used to drop duplicate detections
    seen_centers = set()

    for result in results:
        boxes = result.boxes
        for box, score, class_id in zip(boxes.xyxy, boxes.conf, boxes.cls):
            x1, y1, x2, y2 = map(int, box)
            center = ((x1 + x2) // 2, (y1 + y2) // 2)
            if center in seen_centers:
                continue  # Skip the duplicate object
            seen_centers.add(center)
            center_x, center_y = center

            # Place the centre in a 3x3 grid over the frame
            W_pos = "left" if center_x <= width / 3 else "center" if center_x <= 2 * width / 3 else "right"
            H_pos = "top" if center_y <= height / 3 else "mid" if center_y <= 2 * height / 3 else "bottom"

            class_index = int(class_id)
            class_name = class_names[class_index]
            color = COLORS[class_index % len(COLORS)]
            label = f"{class_name} : {score:.2f}"

            # Draw bounding box and label
            cv.rectangle(image, (x1, y1), (x2, y2), color, 2)
            cv.putText(image, label, (x1, y1 - 10), FONTS, 0.5, color, 2)

            # Only classes with a known real-world width are reported
            if class_index in measurable_ids:
                data_list.append([class_name, x2 - x1, (x1, y1 - 2), W_pos, H_pos])

    return data_list
# Focal length and distance functions
def focal_length_finder(measured_distance, real_width, width_in_rf):
    """Return the focal length implied by one reference measurement.

    Args:
        measured_distance: Distance between camera and object in the
            reference image.
        real_width: Real-world width of the object, in the same unit as
            ``measured_distance``.
        width_in_rf: Width of the object in the reference image, in pixels.

    Returns:
        ``(width_in_rf * measured_distance) / real_width``.
    """
    scaled_width = width_in_rf * measured_distance
    focal_length = scaled_width / real_width
    return focal_length
def distance_finder(focal_length, real_object_width, width_in_frame):
    """Estimate the distance to an object from its apparent width.

    Args:
        focal_length: Focal length as returned by ``focal_length_finder``.
        real_object_width: Real-world width of the object.
        width_in_frame: Width of the object in the current frame, in pixels.

    Returns:
        ``(real_object_width * focal_length) / width_in_frame``, in the unit
        of ``real_object_width``.
    """
    projected_width = real_object_width * focal_length
    estimated_distance = projected_width / width_in_frame
    return estimated_distance
# Reading reference images and deriving the per-class focal lengths
def _load_reference(image_path):
    """Read a reference image and run the object detector on it.

    Returns:
        ``(image, detections)`` where ``detections`` is the list returned by
        ``object_detector``.

    Raises:
        OSError: If OpenCV could not read ``image_path``.
    """
    image = cv.imread(image_path)
    # cv.imread signals failure by returning None rather than raising
    if image is None:
        raise OSError(f"Could not read reference image: {image_path!r}")
    return image, object_detector(image)


def _reference_width(detections, class_name, image_path):
    """Return the pixel width of the ``class_name`` object in ``detections``.

    The detector reports several classes, so the first entry is not
    necessarily the object the reference image was taken for; an entry of
    the expected class is preferred. If none is present, the first detection
    is used (the behaviour before this check existed) and a warning is
    emitted.

    Raises:
        IndexError: If ``detections`` is empty.
    """
    if not detections:
        raise IndexError(
            f"No measurable object detected in reference image {image_path!r}; "
            f"cannot calibrate {class_name!r}"
        )
    for detection in detections:
        if detection[0] == class_name:
            return detection[1]

    import warnings

    warnings.warn(
        f"No {class_name!r} detected in reference image {image_path!r}; "
        f"calibrating from the first detection ({detections[0][0]!r}) instead"
    )
    return detections[0][1]


# Get reference images, detections and widths
ref_person, person_data = _load_reference('ReferenceImages/image14.png')
person_width_in_rf = _reference_width(person_data, 'person', 'ReferenceImages/image14.png')

ref_mobile, mobile_data = _load_reference('ReferenceImages/image4.png')
mobile_width_in_rf = _reference_width(mobile_data, 'cell phone', 'ReferenceImages/image4.png')

ref_chair, chair_data = _load_reference('ReferenceImages/image22.png')
chair_width_in_rf = _reference_width(chair_data, 'chair', 'ReferenceImages/image22.png')

# Laptop calibration is disabled: the image is still loaded and analysed so
# that ref_laptop / laptop_data stay available, but no width is taken from it.
ref_laptop, laptop_data = _load_reference('ReferenceImages/image2.png')
# laptop_width_in_rf = laptop_data[0][1]

# Calculate focal lengths
focal_person = focal_length_finder(KNOWN_DISTANCE, PERSON_WIDTH, person_width_in_rf)
focal_mobile = focal_length_finder(KNOWN_DISTANCE, MOBILE_WIDTH, mobile_width_in_rf)
focal_chair = focal_length_finder(KNOWN_DISTANCE, CHAIR_WIDTH, chair_width_in_rf)
# focal_laptop = focal_length_finder(KNOWN_DISTANCE, LAPTOP_WIDTH, laptop_width_in_rf)
# Function to process each frame and write to the output text file
def get_frame_output(frame, frame_cnt):
    """Annotate ``frame`` with estimated distances and write guidance text.

    Runs ``object_detector`` on ``frame``, estimates the distance of every
    detection that has a calibrated focal length, draws a
    ``"Distance: ... inches"`` label next to it, and writes one guidance line
    per such detection to ``output_text.txt``. The file is overwritten on
    every call.

    Args:
        frame: Image to analyse; it is drawn on in place.
        frame_cnt: Unused; kept so existing callers keep working.

    Returns:
        The same ``frame`` object, now annotated.
    """
    # (focal length, real-world width) for each class that was calibrated.
    # Laptops are reported by object_detector but have no focal length, so
    # they are skipped below instead of reusing a stale or unbound distance.
    calibration = {
        'person': (focal_person, PERSON_WIDTH),
        'cell phone': (focal_mobile, MOBILE_WIDTH),
        'chair': (focal_chair, CHAIR_WIDTH),
    }

    # The context manager closes the file even if detection or drawing raises
    with open('output_text.txt', 'w') as output_text_file:
        data = object_detector(frame)
        for name, pixel_width, (x, y), _w_pos, h_pos in data:
            # A zero-width box would divide by zero in distance_finder
            if name not in calibration or pixel_width <= 0:
                continue
            focal_length, real_width = calibration[name]
            distance = distance_finder(focal_length, real_width, pixel_width)

            # Overlay distance information on the frame
            cv.rectangle(frame, (x + 2, y + 4), (x + 150, y + 20), BLACK, -1)
            cv.putText(frame, f'Distance: {round(distance, 2)} inches', (x + 7, y + 17), FONTS, 0.58, WHITE, 1)

            # Generate output text based on position and distance
            if distance > 100:
                message = "Get closer"
            elif 50 < round(distance) <= 100 and h_pos == "mid":
                message = "Go straight"
            else:
                message = f"{name} {int(round(distance))} inches, take left or right"
            output_text_file.write(message + "\n")

    return frame
def get_live_frame_output(frame, result_list_json):
    """Annotate ``frame`` from precomputed detections and write guidance text.

    For every entry of ``result_list_json`` whose class has a calibrated
    focal length, estimates the distance from the box width, draws a
    ``"Distance: ... inches"`` label on ``frame`` and writes one guidance
    line to ``output_text.txt``. The file is overwritten on every call.

    Args:
        frame: Image the detections belong to; it is drawn on in place.
        result_list_json: Iterable of dicts, each with a ``'class'`` name and
            a ``'bbox'`` dict holding ``'x_min'``, ``'y_min'``, ``'x_max'``
            and ``'y_max'``.

    Returns:
        The same ``frame`` object, now annotated.
    """
    # (focal length, real-world width) for each class that was calibrated.
    # Any other class is skipped: it has no distance, and formatting a
    # missing distance would raise.
    calibration = {
        'person': (focal_person, PERSON_WIDTH),
        'cell phone': (focal_mobile, MOBILE_WIDTH),
        'chair': (focal_chair, CHAIR_WIDTH),
    }

    # The context manager closes the file even if drawing raises
    with open('output_text.txt', 'w') as output_text_file:
        print("Im here are get live frame")

        # Iterate over the detection results in result_list_json
        for result in result_list_json:
            class_name = result['class']
            box = result['bbox']
            x1, y1, x2, y2 = box['x_min'], box['y_min'], box['x_max'], box['y_max']
            width = x2 - x1

            # A non-positive width cannot give a meaningful distance
            # (and a zero width would divide by zero).
            if class_name not in calibration or width <= 0:
                continue
            focal_length, real_width = calibration[class_name]
            distance = distance_finder(focal_length, real_width, width)

            # Vertical third of the frame that contains the box centre
            center_y = (y1 + y2) // 2
            height = frame.shape[0]
            H_pos = "top" if center_y <= height / 3 else "mid" if center_y <= 2 * height / 3 else "bottom"

            # Overlay distance information on the frame. OpenCV needs integer
            # pixel coordinates; the cast is a no-op for ints and guards
            # against float bbox values (presumably possible when the
            # detections come from parsed JSON - confirm with the caller).
            left, top = int(x1), int(y1)
            cv.rectangle(frame, (left + 2, top + 4), (left + 150, top + 20), BLACK, -1)
            cv.putText(frame, f'Distance: {round(distance, 2)} inches', (left + 7, top + 17), FONTS, 0.58, WHITE, 1)
            print(distance)

            # Generate output text based on position and distance
            if distance > 100:
                message = "Get closer"
            elif 50 < round(distance) <= 100 and H_pos == "mid":
                message = "Go straight"
            else:
                message = f"{class_name} {int(round(distance))} inches, take left or right"

            # Write the output text to a file
            output_text_file.write(message + "\n")

    return frame