"""Streamlit + YOLOv8 object detection with monocular distance estimation.

Detects persons, cell phones, chairs (and, when enabled, laptops), estimates
their camera distance via the known-width / focal-length pinhole model, draws
overlays on the frame, and writes a navigation hint to ``output_text.txt``.
"""

import cv2 as cv
import numpy as np
# from tts import *
from ultralytics import YOLO
import streamlit as st

# Distance constants: real-world widths used by the pinhole-camera formula
#   distance = (real_width * focal_length) / width_in_pixels
KNOWN_DISTANCE = 45   # INCHES — camera-to-object distance in the reference images
PERSON_WIDTH = 16     # INCHES
MOBILE_WIDTH = 3.0    # INCHES
CHAIR_WIDTH = 20.0    # INCHES
LAPTOP_WIDTH = 12     # INCHES

text1 = ""
text2 = ""

# Object detector constants
CONFIDENCE_THRESHOLD = 0.4
NMS_THRESHOLD = 0.3

# Colors for detected objects (BGR), indexed by class id modulo palette size.
COLORS = [(151, 157, 255), (56, 56, 255), (31, 112, 255), (29, 178, 255),
          (49, 210, 207), (10, 249, 72), (23, 204, 146), (134, 219, 61),
          (52, 147, 26), (187, 212, 0), (168, 153, 44), (255, 194, 0),
          (147, 69, 52), (255, 115, 100), (236, 24, 0), (255, 56, 132),
          (133, 0, 82), (255, 56, 203), (200, 149, 255), (199, 55, 255)]
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)

# Defining fonts
FONTS = cv.FONT_HERSHEY_PLAIN


@st.cache_resource
def load_model(model_path):
    """Load and cache the YOLO model for the Streamlit session.

    BUGFIX: the original passed ``'conf=0.40'`` as YOLO's second positional
    argument, which is the *task* name, not a confidence setting.  Confidence
    thresholds belong to the predict call, so the stray argument is dropped.
    """
    return YOLO(model_path)


# Load the YOLOv8 model
model_select = "yolov8xcdark.pt"
model = load_model(model_select)  # You can replace it with 'yolov8-tiny.pt' if you want a smaller version

# Get class names from the YOLO model (mapping: class id -> label string)
class_names = model.names


def object_detector(image):
    """Run YOLO on ``image``, draw box/label overlays, and collect detections.

    Returns a list of ``[label, pixel_width, (x, y), W_pos, H_pos]`` entries
    for the distance-relevant classes only (ids 0, 67, 56, 72).
    Side effect: draws rectangles and labels directly onto ``image``.
    """
    results = model(image)
    data_list = []
    # Centers already seen — used to drop duplicate boxes for the same spot.
    detected_objects = {}
    for result in results:
        for box, score, class_id in zip(result.boxes.xyxy, result.boxes.conf, result.boxes.cls):
            x1, y1, x2, y2 = map(int, box)
            center_x, center_y = (x1 + x2) // 2, (y1 + y2) // 2
            height, width, _ = image.shape
            # Skip a duplicate object detected at an identical center position.
            if (center_x, center_y) in detected_objects:
                continue
            detected_objects[(center_x, center_y)] = True
            # Classify the object's position within a 3x3 grid of the frame.
            W_pos = "left" if center_x <= width / 3 else "center" if center_x <= 2 * width / 3 else "right"
            H_pos = "top" if center_y <= height / 3 else "mid" if center_y <= 2 * height / 3 else "bottom"
            text1, text2 = W_pos, H_pos
            color = COLORS[int(class_id) % len(COLORS)]
            label = f"{class_names[int(class_id)]} : {score:.2f}"
            # Draw bounding box and label
            cv.rectangle(image, (x1, y1), (x2, y2), color, 2)
            cv.putText(image, label, (x1, y1 - 10), FONTS, 0.5, color, 2)
            # Append relevant data for classes we estimate distance for.
            # NOTE(review): in standard COCO, id 72 is 'refrigerator' and
            # 'laptop' is 63 — confirm against this model's class map.
            if int(class_id) in [0, 67, 56, 72]:  # person, mobile, chair, laptop
                data_list.append([class_names[int(class_id)], x2 - x1, (x1, y1 - 2), text1, text2])
    return data_list


# Focal length and distance functions
def focal_length_finder(measured_distance, real_width, width_in_rf):
    """Return the focal length implied by a reference image.

    ``width_in_rf`` is the object's width in pixels at the known
    ``measured_distance``; ``real_width`` is its physical width (inches).
    """
    return (width_in_rf * measured_distance) / real_width


def distance_finder(focal_length, real_object_width, width_in_frame):
    """Return the estimated distance (inches) of an object seen at
    ``width_in_frame`` pixels, given its real width and the focal length."""
    return (real_object_width * focal_length) / width_in_frame


# Reading reference images used for focal-length calibration
ref_person = cv.imread('ReferenceImages/image14.png')
ref_mobile = cv.imread('ReferenceImages/image4.png')
ref_chair = cv.imread('ReferenceImages/image22.png')
ref_laptop = cv.imread('ReferenceImages/image2.png')

# Get reference widths (pixel width of the first detection in each image)
person_data = object_detector(ref_person)
person_width_in_rf = person_data[0][1]
mobile_data = object_detector(ref_mobile)
mobile_width_in_rf = mobile_data[0][1]
chair_data = object_detector(ref_chair)
chair_width_in_rf = chair_data[0][1]
laptop_data = object_detector(ref_laptop)
# laptop_width_in_rf = laptop_data[0][1]

# Calculate focal lengths
focal_person = focal_length_finder(KNOWN_DISTANCE, PERSON_WIDTH, person_width_in_rf)
focal_mobile = focal_length_finder(KNOWN_DISTANCE, MOBILE_WIDTH, mobile_width_in_rf)
focal_chair = focal_length_finder(KNOWN_DISTANCE, CHAIR_WIDTH, chair_width_in_rf)
# focal_laptop = focal_length_finder(KNOWN_DISTANCE, LAPTOP_WIDTH, laptop_width_in_rf)


def get_frame_output(frame, frame_cnt):
    """Detect objects in ``frame``, overlay distances, write a hint file.

    ``frame_cnt`` is accepted for interface compatibility but unused here.
    Returns the annotated frame.  Overwrites ``output_text.txt`` each call.
    """
    # 'with' guarantees the file is closed even if detection raises.
    with open('output_text.txt', 'w') as output_text_file:
        data = object_detector(frame)
        for d in data:
            # BUGFIX: 'distance' was unbound (NameError) for any detected
            # class without a branch below (e.g. class id 72 — laptop branch
            # is commented out).  Initialize to None and skip those objects.
            distance = None
            if d[0] == 'person':
                distance = distance_finder(focal_person, PERSON_WIDTH, d[1])
            elif d[0] == 'cell phone':
                distance = distance_finder(focal_mobile, MOBILE_WIDTH, d[1])
            elif d[0] == 'chair':
                distance = distance_finder(focal_chair, CHAIR_WIDTH, d[1])
            # elif d[0] == 'laptop':
            #     distance = distance_finder(focal_laptop, LAPTOP_WIDTH, d[1])
            if distance is None:
                continue  # no calibrated width for this class
            x, y = d[2]
            text1, text2 = d[3], d[4]
            # Overlay distance information on the frame
            cv.rectangle(frame, (x + 2, y + 4), (x + 150, y + 20), BLACK, -1)
            cv.putText(frame, f'Distance: {round(distance, 2)} inches',
                       (x + 7, y + 17), FONTS, 0.58, WHITE, 1)
            # Generate output text based on position and distance
            OUTPUT_TEXT = ""
            if distance > 100:
                OUTPUT_TEXT = "Get closer"
            elif 50 < round(distance) <= 100 and text2 == "mid":
                OUTPUT_TEXT = "Go straight"
            else:
                OUTPUT_TEXT = f"{d[0]} {int(round(distance))} inches, take left or right"
            output_text_file.write(OUTPUT_TEXT + "\n")
    return frame


def get_live_frame_output(frame, result_list_json):
    """Overlay distance info for pre-computed detections and write hints.

    ``result_list_json`` entries must carry ``'class'`` and ``'bbox'``
    (with ``x_min``/``y_min``/``x_max``/``y_max``) keys.
    Returns the annotated frame.  Overwrites ``output_text.txt`` each call.
    """
    with open('output_text.txt', 'w') as output_text_file:
        # Iterate over the detection results in result_list_json
        for result in result_list_json:
            class_name = result['class']
            box = result['bbox']
            x1, y1, x2, y2 = box['x_min'], box['y_min'], box['x_max'], box['y_max']
            width = x2 - x1
            distance = None
            # Determine the distance based on the detected object class
            if class_name == 'person':
                distance = distance_finder(focal_person, PERSON_WIDTH, width)
            elif class_name == 'cell phone':
                distance = distance_finder(focal_mobile, MOBILE_WIDTH, width)
            elif class_name == 'chair':
                distance = distance_finder(focal_chair, CHAIR_WIDTH, width)
            # elif class_name == 'laptop':
            #     distance = distance_finder(focal_laptop, LAPTOP_WIDTH, width)
            # BUGFIX: 'distance' stayed None for unhandled classes and
            # round(None, 2) raised TypeError — skip those objects instead.
            if distance is None:
                continue
            # Calculate the object's center and positional text (W_pos and H_pos)
            center_x, center_y = (x1 + x2) // 2, (y1 + y2) // 2
            height, frame_width, _ = frame.shape
            W_pos = "left" if center_x <= frame_width / 3 else "center" if center_x <= 2 * frame_width / 3 else "right"
            H_pos = "top" if center_y <= height / 3 else "mid" if center_y <= 2 * height / 3 else "bottom"
            text1, text2 = W_pos, H_pos
            # Overlay distance information on the frame
            cv.rectangle(frame, (x1 + 2, y1 + 4), (x1 + 150, y1 + 20), BLACK, -1)
            cv.putText(frame, f'Distance: {round(distance, 2)} inches',
                       (x1 + 7, y1 + 17), FONTS, 0.58, WHITE, 1)
            # Generate output text based on position and distance
            OUTPUT_TEXT = ""
            if distance > 100:
                OUTPUT_TEXT = "Get closer"
            elif 50 < round(distance) <= 100 and text2 == "mid":
                OUTPUT_TEXT = "Go straight"
            else:
                OUTPUT_TEXT = f"{class_name} {int(round(distance))} inches, take left or right"
            # Write the output text to a file
            output_text_file.write(OUTPUT_TEXT + "\n")
    return frame