Spaces:

Myogyi
/

newcustomyolo

Sleeping

App Files Files Community

newcustomyolo / interfacetest2.py

Myogyi

Update interfacetest2.py

f6cf90e 11 months ago

raw

history blame contribute delete

12 kB

	import argparse
	import time
	from pathlib import Path
	import os
	import cv2
	import torch
	import torch.backends.cudnn as cudnn
	from numpy import random
	import numpy as np
	import ffmpeg
	import gradio as gr
	from fastapi import FastAPI
	import uvicorn
	import shutil
	from models.experimental import attempt_load
	from utils.datasets import LoadStreams, LoadImages
	from utils.general import check_img_size, check_requirements, check_imshow, non_max_suppression, \
	scale_coords, strip_optimizer, set_logging, increment_path
	from utils.plots import plot_one_box
	from utils.torch_utils import select_device, time_synchronized, TracedModel

	# Function to compute IoU between two boxes
	def compute_iou(box1, box2):
	x1, y1, x2, y2 = box1
	x1_, y1_, x2_, y2_ = box2
	xi1 = max(x1, x1_)
	yi1 = max(y1, y1_)
	xi2 = min(x2, x2_)
	yi2 = min(y2, y2_)
	inter_width = max(0, xi2 - xi1)
	inter_height = max(0, yi2 - yi1)
	inter_area = inter_width * inter_height
	box1_area = (x2 - x1) * (y2 - y1)
	box2_area = (x2_ - x1_) * (y2_ - y1_)
	union_area = box1_area + box2_area - inter_area
	return inter_area / union_area if union_area != 0 else 0.0

	# Function to check if a scanner is moving based on centroid displacement
	def is_scanner_moving(prev_centroids, curr_box, scanner_id, threshold=2.0):
	x1, y1, x2, y2 = curr_box
	curr_centroid = ((x1 + x2) / 2, (y1 + y2) / 2)
	if scanner_id in prev_centroids:
	prev_x, prev_y = prev_centroids[scanner_id]
	distance = np.sqrt((curr_centroid[0] - prev_x)2 + (curr_centroid[1] - prev_y)2)
	return distance > threshold
	return False # Default to "not moving" if no previous centroid exists

	# Function to convert video to H.264 format
	def convert_to_h264(input_path):
	output_path = str(Path(input_path).with_suffix('')) + "_h264.mp4"
	try:
	stream = ffmpeg.input(input_path)
	stream = ffmpeg.output(stream, output_path, vcodec='libx264', acodec='aac', format='mp4', pix_fmt='yuv420p')
	ffmpeg.run(stream, cmd='/usr/bin/ffmpeg', overwrite_output=True)
	return output_path
	except ffmpeg.Error as e:
	stderr = e.stderr.decode('utf-8') if e.stderr else "Unknown FFmpeg error"
	print(f"FFmpeg error: {stderr}")
	return input_path

	# Detection function adapted from the second script
	def detect_video(video_path, weights, conf_thres=0.25, iou_thres=0.45, img_size=640, device='', save_dir='runs/detect/exp', trace=False):
	save_dir = Path(increment_path(Path(save_dir), exist_ok=True))
	save_dir.mkdir(parents=True, exist_ok=True)

	# Initialize
	set_logging()
	device = select_device(device)
	half = device.type != 'cpu'

	# Load model
	model = attempt_load(weights, map_location=device)
	stride = int(model.stride.max())
	imgsz = check_img_size(img_size, s=stride)

	if trace:
	model = TracedModel(model, device, img_size)
	if half:
	model.half()

	# Set Dataloader
	dataset = LoadImages(video_path, img_size=imgsz, stride=stride)

	# Get names and colors
	names = model.module.names if hasattr(model, 'module') else model.names
	colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

	# Initialize variables
	vid_path, vid_writer = None, None
	prev_centroids = {}
	scanner_id_counter = 0
	product_scanning_status_global = ""
	payment_scanning_status_global = ""
	old_img_b, old_img_h, old_img_w = 0, 0, 0

	for path, img, im0s, vid_cap in dataset:
	img = torch.from_numpy(img).to(device)
	img = img.half() if half else img.float()
	img /= 255.0
	if img.ndimension() == 3:
	img = img.unsqueeze(0)

	# Warmup
	if device.type != 'cpu' and (old_img_b != img.shape[0] or old_img_h != img.shape[2] or old_img_w != img.shape[3]):
	old_img_b = img.shape[0]
	old_img_h = img.shape[2]
	old_img_w = img.shape[3]
	for _ in range(3):
	model(img)[0]

	# Inference
	with torch.no_grad():
	pred = model(img, augment=False)[0]

	# Apply NMS
	pred = non_max_suppression(pred, conf_thres, iou_thres)

	# Process detections
	for i, det in enumerate(pred):
	p = Path(path)
	save_path = str(save_dir / p.name.replace('.mp4', '_output.mp4'))
	im0 = im0s

	if len(det):
	det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
	item_boxes, scanner_data, phone_boxes = [], [], []
	curr_scanner_boxes = []

	# Process each detection
	for *xyxy, conf, cls in det:
	x1, y1, x2, y2 = map(int, xyxy)
	class_name = names[int(cls)]
	color = colors[int(cls)]
	if class_name.lower() == "item":
	item_boxes.append([x1, y1, x2, y2])
	elif class_name.lower() == "phone":
	phone_boxes.append([x1, y1, x2, y2])
	elif class_name.lower() == "scanner":
	curr_scanner_boxes.append([x1, y1, x2, y2])
	plot_one_box(xyxy, im0, label=class_name, color=color, line_thickness=2)

	# Match scanner boxes with previous frames
	new_prev_centroids = {}
	if prev_centroids and curr_scanner_boxes:
	for curr_box in curr_scanner_boxes:
	curr_centroid = ((curr_box[0] + curr_box[2]) / 2, (curr_box[1] + curr_box[3]) / 2)
	best_match_id = min(prev_centroids.keys(),
	key=lambda k: np.sqrt((curr_centroid[0] - prev_centroids[k][0])**2 +
	(curr_centroid[1] - prev_centroids[k][1])**2),
	default=None)
	if best_match_id is not None:
	distance = np.sqrt((curr_centroid[0] - prev_centroids[best_match_id][0])**2 +
	(curr_centroid[1] - prev_centroids[best_match_id][1])**2)
	if distance < 50:
	scanner_id = best_match_id
	else:
	scanner_id = scanner_id_counter
	scanner_id_counter += 1
	else:
	scanner_id = scanner_id_counter
	scanner_id_counter += 1
	is_moving = is_scanner_moving(prev_centroids, curr_box, scanner_id, threshold=2.0)
	movement_status = "Scanning" if is_moving else "Idle"
	scanner_data.append([curr_box, movement_status, scanner_id])
	new_prev_centroids[scanner_id] = curr_centroid
	elif curr_scanner_boxes:
	for curr_box in curr_scanner_boxes:
	scanner_id = scanner_id_counter
	scanner_id_counter += 1
	movement_status = "Idle"
	curr_centroid = ((curr_box[0] + curr_box[2]) / 2, (curr_box[1] + curr_box[3]) / 2)
	scanner_data.append([curr_box, movement_status, scanner_id])
	new_prev_centroids[scanner_id] = curr_centroid

	prev_centroids = new_prev_centroids

	# Redraw scanner boxes with movement status
	for scanner_box, movement_status, scanner_id in scanner_data:
	x1, y1, x2, y2 = scanner_box
	label = f"scanner {movement_status} (ID: {scanner_id})"
	plot_one_box([x1, y1, x2, y2], im0, label=label, color=colors[names.index("scanner")], line_thickness=2)

	# Check for overlaps only if scanning status hasn't been set
	if not product_scanning_status_global:
	for item_box in item_boxes:
	iou = compute_iou(scanner_box, item_box)
	if movement_status == "Scanning" and iou > 0.02:
	product_scanning_status_global = "Product scanning is finished"
	print(f"Product scanning finished at frame {i}")
	if not payment_scanning_status_global:
	for phone_box in phone_boxes:
	iou = compute_iou(scanner_box, phone_box)
	if movement_status == "Scanning" and iou > 0.02:
	payment_scanning_status_global = "Payment scanning is finished"
	print(f"Payment scanning finished at frame {i}")

	# Display persistent labels
	if product_scanning_status_global:
	cv2.putText(im0, product_scanning_status_global, (10, 30),
	cv2.FONT_HERSHEY_SIMPLEX, 0.9, colors[names.index("scanner")], 2)
	if payment_scanning_status_global:
	cv2.putText(im0, payment_scanning_status_global, (10, 60),
	cv2.FONT_HERSHEY_SIMPLEX, 0.9, colors[names.index("scanner")], 2)

	# Write frame to video
	if vid_path != save_path:
	vid_path = save_path
	if isinstance(vid_writer, cv2.VideoWriter):
	vid_writer.release()
	fps = vid_cap.get(cv2.CAP_PROP_FPS) if vid_cap else 30
	w, h = im0.shape[1], im0.shape[0]
	vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
	vid_writer.write(im0)

	# Cleanup
	if isinstance(vid_writer, cv2.VideoWriter):
	vid_writer.release()

	# Convert to H.264
	output_h264 = str(Path(save_path).with_name(f"{Path(save_path).stem}_h264.mp4"))
	try:
	stream = ffmpeg.input(save_path)
	stream = ffmpeg.output(stream, output_h264, vcodec='libx264', acodec='aac', format='mp4', pix_fmt='yuv420p')
	ffmpeg.run(stream, cmd='/usr/bin/ffmpeg', overwrite_output=True)
	os.remove(save_path)
	return output_h264
	except ffmpeg.Error as e:
	stderr = e.stderr.decode('utf-8') if e.stderr else "Unknown FFmpeg error"
	print(f"FFmpeg error: {stderr}")
	return save_path

	# Gradio interface
	def gradio_interface(video, conf_thres, iou_thres):
	weights = "/home/myominhtet/Desktop/deepsortfromscratch/yolov7/best.pt"
	img_size = 640

	# Create a stable directory for video files
	stable_dir = "/home/myominhtet/Desktop/deepsortfromscratch/videos"
	os.makedirs(stable_dir, exist_ok=True)

	# Copy the uploaded video to a stable path
	stable_path = os.path.join(stable_dir, f"input_{Path(video).name}")
	shutil.copy(video, stable_path)
	print(f"Copied video to: {stable_path}")

	# Verify the copied file
	print(f"Stable path exists: {os.path.exists(stable_path)}")
	print(f"Stable path readable: {os.access(stable_path, os.R_OK)}")

	video = convert_to_h264(stable_path)
	output_video = detect_video(video, weights, conf_thres, iou_thres, img_size)

	return output_video if output_video else "Error processing video."

	# Set up Gradio interface
	interface = gr.Interface(
	fn=gradio_interface,
	inputs=[
	gr.Video(label="Upload Video"),
	gr.Slider(0, 1, value=0.25, step=0.05, label="Confidence Threshold"),
	gr.Slider(0, 1, value=0.45, step=0.05, label="IoU Threshold"),
	],
	outputs=gr.Video(label="Processed Video"),
	title="Retail Shop Monitoring",
	# description="Upload a video to check "
	)

	# Set up FastAPI app
	app = FastAPI()
	app = gr.mount_gradio_app(app, interface, path="/")

	if __name__ == "__main__":
	uvicorn.run(app, host="0.0.0.0", port=8000)