Spaces:

NinjainPJs
/

UrbanEye

Sleeping

App Files Files Community

UrbanEye / app.py

NinjainPJs

Deploy UrbanEye — YOLOv11 Detection + ByteTrack Tracking

115b303 verified 3 months ago

raw

history blame contribute delete

21.5 kB

	"""UrbanEye — Autonomous Driving Perception Pipeline Demo."""

	import os
	import tempfile
	import time
	from pathlib import Path

	import cv2


	# ---------------------------------------------------------------------------
	# Device detection — force CPU if CUDA arch unsupported
	# ---------------------------------------------------------------------------
	def _detect_device() -> str:
	try:
	import torch

	if not torch.cuda.is_available():
	return "cpu"
	cap = torch.cuda.get_device_capability(0)
	supported = torch.cuda.get_arch_list()
	device_arch = f"sm_{cap[0]}{cap[1]}"
	for s in supported:
	if device_arch in s:
	torch.zeros(1, device="cuda")
	return "0"
	return "cpu"
	except Exception:
	return "cpu"


	_DEVICE = _detect_device()
	if _DEVICE == "cpu":
	os.environ["CUDA_VISIBLE_DEVICES"] = ""
	print("[UrbanEye] Running on CPU (GPU not compatible with this PyTorch)")
	else:
	print(f"[UrbanEye] Running on GPU device {_DEVICE}")


	SCRIPT_DIR = Path(__file__).parent
	MODEL_PATH = SCRIPT_DIR / "models" / "best.pt"

	CLASS_NAMES = ["vehicle", "pedestrian", "cyclist", "traffic_light", "traffic_sign"]
	CLASS_DISPLAY = ["Vehicle", "Pedestrian", "Cyclist", "Traffic Light", "Traffic Sign"]
	CLASS_COLORS = {
	"vehicle": (0, 220, 80),
	"pedestrian": (60, 60, 255),
	"cyclist": (0, 160, 255),
	"traffic_light": (0, 235, 235),
	"traffic_sign": (220, 50, 220),
	}

	_model = None


	def get_model():
	global _model
	if _model is None and MODEL_PATH.exists():
	print(f"[UrbanEye] Loading model from {MODEL_PATH}...")
	from ultralytics import YOLO

	_model = YOLO(str(MODEL_PATH))
	# Force to correct device
	_model.to(f"cuda:{_DEVICE}" if _DEVICE != "cpu" else "cpu")
	print(f"[UrbanEye] Model loaded on {'CPU' if _DEVICE == 'cpu' else 'GPU'}")
	return _model


	def draw_detections(frame, results, class_filter, conf_thresh):
	out = frame.copy()
	n = 0
	counts = {c: 0 for c in CLASS_NAMES}
	if not results or len(results) == 0:
	return out, 0, counts
	boxes = results[0].boxes
	if boxes is None or len(boxes) == 0:
	return out, 0, counts

	for i in range(len(boxes)):
	conf = float(boxes.conf[i])
	cls_id = int(boxes.cls[i])
	if conf < conf_thresh or cls_id >= len(CLASS_NAMES):
	continue
	cls = CLASS_NAMES[cls_id]
	if cls not in class_filter:
	continue

	x1, y1, x2, y2 = map(int, boxes.xyxy[i])
	c = CLASS_COLORS.get(cls, (200, 200, 200))
	tid = f" #{int(boxes.id[i])}" if boxes.id is not None else ""

	# fill
	ov = out.copy()
	cv2.rectangle(ov, (x1, y1), (x2, y2), c, -1)
	cv2.addWeighted(ov, 0.15, out, 0.85, 0, out)
	# border
	cv2.rectangle(out, (x1, y1), (x2, y2), c, 2, cv2.LINE_AA)
	# corners
	cl = min(18, (x2 - x1) // 4, (y2 - y1) // 4)
	for (cx, cy), (dx, dy) in [
	((x1, y1), (1, 1)),
	((x2, y1), (-1, 1)),
	((x1, y2), (1, -1)),
	((x2, y2), (-1, -1)),
	]:
	cv2.line(out, (cx, cy), (cx + dx * cl, cy), c, 3, cv2.LINE_AA)
	cv2.line(out, (cx, cy), (cx, cy + dy * cl), c, 3, cv2.LINE_AA)
	# label
	lbl = f"{cls}{tid} {conf:.0%}"
	(lw, lh), _ = cv2.getTextSize(lbl, cv2.FONT_HERSHEY_SIMPLEX, 0.48, 1)
	ly = max(0, y1 - lh - 10)
	cv2.rectangle(out, (x1, ly), (x1 + lw + 10, y1), c, -1, cv2.LINE_AA)
	cv2.putText(
	out, lbl, (x1 + 5, y1 - 4), cv2.FONT_HERSHEY_SIMPLEX, 0.48, (0, 0, 0), 1, cv2.LINE_AA
	)
	n += 1
	counts[cls] += 1
	return out, n, counts


	def draw_hud(frame, fps, tracks, tracker, fnum, ftotal):
	h, w = frame.shape[:2]
	ov = frame.copy()
	cv2.rectangle(ov, (0, 0), (w, 44), (0, 0, 0), -1)
	cv2.addWeighted(ov, 0.65, frame, 0.35, 0, frame)
	fc = (0, 230, 118) if fps > 20 else (0, 200, 255) if fps > 10 else (80, 80, 255)
	cv2.putText(
	frame, f"FPS {fps:.0f}", (14, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.75, fc, 2, cv2.LINE_AA
	)
	tt = tracker.upper()
	(tw, _), _ = cv2.getTextSize(tt, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
	cv2.putText(
	frame,
	tt,
	(w // 2 - tw // 2, 30),
	cv2.FONT_HERSHEY_SIMPLEX,
	0.5,
	(160, 170, 200),
	1,
	cv2.LINE_AA,
	)
	tr = f"TRACKS {tracks}"
	(tw2, _), _ = cv2.getTextSize(tr, cv2.FONT_HERSHEY_SIMPLEX, 0.65, 2)
	cv2.putText(
	frame, tr, (w - tw2 - 14, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 220, 255), 2, cv2.LINE_AA
	)
	if ftotal > 0:
	p = fnum / ftotal
	cv2.rectangle(frame, (0, h - 3), (int(w * p), h), (59, 130, 246), -1)
	return frame


	# ---------------------------------------------------------------------------
	# Video processing
	# ---------------------------------------------------------------------------
	MAX_FRAMES_CPU = 150 # Cap at ~6 seconds of 25fps video on CPU


	def _check_mp4_playable(path: str) -> bool:
	"""Check if an mp4 file uses a browser-compatible codec."""
	try:
	import subprocess

	r = subprocess.run(
	[
	"ffprobe",
	"-v",
	"error",
	"-select_streams",
	"v:0",
	"-show_entries",
	"stream=codec_name",
	"-of",
	"csv=p=0",
	path,
	],
	capture_output=True,
	text=True,
	timeout=10,
	)
	codec = r.stdout.strip()
	return codec in ("h264", "vp9", "av1")
	except Exception:
	return False


	def run_video(video, tracker_type, confidence, classes):
	if video is None:
	return None, "Upload a video to begin."

	model = get_model()
	if model is None:
	return None, "Model not found. Place best.pt in hf_space/models/"

	print(f"\n[UrbanEye] Processing video: {video}")
	print(f"[UrbanEye] Tracker={tracker_type}, Conf={confidence}, Device={_DEVICE}")

	cap = cv2.VideoCapture(video)
	if not cap.isOpened():
	return None, f"Cannot open video: {video}"

	fps_in = cap.get(cv2.CAP_PROP_FPS) or 25
	w, h = int(cap.get(3)), int(cap.get(4))
	total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

	# Cap frames on CPU to prevent hanging
	max_frames = total
	if _DEVICE == "cpu" and total > MAX_FRAMES_CPU:
	max_frames = MAX_FRAMES_CPU
	print(f"[UrbanEye] CPU mode: capping at {max_frames} frames (of {total})")

	print(f"[UrbanEye] Video: {w}x{h} @ {fps_in:.0f}fps, {total} frames, processing {max_frames}")

	# Write as raw AVI first, then always convert to H.264 mp4 via ffmpeg
	tmp = tempfile.NamedTemporaryFile(suffix=".avi", delete=False)
	raw_path = tmp.name
	tmp.close()
	writer = cv2.VideoWriter(raw_path, cv2.VideoWriter_fourcc(*"XVID"), fps_in, (w, h))
	if not writer.isOpened():
	writer = cv2.VideoWriter(raw_path, cv2.VideoWriter_fourcc(*"MJPG"), fps_in, (w, h))
	print(f"[UrbanEye] Writing raw video to {raw_path}")

	tracker_yaml = "bytetrack.yaml" if tracker_type == "ByteTrack" else "botsort.yaml"
	sel = [CLASS_NAMES[CLASS_DISPLAY.index(c)] for c in (classes or CLASS_DISPLAY)]

	frame_count, total_det = 0, 0
	ct = {nm: 0 for nm in CLASS_NAMES}
	times = []

	while frame_count < max_frames:
	ok, frame = cap.read()
	if not ok:
	break

	t0 = time.time()
	res = model.track(
	frame,
	persist=True,
	conf=confidence,
	tracker=tracker_yaml,
	verbose=False,
	device=_DEVICE,
	)
	dt = time.time() - t0
	times.append(dt)
	fps = 1 / dt if dt > 0 else 0

	ann, nt, co = draw_detections(frame, res, sel, confidence)
	ann = draw_hud(ann, fps, nt, tracker_type, frame_count, max_frames)
	writer.write(ann)

	frame_count += 1
	total_det += nt
	for k, v in co.items():
	ct[k] += v

	# Print progress every 10 frames
	if frame_count % 10 == 0 or frame_count == 1:
	print(f"[UrbanEye] Frame {frame_count}/{max_frames} \| FPS: {fps:.1f} \| Tracks: {nt}")

	cap.release()
	writer.release()

	# Convert AVI → H.264 MP4 for browser playback
	import subprocess

	out_path = raw_path.replace(".avi", "_out.mp4")
	print("[UrbanEye] Converting to H.264 with ffmpeg...")
	try:
	result = subprocess.run(
	[
	"ffmpeg",
	"-y",
	"-i",
	raw_path,
	"-c:v",
	"libx264",
	"-preset",
	"fast",
	"-crf",
	"23",
	"-pix_fmt",
	"yuv420p",
	"-movflags",
	"+faststart",
	out_path,
	],
	capture_output=True,
	text=True,
	timeout=180,
	)
	if result.returncode != 0:
	print(f"[UrbanEye] ffmpeg error: {result.stderr[-200:]}")
	out_path = raw_path # fallback to raw avi
	else:
	print(f"[UrbanEye] Converted: {out_path} ({os.path.getsize(out_path) / 1e6:.1f} MB)")
	os.unlink(raw_path) # clean up raw avi
	except Exception as e:
	print(f"[UrbanEye] ffmpeg failed: {e}")
	out_path = raw_path

	af = 1 / (sum(times) / len(times)) if times else 0
	print(f"[UrbanEye] Done! {frame_count} frames, avg {af:.1f} FPS, {total_det} detections\n")

	m = f"Tracker {tracker_type}\n"
	m += f"Frames {frame_count}"
	if max_frames < total:
	m += f" (capped from {total})"
	m += f"\nAvg FPS {af:.1f}\n"
	m += f"Device {'CPU' if _DEVICE == 'cpu' else 'GPU'}\n"
	m += f"Detections {total_det}\n"
	m += f"Threshold {confidence}\n\n"
	for nm in CLASS_NAMES:
	dn = CLASS_DISPLAY[CLASS_NAMES.index(nm)]
	bar = "\u2588" * min(20, ct[nm] // max(1, frame_count))
	m += f" {dn:15s} {ct[nm]:>5d} {bar}\n"
	return out_path, m


	# ---------------------------------------------------------------------------
	# Image processing
	# ---------------------------------------------------------------------------
	def run_image(image, confidence, classes):
	if image is None:
	return None, "Upload an image to detect."

	model = get_model()
	if model is None:
	return None, "Model not found."

	sel = [CLASS_NAMES[CLASS_DISPLAY.index(c)] for c in (classes or CLASS_DISPLAY)]

	print(f"[UrbanEye] Running image detection, conf={confidence}, device={_DEVICE}")
	t0 = time.time()
	res = model(image, conf=confidence, verbose=False, device=_DEVICE)
	dt = time.time() - t0
	print(f"[UrbanEye] Image inference: {dt * 1000:.0f}ms")

	ann, nd, co = draw_detections(image, res, sel, confidence)
	m = f"Detections {nd}\nInference {dt * 1000:.1f} ms\nDevice {'CPU' if _DEVICE == 'cpu' else 'GPU'}\n\n"
	for nm in CLASS_NAMES:
	if co[nm] > 0:
	m += f" {CLASS_DISPLAY[CLASS_NAMES.index(nm)]}: {co[nm]}\n"
	return ann, m


	# ---------------------------------------------------------------------------
	# Theme + CSS
	# ---------------------------------------------------------------------------
	THEME_KWARGS = {
	"body_background_fill": "#0b0f19",
	"body_background_fill_dark": "#0b0f19",
	"block_background_fill": "#111827",
	"block_background_fill_dark": "#111827",
	"block_border_color": "#1e293b",
	"block_border_color_dark": "#1e293b",
	"block_label_background_fill": "#1e293b",
	"block_label_background_fill_dark": "#1e293b",
	"block_label_text_color": "#94a3b8",
	"block_label_text_color_dark": "#94a3b8",
	"block_title_text_color": "#e2e8f0",
	"block_title_text_color_dark": "#e2e8f0",
	"body_text_color": "#e2e8f0",
	"body_text_color_dark": "#e2e8f0",
	"body_text_color_subdued": "#64748b",
	"body_text_color_subdued_dark": "#64748b",
	"button_primary_background_fill": "linear-gradient(135deg, #3b82f6, #6366f1)",
	"button_primary_background_fill_dark": "linear-gradient(135deg, #3b82f6, #6366f1)",
	"button_primary_background_fill_hover": "linear-gradient(135deg, #2563eb, #4f46e5)",
	"button_primary_background_fill_hover_dark": "linear-gradient(135deg, #2563eb, #4f46e5)",
	"button_primary_text_color": "#ffffff",
	"button_primary_text_color_dark": "#ffffff",
	"input_background_fill": "#0f172a",
	"input_background_fill_dark": "#0f172a",
	"input_border_color": "#1e293b",
	"input_border_color_dark": "#1e293b",
	"input_border_color_focus": "#3b82f6",
	"input_border_color_focus_dark": "#3b82f6",
	"border_color_primary": "#1e293b",
	"border_color_primary_dark": "#1e293b",
	}

	CSS = """
	* { font-family: 'Inter', sans-serif !important; }
	.gradio-container { max-width: 1400px !important; margin: 0 auto !important; padding: 0 2rem !important; }

	/* Radio & Checkbox label fix */
	label.svelte-19qdtil,
	.svelte-19qdtil[class*="selected"],
	fieldset label {
	background: #1e293b !important;
	color: #cbd5e1 !important;
	border: 1px solid #334155 !important;
	border-radius: 8px !important;
	}
	label.svelte-19qdtil:hover, fieldset label:hover {
	background: #334155 !important;
	color: #f1f5f9 !important;
	border-color: #3b82f6 !important;
	}
	label.svelte-19qdtil.selected, .svelte-19qdtil.selected {
	background: rgba(59, 130, 246, 0.15) !important;
	color: #60a5fa !important;
	border-color: #3b82f6 !important;
	}
	label.svelte-19qdtil span, fieldset label span,
	input[type="radio"] + span, input[type="checkbox"] + span {
	color: inherit !important;
	font-size: 0.85rem !important;
	font-weight: 500 !important;
	text-transform: none !important;
	letter-spacing: normal !important;
	}
	input[type="checkbox"] { accent-color: #3b82f6 !important; }

	/* hero */
	#hero-title { text-align: center; margin-bottom: -8px; }
	#hero-title h1 {
	font-size: 2.6rem; font-weight: 800; letter-spacing: -0.04em;
	background: linear-gradient(135deg, #60a5fa, #34d399, #a78bfa);
	background-size: 200% 200%;
	-webkit-background-clip: text; -webkit-text-fill-color: transparent;
	background-clip: text;
	animation: glow 8s ease infinite;
	}
	@keyframes glow {
	0%,100%{background-position:0% 50%} 50%{background-position:100% 50%}
	}
	#hero-sub p { text-align:center; color:#94a3b8; font-size:0.95rem; margin-top:0; }
	#hero-classes p {
	text-align:center; font-size:0.82rem; color:#64748b;
	padding:8px 0 4px 0; border-top:1px solid #1e293b; margin-top:4px;
	}

	/* tabs */
	.tab-nav { border-bottom: 1px solid #1e293b !important; }
	.tab-nav button {
	font-weight: 600 !important; font-size: 0.88rem !important;
	color: #64748b !important; padding: 10px 20px !important;
	border: none !important; border-radius: 0 !important;
	border-bottom: 2px solid transparent !important;
	background: transparent !important;
	}
	.tab-nav button:hover { color: #e2e8f0 !important; }
	.tab-nav button.selected {
	color: #60a5fa !important;
	border-bottom-color: #3b82f6 !important;
	}

	/* button */
	button.primary, button.lg {
	font-weight: 700 !important; text-transform: uppercase !important;
	letter-spacing: 0.05em !important; font-size: 0.88rem !important;
	border-radius: 10px !important; padding: 11px 0 !important;
	transition: all 0.3s ease !important;
	box-shadow: 0 4px 14px rgba(59,130,246,0.25) !important;
	}
	button.primary:hover, button.lg:hover {
	transform: translateY(-1px) !important;
	box-shadow: 0 6px 20px rgba(59,130,246,0.35) !important;
	}

	/* metrics mono */
	#metrics textarea, #img-metrics textarea {
	font-family: 'JetBrains Mono', monospace !important;
	font-size: 0.82rem !important; line-height: 1.65 !important;
	color: #6ee7b7 !important;
	background: #0a0f1a !important;
	border: 1px solid #1e293b !important;
	border-radius: 10px !important;
	}

	/* labels */
	label span { font-size: 0.75rem !important; letter-spacing:0.07em !important; text-transform:uppercase !important; }

	/* about code blocks */
	.prose pre {
	background: #0f172a !important; border: 1px solid #1e293b !important;
	border-radius: 10px !important; font-family: 'JetBrains Mono', monospace !important;
	}
	.prose code { color: #60a5fa !important; background: #1e293b !important; border-radius: 4px !important; padding: 1px 6px !important; }
	.prose table th { background: #1e293b !important; color: #60a5fa !important; }
	.prose table td { border-color: #1e293b !important; }

	/* entrance */
	.block { animation: fadeUp 0.5s ease both; }
	@keyframes fadeUp { from { opacity:0; transform:translateY(12px); } to { opacity:1; transform:translateY(0); } }

	footer { opacity: 0.3 !important; }
	"""

	JS = """
	() => {
	document.querySelectorAll('.block').forEach((el,i) => {
	el.style.animationDelay = `${i*60}ms`;
	});
	}
	"""


	# ---------------------------------------------------------------------------
	# App
	# ---------------------------------------------------------------------------
	def main():
	import gradio as gr

	theme = gr.themes.Base(
	primary_hue=gr.themes.colors.blue,
	secondary_hue=gr.themes.colors.indigo,
	neutral_hue=gr.themes.colors.slate,
	font=gr.themes.GoogleFont("Inter"),
	font_mono=gr.themes.GoogleFont("JetBrains Mono"),
	).set(**THEME_KWARGS)

	with gr.Blocks(theme=theme, css=CSS, js=JS, title="UrbanEye") as demo:
	gr.Markdown("# UrbanEye", elem_id="hero-title")
	gr.Markdown(
	"Autonomous Driving Perception  •  YOLOv11  •  "
	"ByteTrack + DeepSORT",
	elem_id="hero-sub",
	)
	gr.Markdown(
	"<span style='color:#34d399'>Vehicle</span>  •  "
	"<span style='color:#f87171'>Pedestrian</span>  •  "
	"<span style='color:#fb923c'>Cyclist</span>  •  "
	"<span style='color:#facc15'>Traffic Light</span>  •  "
	"<span style='color:#c084fc'>Traffic Sign</span>",
	elem_id="hero-classes",
	)

	with gr.Tabs():
	with gr.TabItem("Video Tracking"):
	with gr.Row():
	with gr.Column(scale=5):
	vid_in = gr.Video(label="Input Video", height=340)
	with gr.Column(scale=5):
	vid_out = gr.Video(label="Output", height=340)

	with gr.Row():
	with gr.Column(scale=3):
	tracker = gr.Radio(
	["ByteTrack", "DeepSORT"], value="ByteTrack", label="Tracker"
	)
	with gr.Column(scale=3):
	conf = gr.Slider(0.10, 0.90, 0.35, step=0.05, label="Confidence")
	with gr.Column(scale=4):
	cls = gr.CheckboxGroup(CLASS_DISPLAY, value=CLASS_DISPLAY, label="Classes")

	run_btn = gr.Button("Run Detection + Tracking", variant="primary", size="lg")
	met = gr.Textbox(label="Metrics", lines=9, interactive=False, elem_id="metrics")
	run_btn.click(run_video, [vid_in, tracker, conf, cls], [vid_out, met])

	with gr.TabItem("Image Detection"):
	with gr.Row():
	with gr.Column(scale=5):
	img_in = gr.Image(label="Input Image", type="numpy", height=360)
	with gr.Column(scale=5):
	img_out = gr.Image(label="Result", height=360)

	with gr.Row():
	with gr.Column(scale=4):
	img_conf = gr.Slider(0.10, 0.90, 0.35, step=0.05, label="Confidence")
	with gr.Column(scale=6):
	img_cls = gr.CheckboxGroup(
	CLASS_DISPLAY, value=CLASS_DISPLAY, label="Classes"
	)

	img_btn = gr.Button("Detect Objects", variant="primary", size="lg")
	img_met = gr.Textbox(
	label="Results", lines=6, interactive=False, elem_id="img-metrics"
	)
	img_btn.click(run_image, [img_in, img_conf, img_cls], [img_out, img_met])

	with gr.TabItem("About"):
	gr.Markdown("""
	## Architecture

	```
	CARLA Simulator ──▶ YOLOv11n ──▶ ByteTrack / DeepSORT ──▶ MOT Evaluation
	50K+ frames 5 classes Dual tracker MOTA · IDF1
	```

	\| Spec \| Detail \|
	\|------\|--------\|
	\| Model \| YOLOv11n — 2.6M params, 6.4 GFLOPs \|
	\| Training \| 50 epochs, AdamW, cosine LR, AMP \|
	\| GPU \| RTX 5070 Laptop (8.5 GB) \|
	\| mAP@50 \| 0.47 (baseline) \|
	\| Classes \| Vehicle, Pedestrian, Cyclist, Traffic Light, Traffic Sign \|

	\| Tracker \| Method \| FPS \| Strength \|
	\|---------\|--------\|-----\|----------\|
	\| ByteTrack \| IoU + Kalman, two-stage matching \| 60+ \| Speed, low-conf rescue \|
	\| DeepSORT \| IoU + Kalman + MobileNetV2 Re-ID \| 25-35 \| Identity through occlusion \|

	---
	Navnit Amrutharaj • [github.com/ninjacode911](https://github.com/ninjacode911)
	""")

	print("[UrbanEye] Starting server...")
	demo.launch(share=False)


	if __name__ == "__main__":
	main()