"""UrbanEye — Autonomous Driving Perception Pipeline Demo."""
import os
import tempfile
import time
from pathlib import Path
import cv2
# ---------------------------------------------------------------------------
# Device detection — force CPU if CUDA arch unsupported
# ---------------------------------------------------------------------------
def _detect_device() -> str:
try:
import torch
if not torch.cuda.is_available():
return "cpu"
cap = torch.cuda.get_device_capability(0)
supported = torch.cuda.get_arch_list()
device_arch = f"sm_{cap[0]}{cap[1]}"
for s in supported:
if device_arch in s:
torch.zeros(1, device="cuda")
return "0"
return "cpu"
except Exception:
return "cpu"
_DEVICE = _detect_device()
if _DEVICE == "cpu":
os.environ["CUDA_VISIBLE_DEVICES"] = ""
print("[UrbanEye] Running on CPU (GPU not compatible with this PyTorch)")
else:
print(f"[UrbanEye] Running on GPU device {_DEVICE}")
SCRIPT_DIR = Path(__file__).parent
MODEL_PATH = SCRIPT_DIR / "models" / "best.pt"
CLASS_NAMES = ["vehicle", "pedestrian", "cyclist", "traffic_light", "traffic_sign"]
CLASS_DISPLAY = ["Vehicle", "Pedestrian", "Cyclist", "Traffic Light", "Traffic Sign"]
CLASS_COLORS = {
"vehicle": (0, 220, 80),
"pedestrian": (60, 60, 255),
"cyclist": (0, 160, 255),
"traffic_light": (0, 235, 235),
"traffic_sign": (220, 50, 220),
}
_model = None
def get_model():
global _model
if _model is None and MODEL_PATH.exists():
print(f"[UrbanEye] Loading model from {MODEL_PATH}...")
from ultralytics import YOLO
_model = YOLO(str(MODEL_PATH))
# Force to correct device
_model.to(f"cuda:{_DEVICE}" if _DEVICE != "cpu" else "cpu")
print(f"[UrbanEye] Model loaded on {'CPU' if _DEVICE == 'cpu' else 'GPU'}")
return _model
def draw_detections(frame, results, class_filter, conf_thresh):
out = frame.copy()
n = 0
counts = {c: 0 for c in CLASS_NAMES}
if not results or len(results) == 0:
return out, 0, counts
boxes = results[0].boxes
if boxes is None or len(boxes) == 0:
return out, 0, counts
for i in range(len(boxes)):
conf = float(boxes.conf[i])
cls_id = int(boxes.cls[i])
if conf < conf_thresh or cls_id >= len(CLASS_NAMES):
continue
cls = CLASS_NAMES[cls_id]
if cls not in class_filter:
continue
x1, y1, x2, y2 = map(int, boxes.xyxy[i])
c = CLASS_COLORS.get(cls, (200, 200, 200))
tid = f" #{int(boxes.id[i])}" if boxes.id is not None else ""
# fill
ov = out.copy()
cv2.rectangle(ov, (x1, y1), (x2, y2), c, -1)
cv2.addWeighted(ov, 0.15, out, 0.85, 0, out)
# border
cv2.rectangle(out, (x1, y1), (x2, y2), c, 2, cv2.LINE_AA)
# corners
cl = min(18, (x2 - x1) // 4, (y2 - y1) // 4)
for (cx, cy), (dx, dy) in [
((x1, y1), (1, 1)),
((x2, y1), (-1, 1)),
((x1, y2), (1, -1)),
((x2, y2), (-1, -1)),
]:
cv2.line(out, (cx, cy), (cx + dx * cl, cy), c, 3, cv2.LINE_AA)
cv2.line(out, (cx, cy), (cx, cy + dy * cl), c, 3, cv2.LINE_AA)
# label
lbl = f"{cls}{tid} {conf:.0%}"
(lw, lh), _ = cv2.getTextSize(lbl, cv2.FONT_HERSHEY_SIMPLEX, 0.48, 1)
ly = max(0, y1 - lh - 10)
cv2.rectangle(out, (x1, ly), (x1 + lw + 10, y1), c, -1, cv2.LINE_AA)
cv2.putText(
out, lbl, (x1 + 5, y1 - 4), cv2.FONT_HERSHEY_SIMPLEX, 0.48, (0, 0, 0), 1, cv2.LINE_AA
)
n += 1
counts[cls] += 1
return out, n, counts
def draw_hud(frame, fps, tracks, tracker, fnum, ftotal):
h, w = frame.shape[:2]
ov = frame.copy()
cv2.rectangle(ov, (0, 0), (w, 44), (0, 0, 0), -1)
cv2.addWeighted(ov, 0.65, frame, 0.35, 0, frame)
fc = (0, 230, 118) if fps > 20 else (0, 200, 255) if fps > 10 else (80, 80, 255)
cv2.putText(
frame, f"FPS {fps:.0f}", (14, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.75, fc, 2, cv2.LINE_AA
)
tt = tracker.upper()
(tw, _), _ = cv2.getTextSize(tt, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
cv2.putText(
frame,
tt,
(w // 2 - tw // 2, 30),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(160, 170, 200),
1,
cv2.LINE_AA,
)
tr = f"TRACKS {tracks}"
(tw2, _), _ = cv2.getTextSize(tr, cv2.FONT_HERSHEY_SIMPLEX, 0.65, 2)
cv2.putText(
frame, tr, (w - tw2 - 14, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 220, 255), 2, cv2.LINE_AA
)
if ftotal > 0:
p = fnum / ftotal
cv2.rectangle(frame, (0, h - 3), (int(w * p), h), (59, 130, 246), -1)
return frame
# ---------------------------------------------------------------------------
# Video processing
# ---------------------------------------------------------------------------
MAX_FRAMES_CPU = 150 # Cap at ~6 seconds of 25fps video on CPU
def _check_mp4_playable(path: str) -> bool:
"""Check if an mp4 file uses a browser-compatible codec."""
try:
import subprocess
r = subprocess.run(
[
"ffprobe",
"-v",
"error",
"-select_streams",
"v:0",
"-show_entries",
"stream=codec_name",
"-of",
"csv=p=0",
path,
],
capture_output=True,
text=True,
timeout=10,
)
codec = r.stdout.strip()
return codec in ("h264", "vp9", "av1")
except Exception:
return False
def run_video(video, tracker_type, confidence, classes):
if video is None:
return None, "Upload a video to begin."
model = get_model()
if model is None:
return None, "Model not found. Place best.pt in hf_space/models/"
print(f"\n[UrbanEye] Processing video: {video}")
print(f"[UrbanEye] Tracker={tracker_type}, Conf={confidence}, Device={_DEVICE}")
cap = cv2.VideoCapture(video)
if not cap.isOpened():
return None, f"Cannot open video: {video}"
fps_in = cap.get(cv2.CAP_PROP_FPS) or 25
w, h = int(cap.get(3)), int(cap.get(4))
total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# Cap frames on CPU to prevent hanging
max_frames = total
if _DEVICE == "cpu" and total > MAX_FRAMES_CPU:
max_frames = MAX_FRAMES_CPU
print(f"[UrbanEye] CPU mode: capping at {max_frames} frames (of {total})")
print(f"[UrbanEye] Video: {w}x{h} @ {fps_in:.0f}fps, {total} frames, processing {max_frames}")
# Write as raw AVI first, then always convert to H.264 mp4 via ffmpeg
tmp = tempfile.NamedTemporaryFile(suffix=".avi", delete=False)
raw_path = tmp.name
tmp.close()
writer = cv2.VideoWriter(raw_path, cv2.VideoWriter_fourcc(*"XVID"), fps_in, (w, h))
if not writer.isOpened():
writer = cv2.VideoWriter(raw_path, cv2.VideoWriter_fourcc(*"MJPG"), fps_in, (w, h))
print(f"[UrbanEye] Writing raw video to {raw_path}")
tracker_yaml = "bytetrack.yaml" if tracker_type == "ByteTrack" else "botsort.yaml"
sel = [CLASS_NAMES[CLASS_DISPLAY.index(c)] for c in (classes or CLASS_DISPLAY)]
frame_count, total_det = 0, 0
ct = {nm: 0 for nm in CLASS_NAMES}
times = []
while frame_count < max_frames:
ok, frame = cap.read()
if not ok:
break
t0 = time.time()
res = model.track(
frame,
persist=True,
conf=confidence,
tracker=tracker_yaml,
verbose=False,
device=_DEVICE,
)
dt = time.time() - t0
times.append(dt)
fps = 1 / dt if dt > 0 else 0
ann, nt, co = draw_detections(frame, res, sel, confidence)
ann = draw_hud(ann, fps, nt, tracker_type, frame_count, max_frames)
writer.write(ann)
frame_count += 1
total_det += nt
for k, v in co.items():
ct[k] += v
# Print progress every 10 frames
if frame_count % 10 == 0 or frame_count == 1:
print(f"[UrbanEye] Frame {frame_count}/{max_frames} | FPS: {fps:.1f} | Tracks: {nt}")
cap.release()
writer.release()
# Convert AVI → H.264 MP4 for browser playback
import subprocess
out_path = raw_path.replace(".avi", "_out.mp4")
print("[UrbanEye] Converting to H.264 with ffmpeg...")
try:
result = subprocess.run(
[
"ffmpeg",
"-y",
"-i",
raw_path,
"-c:v",
"libx264",
"-preset",
"fast",
"-crf",
"23",
"-pix_fmt",
"yuv420p",
"-movflags",
"+faststart",
out_path,
],
capture_output=True,
text=True,
timeout=180,
)
if result.returncode != 0:
print(f"[UrbanEye] ffmpeg error: {result.stderr[-200:]}")
out_path = raw_path # fallback to raw avi
else:
print(f"[UrbanEye] Converted: {out_path} ({os.path.getsize(out_path) / 1e6:.1f} MB)")
os.unlink(raw_path) # clean up raw avi
except Exception as e:
print(f"[UrbanEye] ffmpeg failed: {e}")
out_path = raw_path
af = 1 / (sum(times) / len(times)) if times else 0
print(f"[UrbanEye] Done! {frame_count} frames, avg {af:.1f} FPS, {total_det} detections\n")
m = f"Tracker {tracker_type}\n"
m += f"Frames {frame_count}"
if max_frames < total:
m += f" (capped from {total})"
m += f"\nAvg FPS {af:.1f}\n"
m += f"Device {'CPU' if _DEVICE == 'cpu' else 'GPU'}\n"
m += f"Detections {total_det}\n"
m += f"Threshold {confidence}\n\n"
for nm in CLASS_NAMES:
dn = CLASS_DISPLAY[CLASS_NAMES.index(nm)]
bar = "\u2588" * min(20, ct[nm] // max(1, frame_count))
m += f" {dn:15s} {ct[nm]:>5d} {bar}\n"
return out_path, m
# ---------------------------------------------------------------------------
# Image processing
# ---------------------------------------------------------------------------
def run_image(image, confidence, classes):
if image is None:
return None, "Upload an image to detect."
model = get_model()
if model is None:
return None, "Model not found."
sel = [CLASS_NAMES[CLASS_DISPLAY.index(c)] for c in (classes or CLASS_DISPLAY)]
print(f"[UrbanEye] Running image detection, conf={confidence}, device={_DEVICE}")
t0 = time.time()
res = model(image, conf=confidence, verbose=False, device=_DEVICE)
dt = time.time() - t0
print(f"[UrbanEye] Image inference: {dt * 1000:.0f}ms")
ann, nd, co = draw_detections(image, res, sel, confidence)
m = f"Detections {nd}\nInference {dt * 1000:.1f} ms\nDevice {'CPU' if _DEVICE == 'cpu' else 'GPU'}\n\n"
for nm in CLASS_NAMES:
if co[nm] > 0:
m += f" {CLASS_DISPLAY[CLASS_NAMES.index(nm)]}: {co[nm]}\n"
return ann, m
# ---------------------------------------------------------------------------
# Theme + CSS
# ---------------------------------------------------------------------------
THEME_KWARGS = {
"body_background_fill": "#0b0f19",
"body_background_fill_dark": "#0b0f19",
"block_background_fill": "#111827",
"block_background_fill_dark": "#111827",
"block_border_color": "#1e293b",
"block_border_color_dark": "#1e293b",
"block_label_background_fill": "#1e293b",
"block_label_background_fill_dark": "#1e293b",
"block_label_text_color": "#94a3b8",
"block_label_text_color_dark": "#94a3b8",
"block_title_text_color": "#e2e8f0",
"block_title_text_color_dark": "#e2e8f0",
"body_text_color": "#e2e8f0",
"body_text_color_dark": "#e2e8f0",
"body_text_color_subdued": "#64748b",
"body_text_color_subdued_dark": "#64748b",
"button_primary_background_fill": "linear-gradient(135deg, #3b82f6, #6366f1)",
"button_primary_background_fill_dark": "linear-gradient(135deg, #3b82f6, #6366f1)",
"button_primary_background_fill_hover": "linear-gradient(135deg, #2563eb, #4f46e5)",
"button_primary_background_fill_hover_dark": "linear-gradient(135deg, #2563eb, #4f46e5)",
"button_primary_text_color": "#ffffff",
"button_primary_text_color_dark": "#ffffff",
"input_background_fill": "#0f172a",
"input_background_fill_dark": "#0f172a",
"input_border_color": "#1e293b",
"input_border_color_dark": "#1e293b",
"input_border_color_focus": "#3b82f6",
"input_border_color_focus_dark": "#3b82f6",
"border_color_primary": "#1e293b",
"border_color_primary_dark": "#1e293b",
}
CSS = """
* { font-family: 'Inter', sans-serif !important; }
.gradio-container { max-width: 1400px !important; margin: 0 auto !important; padding: 0 2rem !important; }
/* Radio & Checkbox label fix */
label.svelte-19qdtil,
.svelte-19qdtil[class*="selected"],
fieldset label {
background: #1e293b !important;
color: #cbd5e1 !important;
border: 1px solid #334155 !important;
border-radius: 8px !important;
}
label.svelte-19qdtil:hover, fieldset label:hover {
background: #334155 !important;
color: #f1f5f9 !important;
border-color: #3b82f6 !important;
}
label.svelte-19qdtil.selected, .svelte-19qdtil.selected {
background: rgba(59, 130, 246, 0.15) !important;
color: #60a5fa !important;
border-color: #3b82f6 !important;
}
label.svelte-19qdtil span, fieldset label span,
input[type="radio"] + span, input[type="checkbox"] + span {
color: inherit !important;
font-size: 0.85rem !important;
font-weight: 500 !important;
text-transform: none !important;
letter-spacing: normal !important;
}
input[type="checkbox"] { accent-color: #3b82f6 !important; }
/* hero */
#hero-title { text-align: center; margin-bottom: -8px; }
#hero-title h1 {
font-size: 2.6rem; font-weight: 800; letter-spacing: -0.04em;
background: linear-gradient(135deg, #60a5fa, #34d399, #a78bfa);
background-size: 200% 200%;
-webkit-background-clip: text; -webkit-text-fill-color: transparent;
background-clip: text;
animation: glow 8s ease infinite;
}
@keyframes glow {
0%,100%{background-position:0% 50%} 50%{background-position:100% 50%}
}
#hero-sub p { text-align:center; color:#94a3b8; font-size:0.95rem; margin-top:0; }
#hero-classes p {
text-align:center; font-size:0.82rem; color:#64748b;
padding:8px 0 4px 0; border-top:1px solid #1e293b; margin-top:4px;
}
/* tabs */
.tab-nav { border-bottom: 1px solid #1e293b !important; }
.tab-nav button {
font-weight: 600 !important; font-size: 0.88rem !important;
color: #64748b !important; padding: 10px 20px !important;
border: none !important; border-radius: 0 !important;
border-bottom: 2px solid transparent !important;
background: transparent !important;
}
.tab-nav button:hover { color: #e2e8f0 !important; }
.tab-nav button.selected {
color: #60a5fa !important;
border-bottom-color: #3b82f6 !important;
}
/* button */
button.primary, button.lg {
font-weight: 700 !important; text-transform: uppercase !important;
letter-spacing: 0.05em !important; font-size: 0.88rem !important;
border-radius: 10px !important; padding: 11px 0 !important;
transition: all 0.3s ease !important;
box-shadow: 0 4px 14px rgba(59,130,246,0.25) !important;
}
button.primary:hover, button.lg:hover {
transform: translateY(-1px) !important;
box-shadow: 0 6px 20px rgba(59,130,246,0.35) !important;
}
/* metrics mono */
#metrics textarea, #img-metrics textarea {
font-family: 'JetBrains Mono', monospace !important;
font-size: 0.82rem !important; line-height: 1.65 !important;
color: #6ee7b7 !important;
background: #0a0f1a !important;
border: 1px solid #1e293b !important;
border-radius: 10px !important;
}
/* labels */
label span { font-size: 0.75rem !important; letter-spacing:0.07em !important; text-transform:uppercase !important; }
/* about code blocks */
.prose pre {
background: #0f172a !important; border: 1px solid #1e293b !important;
border-radius: 10px !important; font-family: 'JetBrains Mono', monospace !important;
}
.prose code { color: #60a5fa !important; background: #1e293b !important; border-radius: 4px !important; padding: 1px 6px !important; }
.prose table th { background: #1e293b !important; color: #60a5fa !important; }
.prose table td { border-color: #1e293b !important; }
/* entrance */
.block { animation: fadeUp 0.5s ease both; }
@keyframes fadeUp { from { opacity:0; transform:translateY(12px); } to { opacity:1; transform:translateY(0); } }
footer { opacity: 0.3 !important; }
"""
JS = """
() => {
document.querySelectorAll('.block').forEach((el,i) => {
el.style.animationDelay = `${i*60}ms`;
});
}
"""
# ---------------------------------------------------------------------------
# App
# ---------------------------------------------------------------------------
def main():
import gradio as gr
theme = gr.themes.Base(
primary_hue=gr.themes.colors.blue,
secondary_hue=gr.themes.colors.indigo,
neutral_hue=gr.themes.colors.slate,
font=gr.themes.GoogleFont("Inter"),
font_mono=gr.themes.GoogleFont("JetBrains Mono"),
).set(**THEME_KWARGS)
with gr.Blocks(theme=theme, css=CSS, js=JS, title="UrbanEye") as demo:
gr.Markdown("# UrbanEye", elem_id="hero-title")
gr.Markdown(
"Autonomous Driving Perception • YOLOv11 • "
"ByteTrack + DeepSORT",
elem_id="hero-sub",
)
gr.Markdown(
"Vehicle • "
"Pedestrian • "
"Cyclist • "
"Traffic Light • "
"Traffic Sign",
elem_id="hero-classes",
)
with gr.Tabs():
with gr.TabItem("Video Tracking"):
with gr.Row():
with gr.Column(scale=5):
vid_in = gr.Video(label="Input Video", height=340)
with gr.Column(scale=5):
vid_out = gr.Video(label="Output", height=340)
with gr.Row():
with gr.Column(scale=3):
tracker = gr.Radio(
["ByteTrack", "DeepSORT"], value="ByteTrack", label="Tracker"
)
with gr.Column(scale=3):
conf = gr.Slider(0.10, 0.90, 0.35, step=0.05, label="Confidence")
with gr.Column(scale=4):
cls = gr.CheckboxGroup(CLASS_DISPLAY, value=CLASS_DISPLAY, label="Classes")
run_btn = gr.Button("Run Detection + Tracking", variant="primary", size="lg")
met = gr.Textbox(label="Metrics", lines=9, interactive=False, elem_id="metrics")
run_btn.click(run_video, [vid_in, tracker, conf, cls], [vid_out, met])
with gr.TabItem("Image Detection"):
with gr.Row():
with gr.Column(scale=5):
img_in = gr.Image(label="Input Image", type="numpy", height=360)
with gr.Column(scale=5):
img_out = gr.Image(label="Result", height=360)
with gr.Row():
with gr.Column(scale=4):
img_conf = gr.Slider(0.10, 0.90, 0.35, step=0.05, label="Confidence")
with gr.Column(scale=6):
img_cls = gr.CheckboxGroup(
CLASS_DISPLAY, value=CLASS_DISPLAY, label="Classes"
)
img_btn = gr.Button("Detect Objects", variant="primary", size="lg")
img_met = gr.Textbox(
label="Results", lines=6, interactive=False, elem_id="img-metrics"
)
img_btn.click(run_image, [img_in, img_conf, img_cls], [img_out, img_met])
with gr.TabItem("About"):
gr.Markdown("""
## Architecture
```
CARLA Simulator ──▶ YOLOv11n ──▶ ByteTrack / DeepSORT ──▶ MOT Evaluation
50K+ frames 5 classes Dual tracker MOTA · IDF1
```
| Spec | Detail |
|------|--------|
| Model | YOLOv11n — 2.6M params, 6.4 GFLOPs |
| Training | 50 epochs, AdamW, cosine LR, AMP |
| GPU | RTX 5070 Laptop (8.5 GB) |
| mAP@50 | 0.47 (baseline) |
| Classes | Vehicle, Pedestrian, Cyclist, Traffic Light, Traffic Sign |
| Tracker | Method | FPS | Strength |
|---------|--------|-----|----------|
| ByteTrack | IoU + Kalman, two-stage matching | 60+ | Speed, low-conf rescue |
| DeepSORT | IoU + Kalman + MobileNetV2 Re-ID | 25-35 | Identity through occlusion |
---
*Navnit Amrutharaj • [github.com/ninjacode911](https://github.com/ninjacode911)*
""")
print("[UrbanEye] Starting server...")
demo.launch(share=False)
if __name__ == "__main__":
main()