import streamlit as st import tensorflow as tf import numpy as np import cv2 import tempfile, os, time from ultralytics import YOLO from huggingface_hub import hf_hub_download # ── Page config ─────────────────────────────────────────────── st.set_page_config( page_title="ShopGuard AI", page_icon="🛡️", layout="wide" ) st.markdown(""" """, unsafe_allow_html=True) # ── Config ──────────────────────────────────────────────────── FRAMES_PER_VIDEO = 16 IMG_SIZE = 224 PERSON_CLASS = 0 YOLO_CONF = 0.3 PAD = 0.10 MODEL_CONFIGS = { "Model A — General": { "repo_id": "higsboson/shoplifting_exp_a", "filename": "shoplifting_a.keras", "default_threshold": 0.50, "label": "A" }, "Model B — Kitchen": { "repo_id": "higsboson/shoplifting_exp_b", "filename": "best_model.keras", "default_threshold": 0.50, "label": "B" }, "Model C — Lab": { "repo_id": "higsboson/shoplifting_exp_c", "filename": "shoplifting_exp_c.keras", "default_threshold": 0.50, "label": "C" }, } # ── Loaders ─────────────────────────────────────────────────── @st.cache_resource def load_yolo(): return YOLO("yolo11n.pt") @st.cache_resource def load_mobilenet(): base = tf.keras.applications.MobileNetV2( input_shape=(IMG_SIZE, IMG_SIZE, 3), include_top=False, pooling="avg", weights="imagenet" ) base.trainable = False return base @st.cache_resource def load_lstm(repo_id, filename): path = hf_hub_download(repo_id=repo_id, filename=filename) return tf.keras.models.load_model(path) # ── Pipeline ────────────────────────────────────────────────── def extract_frames(video_path, n=FRAMES_PER_VIDEO): cap = cv2.VideoCapture(video_path) total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) idxs = np.linspace(0, max(total - 1, 0), n, dtype=int) frames = {} for idx in idxs: cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx)) ret, frame = cap.read() if ret: frames[idx] = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) cap.release() return frames, idxs def crop_person(frame, yolo_model, last_box): h, w = frame.shape[:2] results = yolo_model(frame, conf=YOLO_CONF, classes=[PERSON_CLASS], verbose=False) boxes = results[0].boxes if boxes is not None and len(boxes): best = max(boxes, key=lambda b: b.conf.item()) x1, y1, x2, y2 = map(int, best.xyxy[0].tolist()) pw, ph = x2 - x1, y2 - y1 x1 = max(0, x1 - int(PAD * pw)) y1 = max(0, y1 - int(PAD * ph)) x2 = min(w, x2 + int(PAD * pw)) y2 = min(h, y2 + int(PAD * ph)) last_box[0] = (x1, y1, x2, y2) if last_box[0]: x1, y1, x2, y2 = last_box[0] crop = frame[y1:y2, x1:x2] else: crop = frame return cv2.resize(crop, (IMG_SIZE, IMG_SIZE)) def run_inference(video_path, yolo_model, mobilenet, lstm_model, threshold): frames_dict, idxs = extract_frames(video_path) last_box = [None] crops = [] for idx in idxs: frame = frames_dict.get(idx, np.zeros((IMG_SIZE, IMG_SIZE, 3), np.uint8)) crops.append(crop_person(frame, yolo_model, last_box)) crops_arr = np.array(crops, dtype=np.float32) crops_pp = tf.keras.applications.mobilenet_v2.preprocess_input(crops_arr) features = mobilenet.predict(crops_pp, verbose=0) features = features[np.newaxis, ...] prob = lstm_model.predict(features, verbose=0)[0][0] label = "SHOPLIFTING" if prob >= threshold else "NORMAL" return float(prob), label, crops # ── Header ──────────────────────────────────────────────────── st.markdown("""
🛡️ SHOPGUARD AI
YOLO11n → MobileNetV2 → Attention LSTM | FYP Demo System