Update app.py

app.py CHANGED
@@ -14,76 +14,125 @@
 #
 # Fencing Scoreboard Clips - YOLO x AutoGluon (Gradio)

-
-
 import numpy as np
-import pathlib
-import zipfile
-import shutil
 import pandas as pd
-import …
 from ultralytics import YOLO
-from autogluon.tabular import TabularPredictor
-from huggingface_hub import hf_hub_download

-# -------------------
-# …
-# -------------------
 CACHE_DIR = pathlib.Path("hf_assets")
-CACHE_DIR.mkdir(exist_ok=True)
 DEBUG_DIR = pathlib.Path("debug_frames")
 DEBUG_DIR.mkdir(exist_ok=True)

[19 removed lines (old 37-55) unrecoverable from the page extraction]
-def …
-    global …
-    if …
[12 removed lines (old 59-70) unrecoverable from the page extraction]
-    return _ag_predictor
-
-# -------------------
-# Scoreboard isolation
-# -------------------
 def isolate_scoreboard_color(frame_bgr: np.ndarray,
                              conf: float = YOLO_CONF,
                              iou: float = YOLO_IOU,
                              keep_conf: float = KEEP_CONF,
                              debug: bool = False,
                              frame_id: int = None) -> np.ndarray:
     H, W = frame_bgr.shape[:2]
     gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
     gray = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)

-    primary_thr …
     fallback_thr = max(0.65, primary_thr - 0.05)

     chosen_box = None
@@ -91,7 +140,7 @@ def isolate_scoreboard_color(frame_bgr: np.ndarray,
     if len(res):
         r = res[0]
         if getattr(r, "boxes", None) is not None and len(r.boxes) > 0:
-            boxes …
             scores = r.boxes.conf.cpu().numpy()
             candidates = list(zip(boxes, scores))

@@ -114,92 +163,240 @@
         dbg = gray.copy()
         if chosen_box is not None:
             x1, y1, x2, y2 = [int(round(v)) for v in chosen_box]
-            cv2.rectangle(dbg, (x1, …
             out_path = DEBUG_DIR / f"frame_{frame_id:06d}.jpg"
             cv2.imwrite(str(out_path), dbg)
             print(f"[DEBUG] Saved debug frame → {out_path}")

     return gray

[5 removed lines (old 124-128) unrecoverable from the page extraction]
     max_score = score.max()
     raw_cutoff = 0.7 * max_score if max_score > 0 else 0.4
-    z = (score …
-

     out_times = []
-
-
-
-        if score.iloc[i] > score.iloc[i-1] and score.iloc[i] > score.iloc[i+1]:
-            if ts >= 1.0:  # guard against first second
-                out_times.append(ts)

     grouped = []
     for t in out_times:
         if (not grouped) or (t - grouped[-1]) > GROUP_GAP_S:
             grouped.append(t)
     return grouped

-# -------------------
-# …
-# -------------------
-def …
[28 removed lines (old 152-179) unrecoverable from the page extraction]
-    if …
-        return [], "…
[4 removed lines (old 182-185) unrecoverable from the page extraction]

     events = pick_events(df, score, fps)
     if not events:
         return [], "⚠️ No touches confidently detected in this video."

     clips = []
[11 removed lines (old 192-202) unrecoverable from the page extraction]

 def _make_progress_bar(percent: int, final_text: str = None):
     text = f"{percent}%" if not final_text else final_text
     return f"""
@@ -209,38 +406,11 @@ def _make_progress_bar(percent: int, final_text: str = None)
     </div>
     """

-
-# Wrapped run (step-based)
-# -------------------
-def wrapped_run(video_file):
     if not video_file:
-        yield …
         return
-
-
-
-    yield gr.update(value=[], visible=False), "Running predictor...", gr.update(value=_make_progress_bar(70), visible=True)
-
-    clips, status_msg = extract_score_clips(video_file, debug=False)
-    final_bar = _make_progress_bar(100, "✅ Done")
-    yield gr.update(value=clips, visible=bool(clips)), status_msg, gr.update(value=final_bar, visible=True)
-
-# -------------------
-# Gradio UI
-# -------------------
-with gr.Blocks() as demo:
-    gr.Markdown("## 🤺 Fencing Score Detector\nUpload a bout video and detect touches.")
-
-    in_video = gr.Video(label="Upload Bout Video")  # fixed: no type="filepath"
-    run_btn = gr.Button("Detect Touches", elem_id="detect-btn")
-
-    status = gr.Markdown("Status messages will appear here.")
-    progress_html = gr.HTML("")
-    gallery = gr.Gallery(label="Detected Clips", visible=False)
-
-    run_btn.click(fn=wrapped_run, inputs=in_video, outputs=[gallery, status, progress_html])
-
-if __name__ == "__main__":
-    demo.queue(max_size=20)
-    demo.launch(debug=True)
 #
 # Fencing Scoreboard Clips - YOLO x AutoGluon (Gradio)

+# -*- coding: utf-8 -*-
+# Fencing Scoreboard Clips - YOLO x AutoGluon (Gradio)
+
+import os, sys, zipfile, shutil, subprocess, tempfile, pathlib
+from typing import List, Tuple
+import uuid
+
 import numpy as np
 import pandas as pd
+import cv2
+import gradio as gr
+
+def _pip(pkgs: List[str]):
+    import subprocess, sys
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "--quiet", *pkgs])
+
+try:
+    import ultralytics
+except ImportError:
+    _pip(["ultralytics"])
+    import ultralytics
+
+try:
+    import ffmpeg
+except ImportError:
+    try:
+        _pip(["ffmpeg-python"])
+        import ffmpeg
+    except Exception:
+        ffmpeg = None
+
+try:
+    from autogluon.tabular import TabularPredictor
+except ImportError:
+    _pip(["autogluon.tabular"])
+    from autogluon.tabular import TabularPredictor
+
+try:
+    from huggingface_hub import hf_hub_download
+except ImportError:
+    _pip(["huggingface_hub"])
+    from huggingface_hub import hf_hub_download
+
 from ultralytics import YOLO

+# ----------------------------
+# Config — Hugging Face repos
+# ----------------------------
+YOLO_REPO_ID = os.getenv("YOLO_REPO_ID", "mastefan/fencing-scoreboard-yolov8")
+YOLO_FILENAME = os.getenv("YOLO_FILENAME", "best.pt")
+
+AG_REPO_ID = os.getenv("AG_REPO_ID", "emkessle/2024-24679-fencing-touch-predictor")
+AG_ZIP_NAME = os.getenv("AG_ZIP_NAME", "autogluon_predictor_dir.zip")
+
+FRAME_SKIP = int(os.getenv("FRAME_SKIP", "2"))
+KEEP_CONF = float(os.getenv("KEEP_CONF", "0.85"))
+YOLO_CONF = float(os.getenv("YOLO_CONF", "0.25"))
+YOLO_IOU = float(os.getenv("YOLO_IOU", "0.50"))
+GROUP_GAP_S = float(os.getenv("GROUP_GAP_S", "1.5"))
+CLIP_PAD_S = float(os.getenv("CLIP_PAD_S", "2.0"))
+
 CACHE_DIR = pathlib.Path("hf_assets")
+CACHE_DIR.mkdir(parents=True, exist_ok=True)
+
 DEBUG_DIR = pathlib.Path("debug_frames")
 DEBUG_DIR.mkdir(exist_ok=True)

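Every knob above is read through os.getenv, so a Space can be retuned without a code change. A hypothetical override (not part of the commit; in a real Space you would set the variable in the Settings → Variables panel instead):

    # Hypothetical: sample every 4th frame instead of every 2nd.
    # Must be set in the environment before this module is imported.
    import os
    os.environ["FRAME_SKIP"] = "4"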
+# ----------------
+# Model loaders
+# ----------------
+def load_yolo_from_hub() -> YOLO:
+    w = hf_hub_download(repo_id=YOLO_REPO_ID, filename=YOLO_FILENAME, cache_dir=CACHE_DIR)
+    return YOLO(w)
+
+def load_autogluon_tabular_from_hub() -> TabularPredictor:
+    z = hf_hub_download(repo_id=AG_REPO_ID, filename=AG_ZIP_NAME, cache_dir=CACHE_DIR)
+    extract_dir = CACHE_DIR / "ag_predictor_native"
+    if extract_dir.exists():
+        shutil.rmtree(extract_dir)
+    with zipfile.ZipFile(z, "r") as zip_ref:
+        zip_ref.extractall(extract_dir)
+    return TabularPredictor.load(str(extract_dir))
+
+# Lazy singletons: each model is downloaded and loaded on first use, not at import time.
+_YOLO = None
+_AG_PRED = None
+
+def yolo() -> YOLO:
+    global _YOLO
+    if _YOLO is None:
+        _YOLO = load_yolo_from_hub()
+    return _YOLO
+
+def ag_predictor() -> TabularPredictor:
+    global _AG_PRED
+    if _AG_PRED is None:
+        _AG_PRED = load_autogluon_tabular_from_hub()
+    return _AG_PRED
+
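Because both models load lazily, the first user request pays the full Hub download cost. A small warm-up sketch (hypothetical, not part of the commit) moves that cost to process start:

    # Hypothetical warm-up: trigger both lazy loaders once at startup.
    def warm_up():
        yolo()          # downloads YOLO_FILENAME from YOLO_REPO_ID on first call
        ag_predictor()  # downloads and unzips the AutoGluon predictor directory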
+# ----------------------------
+# Vision helpers
+# ----------------------------
 def isolate_scoreboard_color(frame_bgr: np.ndarray,
                              conf: float = YOLO_CONF,
                              iou: float = YOLO_IOU,
                              keep_conf: float = KEEP_CONF,
                              debug: bool = False,
                              frame_id: int = None) -> np.ndarray:
+    """
+    Grayscale everything except the chosen scoreboard bbox.
+    Strategy:
+      - pick the largest bbox with confidence ≥0.70;
+      - otherwise, pick the largest with confidence ≥0.65;
+      - keep at most one box.
+    """
     H, W = frame_bgr.shape[:2]
     gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
     gray = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)

+    primary_thr = max(0.70, keep_conf)
     fallback_thr = max(0.65, primary_thr - 0.05)

     chosen_box = None
[… unchanged line hidden by the diff view …]
     if len(res):
         r = res[0]
         if getattr(r, "boxes", None) is not None and len(r.boxes) > 0:
+            boxes = r.boxes.xyxy.cpu().numpy()
             scores = r.boxes.conf.cpu().numpy()
             candidates = list(zip(boxes, scores))

[… unchanged lines hidden by the diff view …]
         dbg = gray.copy()
         if chosen_box is not None:
             x1, y1, x2, y2 = [int(round(v)) for v in chosen_box]
+            cv2.rectangle(dbg, (x1, y1), (x2, y2), (0, 255, 0), 2)
             out_path = DEBUG_DIR / f"frame_{frame_id:06d}.jpg"
             cv2.imwrite(str(out_path), dbg)
             print(f"[DEBUG] Saved debug frame → {out_path}")

     return gray

+def color_pixel_ratio(rgb: np.ndarray, ch: int) -> float:
+    R, G, B = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
+    if ch == 0:
+        mask = (R > 150) & (R > 1.2 * G) & (R > 1.2 * B)
+    else:
+        mask = (G > 100) & (G > 1.05 * R) & (G > 1.05 * B)
+    return np.sum(mask) / (rgb.shape[0] * rgb.shape[1] + 1e-9)
+
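A quick sanity check of the masks (hypothetical snippet, not part of the commit): a solid red frame should score near 1.0 on the red ratio and 0.0 on the green ratio.

    import numpy as np

    solid_red = np.zeros((10, 10, 3), dtype=np.uint8)
    solid_red[:, :, 0] = 200  # R channel (RGB order), above the 150 threshold

    print(color_pixel_ratio(solid_red, 0))  # ≈ 1.0 — every pixel passes the red mask
    print(color_pixel_ratio(solid_red, 1))  # 0.0 — no pixel passes the green mask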
+def rolling_z(series: pd.Series, win: int = 45) -> pd.Series:
+    med = series.rolling(win, min_periods=5).median()
+    mad = series.rolling(win, min_periods=5).apply(
+        lambda x: np.median(np.abs(x - np.median(x))), raw=True
+    )
+    mad = mad.replace(0, mad[mad > 0].min() if (mad > 0).any() else 1.0)
+    return (series - med) / mad
+
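To make the MAD scaling concrete, a small worked example (hypothetical, not part of the commit): on a gently jittering baseline with one spike, the spike lands roughly 78 local MADs above the rolling median, while ordinary samples stay within a few units.

    import pandas as pd

    base = [0.10, 0.12, 0.11, 0.13, 0.12, 0.11, 0.10, 0.12, 0.11, 0.13]
    s = pd.Series(base * 4 + [0.90])   # jittery baseline, one spike at the end
    z = rolling_z(s, win=10)
    print(float(z.iloc[-1]))           # ≈ 78: (0.90 - median 0.12) / MAD 0.01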
+# ----------------------------
+# Video → features
+# ----------------------------
+def extract_feature_timeseries(video_path: str,
+                               frame_skip: int = FRAME_SKIP,
+                               debug: bool = False) -> Tuple[pd.DataFrame, float]:
+    print("[INFO] Starting frame extraction...")
+    cap = cv2.VideoCapture(video_path)
+    if not cap.isOpened():
+        return pd.DataFrame(), 0.0
+    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
+    records, frame_idx = [], 0
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+        if frame_idx % frame_skip == 0:
+            ts = frame_idx / fps
+            masked = isolate_scoreboard_color(frame, debug=debug, frame_id=frame_idx)
+            rgb = cv2.cvtColor(masked, cv2.COLOR_BGR2RGB)
+            red_ratio = color_pixel_ratio(rgb, 0)
+            green_ratio = color_pixel_ratio(rgb, 1)
+            records.append({
+                "frame_id": frame_idx,
+                "timestamp": ts,
+                "red_ratio": red_ratio,
+                "green_ratio": green_ratio,
+            })
+        frame_idx += 1
+
+    cap.release()
+    df = pd.DataFrame(records)
+    print(f"[INFO] Processed {len(df)} frames out of {total_frames} (fps={fps:.2f})")
+
+    if df.empty:
+        return df, fps
+
+    df["red_diff"] = df["red_ratio"].diff().fillna(0)
+    df["green_diff"] = df["green_ratio"].diff().fillna(0)
+    df["z_red"] = rolling_z(df["red_ratio"])
+    df["z_green"] = rolling_z(df["green_ratio"])
+
+    if debug:
+        out_csv = DEBUG_DIR / f"features_{uuid.uuid4().hex}.csv"
+        df.to_csv(out_csv, index=False)
+        print(f"[DEBUG] Saved features CSV → {out_csv}")
+
+    return df, fps
+
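For reference, the resulting table has one row per sampled frame; a hypothetical peek (not part of the commit, "bout.mp4" is a stand-in path):

    df, fps = extract_feature_timeseries("bout.mp4")
    print(df.columns.tolist())
    # ['frame_id', 'timestamp', 'red_ratio', 'green_ratio',
    #  'red_diff', 'green_diff', 'z_red', 'z_green']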
+# ----------------------------
+# Predictor + event picking
+# ----------------------------
+def predict_scores(df: pd.DataFrame) -> pd.Series:
+    feat_cols = ["red_ratio", "green_ratio", "red_diff", "green_diff", "z_red", "z_green"]
+    X = df[feat_cols].copy()
+    pred = ag_predictor().predict(X)
+    try:
+        proba = ag_predictor().predict_proba(X)
+        if isinstance(proba, pd.DataFrame) and (1 in proba.columns):
+            return proba[1]
+    except Exception:
+        pass
+    s = pd.Series(pred).astype(float)
+    rng = (s.quantile(0.95) - s.quantile(0.05)) or 1.0
+    return ((s - s.quantile(0.05)) / rng).clip(0, 1)
+
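When predict_proba is unavailable, the fallback rescales whatever predict returns into [0, 1] via the 5th-95th percentile range. A small illustration (hypothetical, not part of the commit):

    import pandas as pd

    s = pd.Series([0.2, 0.3, 0.9])
    rng = (s.quantile(0.95) - s.quantile(0.05)) or 1.0
    print(((s - s.quantile(0.05)) / rng).clip(0, 1).round(2).tolist())
    # [0.0, 0.14, 1.0] — extremes are clipped, midrange values are scaled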
+def pick_events(df: pd.DataFrame, score: pd.Series, fps: float,
+                min_start_guard_s: float = 1.0,
+                guard_enable_min_duration_s: float = 6.0) -> List[float]:
     max_score = score.max()
     raw_cutoff = 0.7 * max_score if max_score > 0 else 0.4
+    z = rolling_z(score, win=45)
+    max_z = z.max()
+    z_cutoff = max(2.0, 0.6 * max_z)

+    print(f"[DEBUG] Predictor score stats: min={score.min():.3f}, max={max_score:.3f}, mean={score.mean():.3f}")
+    print(f"[DEBUG] Adaptive thresholds: raw>{raw_cutoff:.3f}, z>{z_cutoff:.2f}")
+
+    duration_est = float(df["timestamp"].max()) if not df.empty else 0.0
+    enforce_guard = duration_est >= guard_enable_min_duration_s
     out_times = []
+    min_dist_frames = max(1, int(1.0 * max(1.0, fps)))
+    y = score.values
+    last_kept = -min_dist_frames

+    for i in range(1, len(y) - 1):
+        ts = float(df.iloc[i]["timestamp"])
+        local_peak = y[i] > y[i-1] and y[i] > y[i+1]
+        if ((z.iloc[i] > z_cutoff) or (y[i] > raw_cutoff)) and local_peak and (i - last_kept) >= min_dist_frames:
+            if (not enforce_guard) or (ts >= min_start_guard_s):
+                out_times.append(ts)
+                last_kept = i
+
+    if not out_times and len(y) > 0:
+        best_idx = int(np.argmax(y))
+        ts_best = float(df.iloc[best_idx]["timestamp"])
+        if (not enforce_guard) or (ts_best >= min_start_guard_s):
+            out_times = [ts_best]
+            print(f"[DEBUG] Fallback → using global max at {ts_best:.2f}s")
+
+    out_times.sort()
     grouped = []
     for t in out_times:
         if (not grouped) or (t - grouped[-1]) > GROUP_GAP_S:
             grouped.append(t)
+    print(f"[DEBUG] Final detected events: {grouped}")
     return grouped

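The picker can be exercised on synthetic scores (a hypothetical check, not part of the commit): two isolated unit peaks should come back as two grouped event times.

    import numpy as np
    import pandas as pd

    n, fps = 120, 10.0                  # 120 sampled rows, 10 rows per second
    score = pd.Series(np.zeros(n))
    score.iloc[40] = 1.0                # isolated peak at t = 4 s
    score.iloc[90] = 1.0                # and another at t = 9 s
    df = pd.DataFrame({"timestamp": np.arange(n) / fps})
    print(pick_events(df, score, fps))  # expect [4.0, 9.0]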
+# ----------------------------
+# Clip helpers
+# ----------------------------
+def _probe_duration(video_path: str) -> float:
+    try:
+        if ffmpeg is None:
+            raise RuntimeError("ffmpeg-python not available")
+        meta = ffmpeg.probe(video_path)
+        return float(meta["format"]["duration"])
+    except Exception:
+        return 0.0
+
+def cut_clip(video_path: str, start: float, end: float, out_path: str) -> str:
+    # Fast path: lossless stream copy with the ffmpeg CLI; fall back to a moviepy re-encode.
+    try:
+        cmd = ["ffmpeg", "-y", "-ss", str(max(0, start)), "-to", str(max(start, end)),
+               "-i", video_path, "-c", "copy", out_path]
+        sp = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        if sp.returncode == 0 and os.path.exists(out_path):
+            return out_path
+    except Exception:
+        pass
+    from moviepy.editor import VideoFileClip
+    clip = VideoFileClip(video_path).subclip(max(0, start), max(start, end))
+    clip.write_videofile(out_path, codec="libx264", audio_codec="aac", verbose=False, logger=None)
+    return out_path
+
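Usage is a single call (hypothetical invocation, not part of the commit; "bout.mp4" is a stand-in path). One design note: the stream-copy fast path cuts on keyframes, so clip boundaries can land slightly off the requested times; the moviepy fallback re-encodes and is frame-accurate but far slower.

    out = cut_clip("bout.mp4", start=12.0, end=16.0, out_path="/tmp/touch_01.mp4")
    print(out)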
+# ----------------------------
+# Orchestrator
+# ----------------------------
+def extract_score_clips(video_path: str, debug: bool = False) -> Tuple[List[Tuple[str, str]], str]:
+    print("[INFO] Running full detection pipeline...")
+    df, fps = extract_feature_timeseries(video_path, frame_skip=FRAME_SKIP, debug=debug)
+    if df.empty:
+        return [], "No frames processed."
+
+    score = predict_scores(df)
+    if score.max() <= 1e-6:
+        print("[WARN] Flat scores from predictor (possible YOLO miss or feature mismatch).")
+        return [], "⚠️ No scoreboard detected or illumination scores flat. Please check video or model."

     events = pick_events(df, score, fps)
     if not events:
         return [], "⚠️ No touches confidently detected in this video."

+    duration = _probe_duration(video_path)
+    if duration <= 0:
+        duration = float(df["timestamp"].max() + CLIP_PAD_S + 0.5)
+
     clips = []
+    base = os.path.splitext(os.path.basename(video_path))[0]
+    for i, t in enumerate(events):
+        s = t - CLIP_PAD_S
+        e = t + CLIP_PAD_S
+        if s < 0:
+            # Shift the window right so it still spans 2 * CLIP_PAD_S seconds.
+            e = min(duration, e - s)
+            s = 0
+        elif e > duration:
+            s = max(0, s - (e - duration))
+            e = duration
+        clip_path = os.path.join(tempfile.gettempdir(), f"{base}_score_{i+1:02d}.mp4")
+        cut_clip(video_path, s, e, clip_path)
+        label = f"Touch {i+1} @ {t:.2f}s"
+        clips.append((clip_path, label))
+
+    return clips, f"✅ Detected {len(clips)} event(s)."
+
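A minimal end-to-end driver (hypothetical, not part of the commit; "bout.mp4" is a stand-in path) mirroring what the Gradio callback does with the return value:

    clips, status = extract_score_clips("bout.mp4", debug=False)
    print(status)
    for path, label in clips:
        print(label, "→", path)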
+# ----------------------------
+# Gradio UI
+# ----------------------------
+CSS = """
+.gradio-container {max-width: 900px; margin: auto;}
+.header {text-align: center; margin-bottom: 20px;}
+.full-width {width: 100% !important;}
+.progress-bar {
+    width: 100%;
+    height: 30px;
+    background-color: #e0e0e0;
+    border-radius: 15px;
+    margin: 15px 0;
+    position: relative;
+    overflow: hidden;
+}
+.progress-fill {
+    height: 100%;
+    background-color: #4CAF50;
+    border-radius: 15px;
+    text-align: center;
+    line-height: 30px;
+    color: white;
+    font-weight: bold;
+    transition: width 0.3s;
+}
+.fencer {
+    position: absolute;
+    top: -5px;
+    font-size: 24px;
+    transition: left 0.3s;
+    transform: scaleX(-1);
+}
+"""
+
 def _make_progress_bar(percent: int, final_text: str = None):
     text = f"{percent}%" if not final_text else final_text
     return f"""
[… middle of the HTML template hidden by the diff view …]
     </div>
     """

+def run_with_progress(video_file):
     if not video_file:
+        yield [], "Please upload a video file.", gr.update(visible=False)
         return
+    yield [], "🔄 Extracting frames...", _make_progress_bar(20)
+    df, fps = extract_feature_timeseries(video_file, frame_skip=FRAME_SKIP, debug=False)
+    if df.empty:
[diff truncated in the page extraction]