feat: pose model selector + overlay visualizer
#3
by BladeSzaSza - opened
- .claude/settings.local.json +48 -20
- .gitattributes +1 -0
- CLAUDE.md +176 -149
- README.md +12 -0
- app.py +55 -14
- checkpoints/mediapipe/pose_landmarker_full.task +3 -0
- docs/superpowers/plans/2026-06-09-pose-model-selector.md +734 -0
- docs/superpowers/plans/2026-06-09-pose-visualizer.md +914 -0
- docs/superpowers/specs/2026-06-09-pose-model-selector-design.md +171 -0
- docs/superpowers/specs/2026-06-09-pose-visualizer-design.md +197 -0
- formscout.egg-info/PKG-INFO +4 -4
- formscout.egg-info/SOURCES.txt +37 -25
- formscout.egg-info/dependency_links.txt +1 -1
- formscout.egg-info/top_level.txt +1 -1
- formscout/agents/pose2d.py +232 -95
- formscout/agents/visualizer.py +371 -0
- formscout/config.py +83 -2
- formscout/pipeline.py +3 -2
- formscout/startup.py +47 -0
- requirements.txt +2 -0
- tests/test_pose2d.py +61 -1
- tests/test_visualizer.py +176 -0
.claude/settings.local.json
CHANGED
|
@@ -1,20 +1,48 @@
|
|
| 1 |
-
{
|
| 2 |
-
"permissions": {
|
| 3 |
-
"allow": [
|
| 4 |
-
"Bash(git -C /Users/bolyos/Development/FormScout status)",
|
| 5 |
-
"Bash(git init *)",
|
| 6 |
-
"Bash(git add *)",
|
| 7 |
-
"Bash(git commit *)",
|
| 8 |
-
"Bash(huggingface-cli version *)",
|
| 9 |
-
"Bash(huggingface-cli whoami *)",
|
| 10 |
-
"Bash(hf auth *)",
|
| 11 |
-
"Bash(hf whoami *)",
|
| 12 |
-
"Bash(git remote *)",
|
| 13 |
-
"Bash(git push *)",
|
| 14 |
-
"Bash(git fetch *)",
|
| 15 |
-
"Bash(git pull *)",
|
| 16 |
-
"Bash(git lfs *)",
|
| 17 |
-
"Bash(hf upload *)"
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"permissions": {
|
| 3 |
+
"allow": [
|
| 4 |
+
"Bash(git -C /Users/bolyos/Development/FormScout status)",
|
| 5 |
+
"Bash(git init *)",
|
| 6 |
+
"Bash(git add *)",
|
| 7 |
+
"Bash(git commit *)",
|
| 8 |
+
"Bash(huggingface-cli version *)",
|
| 9 |
+
"Bash(huggingface-cli whoami *)",
|
| 10 |
+
"Bash(hf auth *)",
|
| 11 |
+
"Bash(hf whoami *)",
|
| 12 |
+
"Bash(git remote *)",
|
| 13 |
+
"Bash(git push *)",
|
| 14 |
+
"Bash(git fetch *)",
|
| 15 |
+
"Bash(git pull *)",
|
| 16 |
+
"Bash(git lfs *)",
|
| 17 |
+
"Bash(hf upload *)",
|
| 18 |
+
"Bash(git merge *)",
|
| 19 |
+
"Bash(git checkout *)",
|
| 20 |
+
"Bash(git stash *)",
|
| 21 |
+
"Bash(python -m pytest tests/test_phase2.py tests/test_types.py tests/test_biomechanics.py -q --tb=short)",
|
| 22 |
+
"Bash(python3 -m pytest tests/test_phase2.py tests/test_types.py tests/test_biomechanics.py -q --tb=short)",
|
| 23 |
+
"Bash(python3 *)",
|
| 24 |
+
"Bash(/Users/bolyos/Development/FormScout/.venv/bin/pip install *)",
|
| 25 |
+
"Bash(.venv/bin/pip install *)",
|
| 26 |
+
"Bash(.venv/bin/pytest tests/ -q --tb=short)",
|
| 27 |
+
"WebFetch(domain:huggingface.co)",
|
| 28 |
+
"Bash(brew list *)",
|
| 29 |
+
"Read(//opt/homebrew/bin/**)",
|
| 30 |
+
"Read(//usr/local/bin/**)",
|
| 31 |
+
"Bash(pip install *)",
|
| 32 |
+
"Skill(run)",
|
| 33 |
+
"Bash(pkill -f \"python3 app.py\")",
|
| 34 |
+
"Bash(python3 app.py)",
|
| 35 |
+
"Bash(echo \"PID: $!\")",
|
| 36 |
+
"Bash(pytest *)",
|
| 37 |
+
"Bash(ffmpeg -version)",
|
| 38 |
+
"Bash(file /Users/bolyos/.cache/huggingface/hub/models--qualcomm--MediaPipe-Pose-Estimation/blobs/*)",
|
| 39 |
+
"Bash(curl -L \"https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_full/float16/latest/pose_landmarker_full.task\" -o /Users/bolyos/Development/FormScout/checkpoints/mediapipe/pose_landmarker_full.task)",
|
| 40 |
+
"Bash(/opt/homebrew/bin/brew list *)",
|
| 41 |
+
"Bash(/opt/homebrew/bin/git-lfs version *)",
|
| 42 |
+
"Read(//usr/local/Cellar/**)",
|
| 43 |
+
"Read(//usr/**)",
|
| 44 |
+
"Bash(git ls-remote *)",
|
| 45 |
+
"Bash(git ls-tree *)"
|
| 46 |
+
]
|
| 47 |
+
}
|
| 48 |
+
}
|
.gitattributes
CHANGED
|
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
docs/FormScout-FMS-Spec.md.pdf filter=lfs diff=lfs merge=lfs -text
|
| 37 |
docs/plans/FormScout-Build-Prompt.md.pdf filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
docs/FormScout-FMS-Spec.md.pdf filter=lfs diff=lfs merge=lfs -text
|
| 37 |
docs/plans/FormScout-Build-Prompt.md.pdf filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
checkpoints/mediapipe/pose_landmarker_full.task filter=lfs diff=lfs merge=lfs -text
|
CLAUDE.md
CHANGED
|
@@ -1,149 +1,176 @@
|
|
| 1 |
-
# CLAUDE.md
|
| 2 |
-
|
| 3 |
-
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
| 4 |
-
|
| 5 |
-
## Project overview
|
| 6 |
-
|
| 7 |
-
FormScout is a Gradio app (Hugging Face Space) that scores Functional Movement Screen (FMS) videos 0–3 per test with a written rationale and an annotated overlay. It is a **screening aid** — not a diagnosis, not an injury predictor. Built for the Build Small Hackathon (Backyard AI track). Full product spec is in `docs/FormScout-FMS-Spec.md`; the engineering contract is in `docs/plans/FormScout-Build-Prompt.md`.
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
```bash
|
| 14 |
-
#
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
#
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
# Run all tests
|
| 21 |
-
pytest tests/
|
| 22 |
-
|
| 23 |
-
# Run a single test
|
| 24 |
-
pytest tests/
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
-
|
| 94 |
-
-
|
| 95 |
-
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
-
|
| 105 |
-
|
| 106 |
-
##
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
-
|
| 113 |
-
-
|
| 114 |
-
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
-
|
| 121 |
-
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
-
|
| 126 |
-
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CLAUDE.md
|
| 2 |
+
|
| 3 |
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
| 4 |
+
|
| 5 |
+
## Project overview
|
| 6 |
+
|
| 7 |
+
FormScout is a Gradio app (Hugging Face Space) that scores Functional Movement Screen (FMS) videos 0–3 per test with a written rationale and an annotated overlay. It is a **screening aid** — not a diagnosis, not an injury predictor. Built for the Build Small Hackathon (Backyard AI track). Full product spec is in `docs/FormScout-FMS-Spec.md`; the engineering contract is in `docs/plans/FormScout-Build-Prompt.md`.
|
| 8 |
+
|
| 9 |
+
**Current status:** Phase 2 complete. All 7 FMS test rubric scorers, JudgeAgent, MovementClassifierAgent, and ReportAgent are implemented and tested (45/46 passing). Phase 3 is next (ST-GCN fine-tune + RAG retrieval).
|
| 10 |
+
|
| 11 |
+
## Common commands
|
| 12 |
+
|
| 13 |
+
```bash
|
| 14 |
+
# Run the Gradio app locally
|
| 15 |
+
python3 app.py
|
| 16 |
+
|
| 17 |
+
# Headless pipeline test (no Gradio)
|
| 18 |
+
python3 -m formscout.run sample.mp4
|
| 19 |
+
|
| 20 |
+
# Run all tests
|
| 21 |
+
pytest tests/
|
| 22 |
+
|
| 23 |
+
# Run a single test file or test
|
| 24 |
+
pytest tests/test_phase2.py
|
| 25 |
+
pytest tests/test_biomechanics.py::TestBiomechanicsAgent::test_deep_squat_score
|
| 26 |
+
|
| 27 |
+
# Lint / format
|
| 28 |
+
ruff check . && ruff format .
|
| 29 |
+
|
| 30 |
+
# Run Svelte component tests (when frontend work is added)
|
| 31 |
+
npx vitest run
|
| 32 |
+
```
|
| 33 |
+
|
| 34 |
+
## Architecture
|
| 35 |
+
|
| 36 |
+
The pipeline is a sequence of **typed specialist agents**. Each agent accepts and returns a frozen dataclass from `formscout/types.py`. The Director in `formscout/pipeline.py` orchestrates them as a deterministic state machine (not an LLM).
|
| 37 |
+
|
| 38 |
+
### Agent pipeline
|
| 39 |
+
|
| 40 |
+
```
|
| 41 |
+
IngestAgent → Pose2DAgent → [Body3DAgent — optional]
|
| 42 |
+
→ MovementClassifierAgent → BiomechanicsAgent
|
| 43 |
+
→ rubric/score_test() → JudgeAgent → ReportAgent
|
| 44 |
+
```
|
| 45 |
+
|
| 46 |
+
The **Director** (`pipeline.py`) owns the flow. `app.py` creates one `Director()` instance and calls `director.run(video_path, test_name, side)` per submission. The Gradio UI passes `test_name` directly (from dropdown), bypassing the classifier.
|
| 47 |
+
|
| 48 |
+
### The tiering rule (most important invariant)
|
| 49 |
+
|
| 50 |
+
**The 2D path is the default and must stand alone as a complete, functional pipeline.** `Body3DAgent` is only activated when `config.ENABLE_3D == True` AND the checkpoint loads successfully. If 3D is off or fails, `Body3DResult(used=False, ...)` is returned — this is a normal success path, not an error. `BiomechFeatures.view` is `"2d"` or `"3d"` so the `JudgeAgent` can caveat its rationale appropriately. Never put `Body3DAgent` on the critical path.
|
| 51 |
+
|
| 52 |
+
### Feature flags in `config.py` and their current state
|
| 53 |
+
|
| 54 |
+
| Flag | Default | Meaning |
|
| 55 |
+
|------|---------|---------|
|
| 56 |
+
| `ENABLE_JUDGE` | `False` | When False, JudgeAgent falls back to rubric score — no llama.cpp needed |
|
| 57 |
+
| `ENABLE_3D` | `False` | When False, Body3DAgent returns `used=False` immediately |
|
| 58 |
+
| `ENABLE_STGCN` | `False` | Phase 3 — ST-GCN learned scoring head |
|
| 59 |
+
| `ENABLE_RAG` | `False` | Phase 3 — RetrievalAgent exemplar lookup |
|
| 60 |
+
|
| 61 |
+
All model IDs, thresholds, k-values, and feature flags live in `config.py` — never scattered literals.
|
| 62 |
+
|
| 63 |
+
### Fallback chain (important for local dev and Spaces)
|
| 64 |
+
|
| 65 |
+
1. `ENABLE_JUDGE=False` → JudgeAgent returns rubric score wrapped as JudgeResult (no VLM needed)
|
| 66 |
+
2. `ENABLE_JUDGE=True` + llama.cpp server unreachable → same fallback, logs a warning
|
| 67 |
+
3. `ENABLE_JUDGE=True` + server available → calls Qwen3-VL-8B-Instruct at `127.0.0.1:8080`
|
| 68 |
+
|
| 69 |
+
This means the app is **fully functional without any GPU or llama.cpp** — rubric scoring is pure Python.
|
| 70 |
+
|
| 71 |
+
### Rubric scorers
|
| 72 |
+
|
| 73 |
+
Each FMS test has a pure-function scorer in `formscout/rubric/`:
|
| 74 |
+
|
| 75 |
+
```
|
| 76 |
+
score_deep_squat / score_hurdle_step / score_inline_lunge /
|
| 77 |
+
score_shoulder_mobility / score_active_slr /
|
| 78 |
+
score_trunk_stability_pushup / score_rotary_stability
|
| 79 |
+
```
|
| 80 |
+
|
| 81 |
+
All accept `BiomechFeatures` and return `ScoreResult`. Dispatch via `rubric.score_test(features)`. **Rubric functions must remain pure** — no model calls, no I/O.
|
| 82 |
+
|
| 83 |
+
### Bilateral tests
|
| 84 |
+
|
| 85 |
+
`hurdle_step`, `inline_lunge`, `shoulder_mobility`, and `active_slr` are bilateral. `ReportAgent` groups them by test name, takes the **lower** score, and always emits the asymmetry delta even when scores are equal. `composite` is `None` when any test is unscored.
|
| 86 |
+
|
| 87 |
+
### Types contract
|
| 88 |
+
|
| 89 |
+
Every agent I/O is a frozen dataclass from `formscout/types.py`. Key types:
|
| 90 |
+
|
| 91 |
+
- `IngestResult` — decoded frames (np.ndarray list), fps, duration, dimensions
|
| 92 |
+
- `Pose2DResult` — per-frame keypoints as `dict[int, {x, y, conf}]` (COCO 17 joints)
|
| 93 |
+
- `Body3DResult` — optional 3D joints, always has `used: bool`
|
| 94 |
+
- `MovementResult` — `test_name` (validated enum), `side` ("left"|"right"|"na")
|
| 95 |
+
- `BiomechFeatures` — `angles: dict`, `alignments: dict`, `view: "2d"|"3d"`, `symmetry_delta`
|
| 96 |
+
- `ScoreResult` — `score: int` (0–3), `rationale`, `needs_human`
|
| 97 |
+
- `JudgeResult` — same as ScoreResult + `compensation_tags`, `corrective_hint`; `score=None` when `needs_human=True`
|
| 98 |
+
- `PipelineState` — mutable accumulator threaded through the Director
|
| 99 |
+
|
| 100 |
+
`MovementResult` and `JudgeResult` validate their fields in `__post_init__` — passing invalid values raises immediately.
|
| 101 |
+
|
| 102 |
+
### YOLO checkpoint location
|
| 103 |
+
|
| 104 |
+
`config.YOLO_POSE_MODEL` points to `checkpoints/yolo26/yolo26l-pose.pt` (absolute path). Both `yolo26l-pose.pt` and `yolo26x-pose.pt` are committed to the repo. Models load once at module scope via `_get_model()` in `pose2d.py`.
|
| 105 |
+
|
| 106 |
+
### llama.cpp serving
|
| 107 |
+
|
| 108 |
+
`formscout/serving/llama_cpp.py` provides `LlamaCppClient` (VLM, port 8080) and `EmbeddingClient` (embeddings, port 8081). Both check `/health` before use and return safe error dicts when unavailable. Only active when the corresponding `ENABLE_*` flag is True.
|
| 109 |
+
|
| 110 |
+
## Key constraints and invariants
|
| 111 |
+
|
| 112 |
+
- **No cloud model APIs.** All inference runs on-Space (ZeroGPU). No OpenAI/Anthropic/Gemini calls.
|
| 113 |
+
- **Pain is never auto-scored.** Any clearing test or visible distress sets `needs_human=True` — enforced in rubric functions and JudgeAgent. `JudgeResult.score` must be `None` when `needs_human=True`.
|
| 114 |
+
- **Quality gates (Director, never silently skip):**
|
| 115 |
+
- Any agent `confidence < config.MIN_CONFIDENCE` (0.6) → warn or stop
|
| 116 |
+
- `|rubric.score - judge.score| >= 1` → flag disagreement
|
| 117 |
+
- `MovementResult.test_name == "unknown"` → stop pipeline, surface manual override
|
| 118 |
+
- `JudgeAgent.needs_human == True` → no numeric score emitted
|
| 119 |
+
- **Composite is null** when any test is unscored. Never show a partial 0–21 as complete.
|
| 120 |
+
- **Pipeline runs headless.** No Gradio imports in any agent file.
|
| 121 |
+
- **Safety banner** ("Screening aid — not a diagnosis…") must always be visible in the UI — `app.py` renders it at both the top and the bottom of the page.
|
| 122 |
+
|
| 123 |
+
## Engineering standards
|
| 124 |
+
|
| 125 |
+
- Every agent: one public entrypoint, typed dataclass I/O from `types.py`, `confidence: float` and `notes: str` on every result.
|
| 126 |
+
- Models load once at module/instance init — never inside the inference hot path.
|
| 127 |
+
- Every agent module docstring states: purpose, inputs, outputs, failure behavior, model param count, license, and gated status.
|
| 128 |
+
- `tracing.py` records structured per-agent I/O for any run; one full run gets exported to the Hub.
|
| 129 |
+
- Every agent ships with a pytest in `tests/` that runs without model downloads and asserts the typed contract.
|
| 130 |
+
|
| 131 |
+
## Model stack (~17.6B total — stay under 32B)
|
| 132 |
+
|
| 133 |
+
| Component | Model | Params | Status |
|
| 134 |
+
|---|---|---|---|
|
| 135 |
+
| 2D pose (primary) | YOLO26l-Pose | 0.026B | Ready (checkpoint committed) |
|
| 136 |
+
| 2D pose (HQ alt) | YOLO26x-Pose | 0.058B | Ready (checkpoint committed) |
|
| 137 |
+
| 2D pose (fallback) | `noahcao/sapiens-pose-coco` | ~0.6B | Access accepted |
|
| 138 |
+
| Segmentation | SAM 3.1 base | ~0.85B | Access accepted |
|
| 139 |
+
| 3D biomechanics | `facebook/sam-3d-body-dinov3` | ~0.84B | **Access ACCEPTED Jun 4 2026** |
|
| 140 |
+
| Learned scoring | ST-GCN (pyskl) | ~0.03B | Phase 3 |
|
| 141 |
+
| Judge + Classifier | Qwen3-VL-8B-Instruct (llama.cpp) | 8B | Ready (ENABLE_JUDGE=False for now) |
|
| 142 |
+
| Retrieval | Qwen3-VL-Embedding-8B (llama.cpp) | 8B | Phase 3 |
|
| 143 |
+
|
| 144 |
+
Track the running sum in `MODEL_BUDGET.md`. The two Qwen3-VL-8B models share a backbone.
|
| 145 |
+
|
| 146 |
+
## Gradio + Svelte UI guidance
|
| 147 |
+
|
| 148 |
+
The UI uses **Gradio `gr.Blocks`** with custom CSS/theme (`formscout/ui/theme.py`). Custom Svelte components for the score dial, asymmetry bars, and rubric drawer are planned for Phase 4. Use the `gradio-svelte-expert` agent for Svelte component work.
|
| 149 |
+
|
| 150 |
+
- ZeroGPU: wrap heavy inference (`Pose2DAgent.run`, `Body3DAgent.run`) in `@spaces.GPU` before deploying to Spaces.
|
| 151 |
+
- Verify Gradio APIs against current docs before use — pin exact versions in `requirements.txt`.
|
| 152 |
+
|
| 153 |
+
## Build phases
|
| 154 |
+
|
| 155 |
+
1. **Phase 0 — Recon:** ✅ Complete. See `RECON.md`.
|
| 156 |
+
2. **Phase 1 — Spine:** ✅ Complete. Deep Squat end-to-end.
|
| 157 |
+
3. **Phase 2 — All 7 tests:** ✅ Complete. Classifier, Judge, Report agents; all rubric scorers; Gradio UI.
|
| 158 |
+
4. **Phase 3 — Learned scoring + retrieval:** ST-GCN fine-tune on physio clips, publish to Hub. RetrievalAgent with embedding index.
|
| 159 |
+
5. **Phase 4 — Polish + ship:** Custom Svelte UI components, overlay video, PDF export, agent trace to Hub, blog post.
|
| 160 |
+
|
| 161 |
+
## Known issues
|
| 162 |
+
|
| 163 |
+
- `tests/test_biomechanics.py::TestBiomechanicsAgent::test_unimplemented_test_returns_low_confidence` fails: it expects `"not yet implemented"` in `result.notes`, but `BiomechanicsAgent` returns an empty string. Minor — low priority.
|
| 164 |
+
|
| 165 |
+
## Badge checklist (definition of done)
|
| 166 |
+
|
| 167 |
+
- [ ] Space runs green; upload → scorecard works on real clips
|
| 168 |
+
- [ ] Param sum verified ≤ 32B in `MODEL_BUDGET.md`
|
| 169 |
+
- [ ] 🔌 **Off the Grid** — no cloud model APIs anywhere in the pipeline
|
| 170 |
+
- [ ] 🎯 **Well-Tuned** — fine-tuned ST-GCN head published to Hub with honest model card
|
| 171 |
+
- [ ] 🎨 **Off-Brand** — custom, non-default Gradio UI (scout/trail theme)
|
| 172 |
+
- [ ] 🦙 **Llama Champion** — VLM + embedder served via llama.cpp (GGUF)
|
| 173 |
+
- [ ] 📡 **Sharing is Caring** — one full agent trace (all I/O) published to Hub
|
| 174 |
+
- [ ] 📓 **Field Notes** — blog post written, honesty section (FMS limitations) front-and-center
|
| 175 |
+
- [ ] Demo video + social post recorded
|
| 176 |
+
- [ ] Safety banner present; pain/clearing never auto-scored; low-confidence flagged
|
README.md
CHANGED
|
@@ -1,3 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# FormScout
|
| 2 |
|
| 3 |
FMS (Functional Movement Screen) scoring pipeline — a screening aid that scores movement videos 0–3 per test with a written rationale and annotated overlay.
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: FormScout
|
| 3 |
+
emoji: 🏔️
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: gradio
|
| 7 |
+
app_file: app.py
|
| 8 |
+
pinned: false
|
| 9 |
+
license: apache-2.0
|
| 10 |
+
short_description: FMS video scoring — movement screen aid
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
# FormScout
|
| 14 |
|
| 15 |
FMS (Functional Movement Screen) scoring pipeline — a screening aid that scores movement videos 0–3 per test with a written rationale and annotated overlay.
|
app.py
CHANGED
|
@@ -8,11 +8,17 @@ rubric breakdown, and persistent safety banner.
|
|
| 8 |
"""
|
| 9 |
from __future__ import annotations
|
| 10 |
|
|
|
|
|
|
|
| 11 |
import gradio as gr
|
| 12 |
|
| 13 |
from formscout.pipeline import Director
|
| 14 |
from formscout.rubric import score_test
|
| 15 |
from formscout.ui.theme import formscout_theme, FORMSCOUT_CSS
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
|
| 18 |
# ─── Constants ───────────────────────────────────────────────────────────────
|
|
@@ -42,7 +48,7 @@ SCORE_DESCRIPTIONS = {
|
|
| 42 |
|
| 43 |
# ─── Processing ──────────────────────────────────────────────────────────────
|
| 44 |
|
| 45 |
-
def process_video(video_path: str, test_name: str, side: str):
|
| 46 |
"""Process an uploaded video through the FormScout pipeline."""
|
| 47 |
if not video_path:
|
| 48 |
return (
|
|
@@ -50,10 +56,12 @@ def process_video(video_path: str, test_name: str, side: str):
|
|
| 50 |
"Upload a video to begin analysis.",
|
| 51 |
"",
|
| 52 |
"",
|
|
|
|
|
|
|
| 53 |
)
|
| 54 |
|
| 55 |
director = Director()
|
| 56 |
-
state = director.run(video_path, test_name=test_name, side=side)
|
| 57 |
|
| 58 |
# ─── Score card ───
|
| 59 |
score_html = _render_empty_state()
|
|
@@ -61,7 +69,6 @@ def process_video(video_path: str, test_name: str, side: str):
|
|
| 61 |
|
| 62 |
if state.features:
|
| 63 |
result = score_test(state.features)
|
| 64 |
-
# Use judge result if available, otherwise rubric
|
| 65 |
judge = state.judge
|
| 66 |
if judge and judge.score is not None:
|
| 67 |
score_html = _render_score_card(judge.score, judge.confidence, judge.needs_human)
|
|
@@ -79,7 +86,23 @@ def process_video(video_path: str, test_name: str, side: str):
|
|
| 79 |
# ─── Warnings/errors ───
|
| 80 |
alerts = _render_alerts(state)
|
| 81 |
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
|
| 85 |
def _render_score_card(score: int, confidence: float, needs_human: bool) -> str:
|
|
@@ -223,11 +246,7 @@ def _render_alerts(state) -> str:
|
|
| 223 |
|
| 224 |
def build_app() -> gr.Blocks:
|
| 225 |
"""Build the FormScout Gradio app with custom scout/trail theme."""
|
| 226 |
-
with gr.Blocks(
|
| 227 |
-
title="FormScout — FMS Screening Aid",
|
| 228 |
-
theme=formscout_theme(),
|
| 229 |
-
css=FORMSCOUT_CSS,
|
| 230 |
-
) as app:
|
| 231 |
|
| 232 |
# Header
|
| 233 |
gr.HTML("""
|
|
@@ -262,6 +281,24 @@ def build_app() -> gr.Blocks:
|
|
| 262 |
scale=1,
|
| 263 |
)
|
| 264 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
submit_btn = gr.Button(
|
| 266 |
"🎯 Score Movement",
|
| 267 |
variant="primary",
|
|
@@ -292,6 +329,10 @@ def build_app() -> gr.Blocks:
|
|
| 292 |
with gr.TabItem("⚠️ Alerts"):
|
| 293 |
alerts_md = gr.Markdown("")
|
| 294 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
# Footer safety banner
|
| 296 |
gr.HTML(f'<div class="safety-banner" style="margin-top: 20px;">{DISCLAIMER}</div>')
|
| 297 |
|
|
@@ -304,17 +345,17 @@ def build_app() -> gr.Blocks:
|
|
| 304 |
|
| 305 |
# ─── Event wiring ────────────────────────────────────────────────────
|
| 306 |
|
| 307 |
-
def _map_inputs(video, test_display_name, side_display):
|
| 308 |
"""Map UI display values to internal values."""
|
| 309 |
test_map = {name: val for name, val in FMS_TESTS}
|
| 310 |
test_name = test_map.get(test_display_name, "deep_squat")
|
| 311 |
side = {"N/A": "na", "Left": "left", "Right": "right"}.get(side_display, "na")
|
| 312 |
-
return process_video(video, test_name, side)
|
| 313 |
|
| 314 |
submit_btn.click(
|
| 315 |
fn=_map_inputs,
|
| 316 |
-
inputs=[video_input, test_dropdown, side_dropdown],
|
| 317 |
-
outputs=[score_html, pipeline_md, score_details, alerts_md],
|
| 318 |
)
|
| 319 |
|
| 320 |
return app
|
|
@@ -322,4 +363,4 @@ def build_app() -> gr.Blocks:
|
|
| 322 |
|
| 323 |
if __name__ == "__main__":
|
| 324 |
app = build_app()
|
| 325 |
-
app.launch()
|
|
|
|
| 8 |
"""
|
| 9 |
from __future__ import annotations
|
| 10 |
|
| 11 |
+
import tempfile
|
| 12 |
+
|
| 13 |
import gradio as gr
|
| 14 |
|
| 15 |
from formscout.pipeline import Director
|
| 16 |
from formscout.rubric import score_test
|
| 17 |
from formscout.ui.theme import formscout_theme, FORMSCOUT_CSS
|
| 18 |
+
from formscout import config
|
| 19 |
+
from formscout.startup import ensure_checkpoints
|
| 20 |
+
|
| 21 |
+
ensure_checkpoints()
|
| 22 |
|
| 23 |
|
| 24 |
# ─── Constants ───────────────────────────────────────────────────────────────
|
|
|
|
| 48 |
|
| 49 |
# ─── Processing ──────────────────────────────────────────────────────────────
|
| 50 |
|
| 51 |
+
def process_video(video_path: str, test_name: str, side: str, model_key: str, layers: list[str]):
|
| 52 |
"""Process an uploaded video through the FormScout pipeline."""
|
| 53 |
if not video_path:
|
| 54 |
return (
|
|
|
|
| 56 |
"Upload a video to begin analysis.",
|
| 57 |
"",
|
| 58 |
"",
|
| 59 |
+
None,
|
| 60 |
+
"",
|
| 61 |
)
|
| 62 |
|
| 63 |
director = Director()
|
| 64 |
+
state = director.run(video_path, test_name=test_name, side=side, model_key=model_key)
|
| 65 |
|
| 66 |
# ─── Score card ───
|
| 67 |
score_html = _render_empty_state()
|
|
|
|
| 69 |
|
| 70 |
if state.features:
|
| 71 |
result = score_test(state.features)
|
|
|
|
| 72 |
judge = state.judge
|
| 73 |
if judge and judge.score is not None:
|
| 74 |
score_html = _render_score_card(judge.score, judge.confidence, judge.needs_human)
|
|
|
|
| 86 |
# ─── Warnings/errors ───
|
| 87 |
alerts = _render_alerts(state)
|
| 88 |
|
| 89 |
+
# ─── Overlay video ───
|
| 90 |
+
overlay_path = None
|
| 91 |
+
vel_summary = ""
|
| 92 |
+
layer_set = {lbl.lower().replace(" ", "_") for lbl in (layers or [])}
|
| 93 |
+
if layer_set and state.ingest and state.pose2d:
|
| 94 |
+
try:
|
| 95 |
+
from formscout.agents.visualizer import PoseVisualizer, build_velocity_summary
|
| 96 |
+
vis = PoseVisualizer()
|
| 97 |
+
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
|
| 98 |
+
out_path = f.name
|
| 99 |
+
overlay_path = vis.render_video(state.ingest, state.pose2d, layer_set, out_path)
|
| 100 |
+
if overlay_path:
|
| 101 |
+
vel_summary = build_velocity_summary(state.pose2d.keypoints, vis.last_velocities)
|
| 102 |
+
except Exception as e:
|
| 103 |
+
alerts = (alerts or "") + f"\n⚠️ Visualizer error: {e}"
|
| 104 |
+
|
| 105 |
+
return score_html, pipeline_md, score_details, alerts, overlay_path, vel_summary
|
| 106 |
|
| 107 |
|
| 108 |
def _render_score_card(score: int, confidence: float, needs_human: bool) -> str:
|
|
|
|
| 246 |
|
| 247 |
def build_app() -> gr.Blocks:
|
| 248 |
"""Build the FormScout Gradio app with custom scout/trail theme."""
|
| 249 |
+
with gr.Blocks(title="FormScout — FMS Screening Aid") as app:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
|
| 251 |
# Header
|
| 252 |
gr.HTML("""
|
|
|
|
| 281 |
scale=1,
|
| 282 |
)
|
| 283 |
|
| 284 |
+
_available_models = config.available_pose_models() or config.POSE_MODELS
|
| 285 |
+
_default_model = (
|
| 286 |
+
config.DEFAULT_POSE_MODEL
|
| 287 |
+
if config.DEFAULT_POSE_MODEL in _available_models
|
| 288 |
+
else list(_available_models.keys())[0]
|
| 289 |
+
)
|
| 290 |
+
pose_model_dropdown = gr.Dropdown(
|
| 291 |
+
choices=list(_available_models.keys()),
|
| 292 |
+
value=_default_model,
|
| 293 |
+
label="Pose Model",
|
| 294 |
+
)
|
| 295 |
+
|
| 296 |
+
overlay_layers = gr.CheckboxGroup(
|
| 297 |
+
choices=["Skeleton", "Trails", "Velocity arrows"],
|
| 298 |
+
value=["Skeleton", "Trails"],
|
| 299 |
+
label="Overlay Layers",
|
| 300 |
+
)
|
| 301 |
+
|
| 302 |
submit_btn = gr.Button(
|
| 303 |
"🎯 Score Movement",
|
| 304 |
variant="primary",
|
|
|
|
| 329 |
with gr.TabItem("⚠️ Alerts"):
|
| 330 |
alerts_md = gr.Markdown("")
|
| 331 |
|
| 332 |
+
with gr.TabItem("🎬 Overlay Video"):
|
| 333 |
+
overlay_video = gr.Video(label="Annotated Movement")
|
| 334 |
+
velocity_md = gr.Markdown("")
|
| 335 |
+
|
| 336 |
# Footer safety banner
|
| 337 |
gr.HTML(f'<div class="safety-banner" style="margin-top: 20px;">{DISCLAIMER}</div>')
|
| 338 |
|
|
|
|
| 345 |
|
| 346 |
# ─── Event wiring ────────────────────────────────────────────────────
|
| 347 |
|
| 348 |
+
def _map_inputs(video, test_display_name, side_display, pose_model_key, overlay_layers):
|
| 349 |
"""Map UI display values to internal values."""
|
| 350 |
test_map = {name: val for name, val in FMS_TESTS}
|
| 351 |
test_name = test_map.get(test_display_name, "deep_squat")
|
| 352 |
side = {"N/A": "na", "Left": "left", "Right": "right"}.get(side_display, "na")
|
| 353 |
+
return process_video(video, test_name, side, pose_model_key, overlay_layers)
|
| 354 |
|
| 355 |
submit_btn.click(
|
| 356 |
fn=_map_inputs,
|
| 357 |
+
inputs=[video_input, test_dropdown, side_dropdown, pose_model_dropdown, overlay_layers],
|
| 358 |
+
outputs=[score_html, pipeline_md, score_details, alerts_md, overlay_video, velocity_md],
|
| 359 |
)
|
| 360 |
|
| 361 |
return app
|
|
|
|
| 363 |
|
| 364 |
if __name__ == "__main__":
|
| 365 |
app = build_app()
|
| 366 |
+
app.launch(theme=formscout_theme(), css=FORMSCOUT_CSS)
|
checkpoints/mediapipe/pose_landmarker_full.task
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4eaa5eb7a98365221087693fcc286334cf0858e2eb6e15b506aa4a7ecdcec4ad
|
| 3 |
+
size 9398198
|
docs/superpowers/plans/2026-06-09-pose-model-selector.md
ADDED
|
@@ -0,0 +1,734 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Pose Model Selector Implementation Plan
|
| 2 |
+
|
| 3 |
+
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
| 4 |
+
|
| 5 |
+
**Goal:** Replace the hard-coded YOLO26l default with a 10-model dropdown (MediaPipe, YOLO26 n→x, Sapiens2 0.4B→5B) wired end-to-end from UI through the Director to `Pose2DAgent`.
|
| 6 |
+
|
| 7 |
+
**Architecture:** Unified `POSE_MODELS` registry in `config.py` drives a `gr.Dropdown` in `app.py`; the selected key flows through `Director.run()` into `Pose2DAgent.run(model_key)`, which dispatches to one of three private sub-runners (`_run_yolo`, `_run_mediapipe`, `_run_sapiens2`), all producing the same COCO-17 `list[dict]` contract.
|
| 8 |
+
|
| 9 |
+
**Tech Stack:** `ultralytics` (YOLO), `onnxruntime` + `huggingface_hub` (MediaPipe), `transformers` (Sapiens2), `gradio` (UI).
|
| 10 |
+
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
## File map
|
| 14 |
+
|
| 15 |
+
| File | Change |
|
| 16 |
+
|---|---|
|
| 17 |
+
| `formscout/config.py` | Replace `YOLO_POSE_MODELS` with `POSE_MODELS` dict + `DEFAULT_POSE_MODEL` |
|
| 18 |
+
| `formscout/agents/pose2d.py` | Add `_run_yolo`, `_run_mediapipe`, `_run_sapiens2`; update `run()` signature |
|
| 19 |
+
| `formscout/pipeline.py` | Change `pose_model_path` param to `model_key` |
|
| 20 |
+
| `app.py` | Add `pose_model_dropdown`, fix `_map_inputs` + `process_video` |
|
| 21 |
+
| `requirements.txt` | Add `onnxruntime>=1.18` |
|
| 22 |
+
| `tests/test_pose2d.py` | Add mocked tests for each backend |
|
| 23 |
+
|
| 24 |
+
---
|
| 25 |
+
|
| 26 |
+
## Task 1: Add unified `POSE_MODELS` registry to `config.py`
|
| 27 |
+
|
| 28 |
+
**Files:**
|
| 29 |
+
- Modify: `formscout/config.py`
|
| 30 |
+
|
| 31 |
+
- [ ] **Step 1: Open `formscout/config.py` and replace the `YOLO_POSE_MODELS` block**
|
| 32 |
+
|
| 33 |
+
Replace lines 12–20 (the `YOLO_POSE_MODELS` dict and `YOLO_POSE_MODEL` / `YOLO_POSE_MODEL_HQ` lines) with:
|
| 34 |
+
|
| 35 |
+
```python
|
| 36 |
+
_YOLO_DIR = ROOT / "checkpoints" / "yolo26"
|
| 37 |
+
|
| 38 |
+
POSE_MODELS: dict[str, dict] = {
|
| 39 |
+
# ── MediaPipe (Qualcomm HF, ONNX Runtime) ──────────────────────────────
|
| 40 |
+
"MediaPipe-Pose ⬇ ~16 MB, CPU-friendly": {
|
| 41 |
+
"backend": "mediapipe",
|
| 42 |
+
"hf_id": "qualcomm/MediaPipe-Pose-Estimation",
|
| 43 |
+
"params_m": 4.2,
|
| 44 |
+
},
|
| 45 |
+
# ── YOLO26 (local checkpoints) ─────────────────────────────────────────
|
| 46 |
+
"YOLO26n — nano (0.7M, fastest)": {
|
| 47 |
+
"backend": "yolo",
|
| 48 |
+
"path": str(_YOLO_DIR / "yolo26n-pose.pt"),
|
| 49 |
+
"params_m": 0.7,
|
| 50 |
+
},
|
| 51 |
+
"YOLO26s — small (3.5M)": {
|
| 52 |
+
"backend": "yolo",
|
| 53 |
+
"path": str(_YOLO_DIR / "yolo26s-pose.pt"),
|
| 54 |
+
"params_m": 3.5,
|
| 55 |
+
},
|
| 56 |
+
"YOLO26m — medium (9M)": {
|
| 57 |
+
"backend": "yolo",
|
| 58 |
+
"path": str(_YOLO_DIR / "yolo26m-pose.pt"),
|
| 59 |
+
"params_m": 9.0,
|
| 60 |
+
},
|
| 61 |
+
"YOLO26l — large (25.9M)": {
|
| 62 |
+
"backend": "yolo",
|
| 63 |
+
"path": str(_YOLO_DIR / "yolo26l-pose.pt"),
|
| 64 |
+
"params_m": 25.9,
|
| 65 |
+
},
|
| 66 |
+
"YOLO26x — extra-large (57.6M)": {
|
| 67 |
+
"backend": "yolo",
|
| 68 |
+
"path": str(_YOLO_DIR / "yolo26x-pose.pt"),
|
| 69 |
+
"params_m": 57.6,
|
| 70 |
+
},
|
| 71 |
+
# ── Sapiens2 (HF download, transformers) ───────────────────────────────
|
| 72 |
+
"Sapiens2-0.4B ⬇ ~1.6 GB": {
|
| 73 |
+
"backend": "sapiens2",
|
| 74 |
+
"hf_id": "facebook/sapiens2-pose-0.4b",
|
| 75 |
+
"params_m": 400,
|
| 76 |
+
},
|
| 77 |
+
"Sapiens2-0.8B ⬇ ~3.2 GB": {
|
| 78 |
+
"backend": "sapiens2",
|
| 79 |
+
"hf_id": "facebook/sapiens2-pose-0.8b",
|
| 80 |
+
"params_m": 800,
|
| 81 |
+
},
|
| 82 |
+
"Sapiens2-1B ⬇ ~4 GB": {
|
| 83 |
+
"backend": "sapiens2",
|
| 84 |
+
"hf_id": "facebook/sapiens2-pose-1b",
|
| 85 |
+
"params_m": 1000,
|
| 86 |
+
},
|
| 87 |
+
"Sapiens2-5B ⬇ ~20 GB, large GPU": {
|
| 88 |
+
"backend": "sapiens2",
|
| 89 |
+
"hf_id": "facebook/sapiens2-pose-5b",
|
| 90 |
+
"params_m": 5000,
|
| 91 |
+
},
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
DEFAULT_POSE_MODEL = "YOLO26n — nano (0.7M, fastest)"
|
| 95 |
+
|
| 96 |
+
# Backward-compat aliases — kept for any direct references outside the agent
|
| 97 |
+
YOLO_POSE_MODEL = str(_YOLO_DIR / "yolo26l-pose.pt")
|
| 98 |
+
YOLO_POSE_MODEL_HQ = str(_YOLO_DIR / "yolo26x-pose.pt")
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
- [ ] **Step 2: Verify import is clean**
|
| 102 |
+
|
| 103 |
+
```bash
|
| 104 |
+
python3 -c "from formscout import config; print(list(config.POSE_MODELS.keys()))"
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
Expected: list of 10 model labels, starting with `MediaPipe-Pose...`
|
| 108 |
+
|
| 109 |
+
- [ ] **Step 3: Commit**
|
| 110 |
+
|
| 111 |
+
```bash
|
| 112 |
+
git add formscout/config.py
|
| 113 |
+
git commit -m "feat: unified POSE_MODELS registry with MediaPipe, YOLO26 n-x, Sapiens2 0.4-5B"
|
| 114 |
+
git push
|
| 115 |
+
```
|
| 116 |
+
|
| 117 |
+
---
|
| 118 |
+
|
| 119 |
+
## Task 2: Refactor `Pose2DAgent` — YOLO sub-runner + new `run()` signature
|
| 120 |
+
|
| 121 |
+
**Files:**
|
| 122 |
+
- Modify: `formscout/agents/pose2d.py`
|
| 123 |
+
- Modify: `tests/test_pose2d.py`
|
| 124 |
+
|
| 125 |
+
- [ ] **Step 1: Write failing test for the new `model_key` signature**
|
| 126 |
+
|
| 127 |
+
Add to `tests/test_pose2d.py`:
|
| 128 |
+
|
| 129 |
+
```python
|
| 130 |
+
def test_run_accepts_model_key(pose2d_agent):
|
| 131 |
+
"""run() must accept model_key kwarg, not model_path."""
|
| 132 |
+
import inspect
|
| 133 |
+
sig = inspect.signature(pose2d_agent.run)
|
| 134 |
+
assert "model_key" in sig.parameters
|
| 135 |
+
assert "model_path" not in sig.parameters
|
| 136 |
+
```
|
| 137 |
+
|
| 138 |
+
- [ ] **Step 2: Run to confirm it fails**
|
| 139 |
+
|
| 140 |
+
```bash
|
| 141 |
+
pytest tests/test_pose2d.py::TestPose2DAgent::test_run_accepts_model_key -v
|
| 142 |
+
```
|
| 143 |
+
|
| 144 |
+
Expected: FAIL — `model_path` still present in signature.
|
| 145 |
+
|
| 146 |
+
- [ ] **Step 3: Rewrite `formscout/agents/pose2d.py`**
|
| 147 |
+
|
| 148 |
+
Replace the entire file with:
|
| 149 |
+
|
| 150 |
+
```python
|
| 151 |
+
"""
|
| 152 |
+
Pose2DAgent — 2D per-frame keypoint extraction.
|
| 153 |
+
|
| 154 |
+
Backends: yolo (local ultralytics .pt checkpoints), mediapipe (Qualcomm HF/ONNX Runtime),
|
| 155 |
+
sapiens2 (Meta HF/transformers).
|
| 156 |
+
All backends output COCO-17 keypoints: dict[int, {x, y, conf}] per frame.
|
| 157 |
+
|
| 158 |
+
Input: IngestResult
|
| 159 |
+
Output: Pose2DResult(keypoints per frame, fps, confidence)
|
| 160 |
+
Failure: Pose2DResult(confidence=0.0, notes=<reason>) — never raises.
|
| 161 |
+
"""
|
| 162 |
+
from __future__ import annotations
|
| 163 |
+
|
| 164 |
+
import logging
|
| 165 |
+
import numpy as np
|
| 166 |
+
|
| 167 |
+
from formscout import config
|
| 168 |
+
from formscout.types import IngestResult, Pose2DResult
|
| 169 |
+
|
| 170 |
+
logger = logging.getLogger(__name__)
|
| 171 |
+
|
| 172 |
+
COCO_KEYPOINTS = [
|
| 173 |
+
"nose", "left_eye", "right_eye", "left_ear", "right_ear",
|
| 174 |
+
"left_shoulder", "right_shoulder", "left_elbow", "right_elbow",
|
| 175 |
+
"left_wrist", "right_wrist", "left_hip", "right_hip",
|
| 176 |
+
"left_knee", "right_knee", "left_ankle", "right_ankle",
|
| 177 |
+
]
|
| 178 |
+
|
| 179 |
+
# BlazePose-33 → COCO-17 index mapping
|
| 180 |
+
_BLAZEPOSE_TO_COCO: dict[int, int] = {
|
| 181 |
+
0: 0, # nose
|
| 182 |
+
1: 2, # left_eye (inner → left_eye)
|
| 183 |
+
2: 1, # right_eye (inner → right_eye) — swapped: BlazePose 1=left_eye_inner
|
| 184 |
+
3: 3, # left_ear
|
| 185 |
+
4: 4, # right_ear
|
| 186 |
+
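5: 5, # FIXME: this entry and the ones above contradict the canonical mapping listed below; this dict is unused — _BP_SRC/_BP_DST hold the mapping that is actually applied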
|
| 187 |
+
# Correct BlazePose-33 COCO mapping (canonical):
|
| 188 |
+
# BlazePose idx : COCO idx
|
| 189 |
+
# 0 nose → COCO 0
|
| 190 |
+
# 2 left_eye → COCO 1
|
| 191 |
+
# 5 right_eye → COCO 2
|
| 192 |
+
# 7 left_ear → COCO 3
|
| 193 |
+
# 8 right_ear → COCO 4
|
| 194 |
+
# 11 left_shoulder → COCO 5
|
| 195 |
+
# 12 right_shoulder → COCO 6
|
| 196 |
+
# 13 left_elbow → COCO 7
|
| 197 |
+
# 14 right_elbow → COCO 8
|
| 198 |
+
# 15 left_wrist → COCO 9
|
| 199 |
+
# 16 right_wrist → COCO 10
|
| 200 |
+
# 23 left_hip → COCO 11
|
| 201 |
+
# 24 right_hip → COCO 12
|
| 202 |
+
# 25 left_knee → COCO 13
|
| 203 |
+
# 26 right_knee → COCO 14
|
| 204 |
+
# 27 left_ankle → COCO 15
|
| 205 |
+
# 28 right_ankle → COCO 16
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
# BlazePose source index → COCO target index (correct mapping, no duplicates)
|
| 209 |
+
_BP_SRC = [0, 2, 5, 7, 8, 11, 12, 13, 14, 15, 16, 23, 24, 25, 26, 27, 28]
|
| 210 |
+
_BP_DST = list(range(17)) # COCO 0..16
|
| 211 |
+
|
| 212 |
+
_model_cache: dict[str, object] = {}
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
# ── YOLO backend ─────────────────────────────────────────────────────────────
|
| 216 |
+
|
| 217 |
+
def _get_yolo(path: str) -> object:
|
| 218 |
+
if path not in _model_cache:
|
| 219 |
+
from ultralytics import YOLO
|
| 220 |
+
_model_cache[path] = YOLO(path)
|
| 221 |
+
return _model_cache[path]
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
def _run_yolo(frames: list, path: str) -> list[dict]:
|
| 225 |
+
model = _get_yolo(path)
|
| 226 |
+
out = []
|
| 227 |
+
for frame in frames:
|
| 228 |
+
try:
|
| 229 |
+
results = model(frame, verbose=False)
|
| 230 |
+
kps: dict[int, dict] = {}
|
| 231 |
+
if results and results[0].keypoints is not None:
|
| 232 |
+
kp = results[0].keypoints
|
| 233 |
+
if kp.xy is not None and len(kp.xy) > 0:
|
| 234 |
+
xy = kp.xy[0].cpu().numpy()
|
| 235 |
+
conf = kp.conf[0].cpu().numpy()
|
| 236 |
+
for j in range(min(len(xy), 17)):
|
| 237 |
+
kps[j] = {"x": float(xy[j, 0]), "y": float(xy[j, 1]), "conf": float(conf[j])}
|
| 238 |
+
out.append(kps)
|
| 239 |
+
except Exception:
|
| 240 |
+
out.append({})
|
| 241 |
+
return out
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
# ── MediaPipe backend ────────────────────────────────────────────────────────
|
| 245 |
+
|
| 246 |
+
def _get_mediapipe_sessions(hf_id: str):
|
| 247 |
+
"""Return (detector_session, landmark_session) cached by hf_id."""
|
| 248 |
+
cache_key = f"mp:{hf_id}"
|
| 249 |
+
if cache_key not in _model_cache:
|
| 250 |
+
from huggingface_hub import snapshot_download
|
| 251 |
+
import onnxruntime as ort
|
| 252 |
+
from pathlib import Path
|
| 253 |
+
|
| 254 |
+
snap = Path(snapshot_download(hf_id))
|
| 255 |
+
onnx_files = sorted(snap.glob("**/*.onnx"), key=lambda p: p.stat().st_size)
|
| 256 |
+
if len(onnx_files) < 2:
|
| 257 |
+
raise RuntimeError(f"Expected 2 ONNX files in {snap}, found {len(onnx_files)}")
|
| 258 |
+
# Smaller file = pose detector; larger = pose landmark detector
|
| 259 |
+
det_sess = ort.InferenceSession(str(onnx_files[0]))
|
| 260 |
+
lmk_sess = ort.InferenceSession(str(onnx_files[-1]))
|
| 261 |
+
_model_cache[cache_key] = (det_sess, lmk_sess)
|
| 262 |
+
return _model_cache[cache_key]
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
def _preprocess_mediapipe(frame: np.ndarray, size: int = 256) -> np.ndarray:
|
| 266 |
+
"""Resize to size×size, normalize to [0,1], add batch dim → (1,3,H,W)."""
|
| 267 |
+
import cv2
|
| 268 |
+
img = cv2.resize(frame, (size, size)).astype(np.float32) / 255.0
|
| 269 |
+
return img.transpose(2, 0, 1)[None] # (1, 3, size, size)
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
def _run_mediapipe(frames: list, hf_id: str) -> list[dict]:
|
| 273 |
+
try:
|
| 274 |
+
det_sess, lmk_sess = _get_mediapipe_sessions(hf_id)
|
| 275 |
+
except Exception as e:
|
| 276 |
+
logger.warning("mediapipe load failed: %s", e)
|
| 277 |
+
return [{} for _ in frames]
|
| 278 |
+
|
| 279 |
+
import cv2
|
| 280 |
+
h_orig, w_orig = frames[0].shape[:2] if frames else (480, 640)
|
| 281 |
+
out = []
|
| 282 |
+
|
| 283 |
+
for frame in frames:
|
| 284 |
+
try:
|
| 285 |
+
h, w = frame.shape[:2]
|
| 286 |
+
inp = _preprocess_mediapipe(frame)
|
| 287 |
+
|
| 288 |
+
# Run landmark detector directly on full frame (single-person FMS use-case)
|
| 289 |
+
lmk_input_name = lmk_sess.get_inputs()[0].name
|
| 290 |
+
lmk_out = lmk_sess.run(None, {lmk_input_name: inp})
|
| 291 |
+
|
| 292 |
+
# lmk_out[0] shape: (1, 33, 3) — [x, y, visibility] normalized 0..1
|
| 293 |
+
landmarks = lmk_out[0][0] # (33, 3)
|
| 294 |
+
|
| 295 |
+
kps: dict[int, dict] = {}
|
| 296 |
+
for coco_idx, bp_idx in zip(_BP_DST, _BP_SRC):
|
| 297 |
+
if bp_idx < len(landmarks):
|
| 298 |
+
lm = landmarks[bp_idx]
|
| 299 |
+
kps[coco_idx] = {
|
| 300 |
+
"x": float(lm[0] * w),
|
| 301 |
+
"y": float(lm[1] * h),
|
| 302 |
+
"conf": float(lm[2]), # visibility score
|
| 303 |
+
}
|
| 304 |
+
out.append(kps)
|
| 305 |
+
except Exception:
|
| 306 |
+
out.append({})
|
| 307 |
+
|
| 308 |
+
return out
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
# ── Sapiens2 backend ─────────────────────────────────────────────────────────
|
| 312 |
+
|
| 313 |
+
# COCO-17 keypoint names in order (used to map Sapiens2 named output → COCO index)
|
| 314 |
+
_COCO_NAMES = [
|
| 315 |
+
"nose", "left_eye", "right_eye", "left_ear", "right_ear",
|
| 316 |
+
"left_shoulder", "right_shoulder", "left_elbow", "right_elbow",
|
| 317 |
+
"left_wrist", "right_wrist", "left_hip", "right_hip",
|
| 318 |
+
"left_knee", "right_knee", "left_ankle", "right_ankle",
|
| 319 |
+
]
|
| 320 |
+
|
| 321 |
+
|
| 322 |
+
def _get_sapiens2(hf_id: str) -> object:
|
| 323 |
+
if hf_id not in _model_cache:
|
| 324 |
+
from transformers import pipeline as hf_pipeline
|
| 325 |
+
_model_cache[hf_id] = hf_pipeline("pose-estimation", model=hf_id)
|
| 326 |
+
return _model_cache[hf_id]
|
| 327 |
+
|
| 328 |
+
|
| 329 |
+
def _run_sapiens2(frames: list, hf_id: str) -> list[dict]:
|
| 330 |
+
try:
|
| 331 |
+
pipe = _get_sapiens2(hf_id)
|
| 332 |
+
except Exception as e:
|
| 333 |
+
logger.warning("sapiens2 load failed: %s", e)
|
| 334 |
+
return [{} for _ in frames]
|
| 335 |
+
|
| 336 |
+
from PIL import Image
|
| 337 |
+
out = []
|
| 338 |
+
|
| 339 |
+
for frame in frames:
|
| 340 |
+
try:
|
| 341 |
+
pil_img = Image.fromarray(frame)
|
| 342 |
+
result = pipe(pil_img)
|
| 343 |
+
|
| 344 |
+
# result is a list of person dicts; take the first (highest confidence)
|
| 345 |
+
if not result:
|
| 346 |
+
out.append({})
|
| 347 |
+
continue
|
| 348 |
+
|
| 349 |
+
person = result[0]
|
| 350 |
+
keypoints = person.get("keypoints", [])
|
| 351 |
+
scores = person.get("keypoint_scores", [])
|
| 352 |
+
|
| 353 |
+
# Build name→(x,y,score) lookup from pipeline output
|
| 354 |
+
kp_lookup: dict[str, tuple] = {}
|
| 355 |
+
for i, kp in enumerate(keypoints):
|
| 356 |
+
name = kp.get("label", "") if isinstance(kp, dict) else ""
|
| 357 |
+
x = kp.get("x", 0.0) if isinstance(kp, dict) else float(kp[0])
|
| 358 |
+
y = kp.get("y", 0.0) if isinstance(kp, dict) else float(kp[1])
|
| 359 |
+
score = scores[i] if i < len(scores) else 0.0
|
| 360 |
+
if name:
|
| 361 |
+
kp_lookup[name] = (x, y, float(score))
|
| 362 |
+
|
| 363 |
+
kps: dict[int, dict] = {}
|
| 364 |
+
for coco_idx, name in enumerate(_COCO_NAMES):
|
| 365 |
+
if name in kp_lookup:
|
| 366 |
+
x, y, s = kp_lookup[name]
|
| 367 |
+
kps[coco_idx] = {"x": x, "y": y, "conf": s}
|
| 368 |
+
|
| 369 |
+
out.append(kps)
|
| 370 |
+
except Exception:
|
| 371 |
+
out.append({})
|
| 372 |
+
|
| 373 |
+
return out
|
| 374 |
+
|
| 375 |
+
|
| 376 |
+
# ── Agent ────────────────────────────────────────────────────────────────────
|
| 377 |
+
|
| 378 |
+
class Pose2DAgent:
|
| 379 |
+
"""Extracts COCO-17 keypoints per frame; dispatches to YOLO, MediaPipe, or Sapiens2."""
|
| 380 |
+
|
| 381 |
+
def run(self, ingest: IngestResult, model_key: str | None = None) -> Pose2DResult:
|
| 382 |
+
if not ingest.frames:
|
| 383 |
+
return Pose2DResult(keypoints=[], fps=ingest.fps, confidence=0.0, notes="no frames in ingest")
|
| 384 |
+
|
| 385 |
+
key = model_key or config.DEFAULT_POSE_MODEL
|
| 386 |
+
spec = config.POSE_MODELS.get(key)
|
| 387 |
+
if spec is None:
|
| 388 |
+
logger.warning("Unknown model_key %r — falling back to %s", key, config.DEFAULT_POSE_MODEL)
|
| 389 |
+
spec = config.POSE_MODELS[config.DEFAULT_POSE_MODEL]
|
| 390 |
+
|
| 391 |
+
backend = spec["backend"]
|
| 392 |
+
try:
|
| 393 |
+
if backend == "yolo":
|
| 394 |
+
kps_per_frame = _run_yolo(ingest.frames, spec["path"])
|
| 395 |
+
elif backend == "mediapipe":
|
| 396 |
+
kps_per_frame = _run_mediapipe(ingest.frames, spec["hf_id"])
|
| 397 |
+
elif backend == "sapiens2":
|
| 398 |
+
kps_per_frame = _run_sapiens2(ingest.frames, spec["hf_id"])
|
| 399 |
+
else:
|
| 400 |
+
return Pose2DResult(
|
| 401 |
+
keypoints=[{} for _ in ingest.frames],
|
| 402 |
+
fps=ingest.fps, confidence=0.0,
|
| 403 |
+
notes=f"unknown backend: {backend}",
|
| 404 |
+
)
|
| 405 |
+
except Exception as e:
|
| 406 |
+
return Pose2DResult(
|
| 407 |
+
keypoints=[{} for _ in ingest.frames],
|
| 408 |
+
fps=ingest.fps, confidence=0.0,
|
| 409 |
+
notes=str(e),
|
| 410 |
+
)
|
| 411 |
+
|
| 412 |
+
n_detected = sum(1 for f in kps_per_frame if f)
|
| 413 |
+
total_conf = sum(
|
| 414 |
+
sum(kp["conf"] for kp in f.values()) / len(f)
|
| 415 |
+
for f in kps_per_frame if f
|
| 416 |
+
)
|
| 417 |
+
overall_conf = (total_conf / n_detected) if n_detected > 0 else 0.0
|
| 418 |
+
notes = "" if n_detected > 0 else "no person detected in any frame"
|
| 419 |
+
|
| 420 |
+
return Pose2DResult(
|
| 421 |
+
keypoints=kps_per_frame,
|
| 422 |
+
fps=ingest.fps,
|
| 423 |
+
confidence=overall_conf,
|
| 424 |
+
notes=notes,
|
| 425 |
+
)
|
| 426 |
+
```
|
| 427 |
+
|
| 428 |
+
- [ ] **Step 4: Run the new signature test**
|
| 429 |
+
|
| 430 |
+
```bash
|
| 431 |
+
pytest tests/test_pose2d.py::TestPose2DAgent::test_run_accepts_model_key -v
|
| 432 |
+
```
|
| 433 |
+
|
| 434 |
+
Expected: PASS
|
| 435 |
+
|
| 436 |
+
- [ ] **Step 5: Run full existing pose2d test suite**
|
| 437 |
+
|
| 438 |
+
```bash
|
| 439 |
+
pytest tests/test_pose2d.py -v
|
| 440 |
+
```
|
| 441 |
+
|
| 442 |
+
Expected: all existing tests pass (they will skip if YOLO model unavailable in env — that's OK).
|
| 443 |
+
|
| 444 |
+
- [ ] **Step 6: Commit and push**
|
| 445 |
+
|
| 446 |
+
```bash
|
| 447 |
+
git add formscout/agents/pose2d.py tests/test_pose2d.py
|
| 448 |
+
git commit -m "feat: Pose2DAgent — three backends (yolo/mediapipe/sapiens2), model_key dispatch"
|
| 449 |
+
git push
|
| 450 |
+
```
|
| 451 |
+
|
| 452 |
+
---
|
| 453 |
+
|
| 454 |
+
## Task 3: Add `onnxruntime` to requirements
|
| 455 |
+
|
| 456 |
+
**Files:**
|
| 457 |
+
- Modify: `requirements.txt`
|
| 458 |
+
|
| 459 |
+
- [ ] **Step 1: Add onnxruntime**
|
| 460 |
+
|
| 461 |
+
Open `requirements.txt` and add after the existing `transformers` line:
|
| 462 |
+
|
| 463 |
+
```
|
| 464 |
+
onnxruntime>=1.18
|
| 465 |
+
```
|
| 466 |
+
|
| 467 |
+
- [ ] **Step 2: Verify it installs**
|
| 468 |
+
|
| 469 |
+
```bash
|
| 470 |
+
pip install "onnxruntime>=1.18" --quiet && python3 -c "import onnxruntime; print(onnxruntime.__version__)"
|
| 471 |
+
```
|
| 472 |
+
|
| 473 |
+
Expected: version string printed, no errors.
|
| 474 |
+
|
| 475 |
+
- [ ] **Step 3: Commit and push**
|
| 476 |
+
|
| 477 |
+
```bash
|
| 478 |
+
git add requirements.txt
|
| 479 |
+
git commit -m "chore: add onnxruntime for MediaPipe ONNX backend"
|
| 480 |
+
git push
|
| 481 |
+
```
|
| 482 |
+
|
| 483 |
+
---
|
| 484 |
+
|
| 485 |
+
## Task 4: Update `Director.run()` — `pose_model_path` → `model_key`
|
| 486 |
+
|
| 487 |
+
**Files:**
|
| 488 |
+
- Modify: `formscout/pipeline.py`
|
| 489 |
+
|
| 490 |
+
- [ ] **Step 1: Update the signature and the `pose2d` call**
|
| 491 |
+
|
| 492 |
+
In `formscout/pipeline.py`, change `Director.run()`:
|
| 493 |
+
|
| 494 |
+
```python
|
| 495 |
+
def run(self, video_path: str, test_name: str = "deep_squat", side: str = "na", model_key: str | None = None) -> PipelineState:
|
| 496 |
+
"""
|
| 497 |
+
Run the full pipeline on a single video.
|
| 498 |
+
test_name/side serve as manual override when provided (skips classifier).
|
| 499 |
+
model_key selects the pose backend (see config.POSE_MODELS).
|
| 500 |
+
"""
|
| 501 |
+
state = PipelineState(video_path=video_path)
|
| 502 |
+
|
| 503 |
+
# ─── Ingest ───
|
| 504 |
+
state.ingest = self._ingest.run(video_path)
|
| 505 |
+
if state.ingest.confidence < config.MIN_CONFIDENCE:
|
| 506 |
+
state.errors.append("ingest: low confidence — video may be corrupt")
|
| 507 |
+
return state
|
| 508 |
+
|
| 509 |
+
# ─── Pose 2D ───
|
| 510 |
+
state.pose2d = self._pose2d.run(state.ingest, model_key=model_key)
|
| 511 |
+
# ... rest of method unchanged
|
| 512 |
+
```
|
| 513 |
+
|
| 514 |
+
(Only the signature line and the `self._pose2d.run(...)` call change — everything else stays the same.)
|
| 515 |
+
|
| 516 |
+
- [ ] **Step 2: Verify import is clean**
|
| 517 |
+
|
| 518 |
+
```bash
|
| 519 |
+
python3 -c "from formscout.pipeline import Director; d = Director(); print('ok')"
|
| 520 |
+
```
|
| 521 |
+
|
| 522 |
+
Expected: `ok` (models load lazily so no crash here).
|
| 523 |
+
|
| 524 |
+
- [ ] **Step 3: Commit and push**
|
| 525 |
+
|
| 526 |
+
```bash
|
| 527 |
+
git add formscout/pipeline.py
|
| 528 |
+
git commit -m "feat: Director.run() accepts model_key, threads to Pose2DAgent"
|
| 529 |
+
git push
|
| 530 |
+
```
|
| 531 |
+
|
| 532 |
+
---
|
| 533 |
+
|
| 534 |
+
## Task 5: Wire the UI — pose model dropdown in `app.py`
|
| 535 |
+
|
| 536 |
+
**Files:**
|
| 537 |
+
- Modify: `app.py`
|
| 538 |
+
|
| 539 |
+
- [ ] **Step 1: Update `process_video` to use `model_key` and the unified registry**
|
| 540 |
+
|
| 541 |
+
Replace the existing `process_video` function signature and the old `YOLO_POSE_MODELS.get()` lookup:
|
| 542 |
+
|
| 543 |
+
```python
|
| 544 |
+
def process_video(video_path: str, test_name: str, side: str, model_key: str):
|
| 545 |
+
"""Process an uploaded video through the FormScout pipeline."""
|
| 546 |
+
if not video_path:
|
| 547 |
+
return (
|
| 548 |
+
_render_empty_state(),
|
| 549 |
+
"Upload a video to begin analysis.",
|
| 550 |
+
"",
|
| 551 |
+
"",
|
| 552 |
+
)
|
| 553 |
+
|
| 554 |
+
director = Director()
|
| 555 |
+
state = director.run(video_path, test_name=test_name, side=side, model_key=model_key)
|
| 556 |
+
```
|
| 557 |
+
|
| 558 |
+
(Remove the `pose_model_path = config.YOLO_POSE_MODELS.get(...)` line entirely.)
|
| 559 |
+
|
| 560 |
+
- [ ] **Step 2: Add the `pose_model_dropdown` in `build_app()`**
|
| 561 |
+
|
| 562 |
+
Inside `build_app()`, after the `side_dropdown` block (around line 265) and before `submit_btn`, add:
|
| 563 |
+
|
| 564 |
+
```python
|
| 565 |
+
pose_model_dropdown = gr.Dropdown(
|
| 566 |
+
choices=list(config.POSE_MODELS.keys()),
|
| 567 |
+
value=config.DEFAULT_POSE_MODEL,
|
| 568 |
+
label="Pose Model",
|
| 569 |
+
)
|
| 570 |
+
```
|
| 571 |
+
|
| 572 |
+
- [ ] **Step 3: Update `_map_inputs` to pass the model key**
|
| 573 |
+
|
| 574 |
+
Replace the existing `_map_inputs` closure:
|
| 575 |
+
|
| 576 |
+
```python
|
| 577 |
+
def _map_inputs(video, test_display_name, side_display, pose_model_key):
|
| 578 |
+
"""Map UI display values to internal values."""
|
| 579 |
+
test_map = {name: val for name, val in FMS_TESTS}
|
| 580 |
+
test_name = test_map.get(test_display_name, "deep_squat")
|
| 581 |
+
side = {"N/A": "na", "Left": "left", "Right": "right"}.get(side_display, "na")
|
| 582 |
+
return process_video(video, test_name, side, pose_model_key)
|
| 583 |
+
```
|
| 584 |
+
|
| 585 |
+
- [ ] **Step 4: Update `submit_btn.click` to include `pose_model_dropdown`**
|
| 586 |
+
|
| 587 |
+
Replace the existing `.click(...)` call:
|
| 588 |
+
|
| 589 |
+
```python
|
| 590 |
+
submit_btn.click(
|
| 591 |
+
fn=_map_inputs,
|
| 592 |
+
inputs=[video_input, test_dropdown, side_dropdown, pose_model_dropdown],
|
| 593 |
+
outputs=[score_html, pipeline_md, score_details, alerts_md],
|
| 594 |
+
)
|
| 595 |
+
```
|
| 596 |
+
|
| 597 |
+
- [ ] **Step 5: Smoke-test the app starts**
|
| 598 |
+
|
| 599 |
+
```bash
|
| 600 |
+
python3 -c "from app import build_app; app = build_app(); print('app built ok')"
|
| 601 |
+
```
|
| 602 |
+
|
| 603 |
+
Expected: `app built ok` — no import or config errors.
|
| 604 |
+
|
| 605 |
+
- [ ] **Step 6: Commit and push**
|
| 606 |
+
|
| 607 |
+
```bash
|
| 608 |
+
git add app.py
|
| 609 |
+
git commit -m "feat: pose model dropdown in UI, wired through process_video → Director"
|
| 610 |
+
git push
|
| 611 |
+
```
|
| 612 |
+
|
| 613 |
+
---
|
| 614 |
+
|
| 615 |
+
## Task 6: Add mocked backend tests
|
| 616 |
+
|
| 617 |
+
**Files:**
|
| 618 |
+
- Modify: `tests/test_pose2d.py`
|
| 619 |
+
|
| 620 |
+
- [ ] **Step 1: Add mocked YOLO test**
|
| 621 |
+
|
| 622 |
+
Append to `tests/test_pose2d.py`:
|
| 623 |
+
|
| 624 |
+
```python
|
| 625 |
+
import unittest.mock as mock
|
| 626 |
+
import numpy as np
|
| 627 |
+
from formscout.types import IngestResult, Pose2DResult
|
| 628 |
+
|
| 629 |
+
|
| 630 |
+
def _blank_ingest_3():
|
| 631 |
+
frames = [np.zeros((480, 640, 3), dtype=np.uint8) for _ in range(3)]
|
| 632 |
+
return IngestResult(frames=frames, fps=30.0, duration=0.1, n_people=1, width=640, height=480)
|
| 633 |
+
|
| 634 |
+
|
| 635 |
+
class TestPose2DBackendsMocked:
|
| 636 |
+
"""Backend dispatch tests — no real model downloads."""
|
| 637 |
+
|
| 638 |
+
def test_yolo_backend_dispatches(self):
|
| 639 |
+
from formscout.agents.pose2d import Pose2DAgent, _run_yolo
|
| 640 |
+
fake_kps = [{0: {"x": 10.0, "y": 20.0, "conf": 0.9}} for _ in range(3)]
|
| 641 |
+
with mock.patch("formscout.agents.pose2d._run_yolo", return_value=fake_kps) as m:
|
| 642 |
+
agent = Pose2DAgent()
|
| 643 |
+
result = agent.run(_blank_ingest_3(), model_key="YOLO26n — nano (0.7M, fastest)")
|
| 644 |
+
m.assert_called_once()
|
| 645 |
+
assert isinstance(result, Pose2DResult)
|
| 646 |
+
assert len(result.keypoints) == 3
|
| 647 |
+
assert result.confidence > 0.0
|
| 648 |
+
|
| 649 |
+
def test_mediapipe_backend_dispatches(self):
|
| 650 |
+
from formscout.agents.pose2d import Pose2DAgent
|
| 651 |
+
fake_kps = [{i: {"x": float(i), "y": float(i), "conf": 0.8} for i in range(17)} for _ in range(3)]
|
| 652 |
+
with mock.patch("formscout.agents.pose2d._run_mediapipe", return_value=fake_kps) as m:
|
| 653 |
+
agent = Pose2DAgent()
|
| 654 |
+
result = agent.run(_blank_ingest_3(), model_key="MediaPipe-Pose ⬇ ~16 MB, CPU-friendly")
|
| 655 |
+
m.assert_called_once()
|
| 656 |
+
assert isinstance(result, Pose2DResult)
|
| 657 |
+
assert len(result.keypoints) == 3
|
| 658 |
+
assert all(len(f) == 17 for f in result.keypoints)
|
| 659 |
+
|
| 660 |
+
def test_sapiens2_backend_dispatches(self):
|
| 661 |
+
from formscout.agents.pose2d import Pose2DAgent
|
| 662 |
+
fake_kps = [{i: {"x": float(i), "y": float(i), "conf": 0.85} for i in range(17)} for _ in range(3)]
|
| 663 |
+
with mock.patch("formscout.agents.pose2d._run_sapiens2", return_value=fake_kps) as m:
|
| 664 |
+
agent = Pose2DAgent()
|
| 665 |
+
result = agent.run(_blank_ingest_3(), model_key="Sapiens2-0.4B ⬇ ~1.6 GB")
|
| 666 |
+
m.assert_called_once()
|
| 667 |
+
assert isinstance(result, Pose2DResult)
|
| 668 |
+
assert len(result.keypoints) == 3
|
| 669 |
+
|
| 670 |
+
def test_unknown_model_key_falls_back(self):
|
| 671 |
+
from formscout.agents.pose2d import Pose2DAgent
|
| 672 |
+
fake_kps = [{0: {"x": 1.0, "y": 2.0, "conf": 0.7}} for _ in range(3)]
|
| 673 |
+
with mock.patch("formscout.agents.pose2d._run_yolo", return_value=fake_kps):
|
| 674 |
+
agent = Pose2DAgent()
|
| 675 |
+
result = agent.run(_blank_ingest_3(), model_key="nonexistent-model-xyz")
|
| 676 |
+
assert isinstance(result, Pose2DResult) # graceful fallback, no crash
|
| 677 |
+
|
| 678 |
+
def test_confidence_zero_on_empty_keypoints(self):
|
| 679 |
+
from formscout.agents.pose2d import Pose2DAgent
|
| 680 |
+
with mock.patch("formscout.agents.pose2d._run_yolo", return_value=[{}, {}, {}]):
|
| 681 |
+
agent = Pose2DAgent()
|
| 682 |
+
result = agent.run(_blank_ingest_3(), model_key="YOLO26n — nano (0.7M, fastest)")
|
| 683 |
+
assert result.confidence == 0.0
|
| 684 |
+
assert "no person" in result.notes.lower()
|
| 685 |
+
```
|
| 686 |
+
|
| 687 |
+
- [ ] **Step 2: Run the new tests**
|
| 688 |
+
|
| 689 |
+
```bash
|
| 690 |
+
pytest tests/test_pose2d.py::TestPose2DBackendsMocked -v
|
| 691 |
+
```
|
| 692 |
+
|
| 693 |
+
Expected: all 5 tests PASS.
|
| 694 |
+
|
| 695 |
+
- [ ] **Step 3: Run the full test suite to check for regressions**
|
| 696 |
+
|
| 697 |
+
```bash
|
| 698 |
+
pytest tests/ -v --tb=short 2>&1 | tail -30
|
| 699 |
+
```
|
| 700 |
+
|
| 701 |
+
Expected: no new failures — the 45 tests that passed before still pass, and the tests added in this plan pass as well. The one known failure (`test_unimplemented_test_returns_low_confidence`) is pre-existing — ignore it.
|
| 702 |
+
|
| 703 |
+
- [ ] **Step 4: Commit and push**
|
| 704 |
+
|
| 705 |
+
```bash
|
| 706 |
+
git add tests/test_pose2d.py
|
| 707 |
+
git commit -m "test: mocked backend dispatch tests for YOLO, MediaPipe, Sapiens2"
|
| 708 |
+
git push
|
| 709 |
+
```
|
| 710 |
+
|
| 711 |
+
---
|
| 712 |
+
|
| 713 |
+
## Self-review
|
| 714 |
+
|
| 715 |
+
**Spec coverage:**
|
| 716 |
+
- ✅ Unified `POSE_MODELS` registry (Task 1)
|
| 717 |
+
- ✅ `DEFAULT_POSE_MODEL = YOLO26n` (Task 1)
|
| 718 |
+
- ✅ Backward-compat `YOLO_POSE_MODEL` / `YOLO_POSE_MODEL_HQ` aliases (Task 1)
|
| 719 |
+
- ✅ `_run_yolo` sub-runner (Task 2)
|
| 720 |
+
- ✅ `_run_mediapipe` with ONNX Runtime + BlazePose→COCO-17 mapping (Task 2)
|
| 721 |
+
- ✅ `_run_sapiens2` with transformers pipeline + named-keypoint→COCO-17 mapping (Task 2)
|
| 722 |
+
- ✅ `Pose2DAgent.run(model_key)` dispatch + fallback on unknown key (Task 2)
|
| 723 |
+
- ✅ `onnxruntime` added to requirements (Task 3)
|
| 724 |
+
- ✅ `Director.run(model_key)` threads key to agent (Task 4)
|
| 725 |
+
- ✅ `pose_model_dropdown` in UI (Task 5)
|
| 726 |
+
- ✅ `_map_inputs` + `submit_btn.click` wired (Task 5)
|
| 727 |
+
- ✅ Error handling: unknown key → warning + fallback; download failure → confidence=0 (Task 2)
|
| 728 |
+
- ✅ Mocked tests for all three backends (Task 6)
|
| 729 |
+
|
| 730 |
+
**Placeholder scan:** None found.
|
| 731 |
+
|
| 732 |
+
**Type consistency:** `Pose2DAgent.run` and `Director.run` both take `model_key: str | None = None`; `process_video` takes a required `model_key: str` supplied by the dropdown. `config.POSE_MODELS` and `config.DEFAULT_POSE_MODEL` referenced consistently.
|
| 733 |
+
|
| 734 |
+
**Note on Sapiens2 keypoint format:** The `_run_sapiens2` implementation uses **named keypoint lookup** (by label string) rather than assuming fixed indices 0–16 = COCO. This is the safe approach — the transformers pipeline returns labeled keypoints and the code maps by name. If the pipeline returns unnamed keypoints (index-only), the `kp_lookup` will be empty and the frame will gracefully return `{}`.
|
docs/superpowers/plans/2026-06-09-pose-visualizer.md
ADDED
|
@@ -0,0 +1,914 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Pose Overlay Visualizer Implementation Plan
|
| 2 |
+
|
| 3 |
+
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
| 4 |
+
|
| 5 |
+
**Goal:** Add a pose overlay video output to FormScout with skeleton, motion trails, and velocity arrows, plus a per-joint velocity summary table.
|
| 6 |
+
|
| 7 |
+
**Architecture:** A new `formscout/agents/visualizer.py` runs after `director.run()` in `process_video()`; it uses Kalman-filtered per-joint velocity and OpenCV rendering. `app.py` gains a `gr.CheckboxGroup` for layer selection, a new `gr.Video` output tab, and a `gr.Markdown` velocity summary.
|
| 8 |
+
|
| 9 |
+
**Tech Stack:** `opencv-python`, `numpy`, `colorsys` (stdlib), `gradio`.
|
| 10 |
+
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
## File map
|
| 14 |
+
|
| 15 |
+
| File | Change |
|
| 16 |
+
|---|---|
|
| 17 |
+
| `formscout/agents/visualizer.py` | Create — Kalman filter, velocity, PoseVisualizer, summary |
|
| 18 |
+
| `tests/test_visualizer.py` | Create — all visualizer tests |
|
| 19 |
+
| `app.py` | Modify — overlay_layers checkbox, new tab, wiring |
|
| 20 |
+
|
| 21 |
+
---
|
| 22 |
+
|
| 23 |
+
## Task 1: `SimpleKalmanFilter` + `compute_joint_velocity`
|
| 24 |
+
|
| 25 |
+
**Files:**
|
| 26 |
+
- Create: `formscout/agents/visualizer.py`
|
| 27 |
+
- Create: `tests/test_visualizer.py`
|
| 28 |
+
|
| 29 |
+
- [ ] **Step 1: Write failing tests**
|
| 30 |
+
|
| 31 |
+
Create `tests/test_visualizer.py`:
|
| 32 |
+
|
| 33 |
+
```python
|
| 34 |
+
"""Tests for PoseVisualizer — no GPU, no model downloads."""
|
| 35 |
+
import numpy as np
|
| 36 |
+
import pytest
|
| 37 |
+
from formscout.types import IngestResult, Pose2DResult
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def _make_ingest(n=5, h=480, w=640, fps=30.0):
    """Build an IngestResult holding ``n`` all-zero ``h`` x ``w`` 3-channel uint8 frames."""
    frame_shape = (h, w, 3)
    frames = []
    for _ in range(n):
        frames.append(np.zeros(frame_shape, dtype=np.uint8))
    return IngestResult(
        frames=frames,
        fps=fps,
        duration=n / fps,
        n_people=1,
        width=w,
        height=h,
    )
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def _make_pose(n=5, w=640, h=480):
    """Synthetic Pose2DResult: 17 joints per frame, conf=0.9.

    Joint ``j`` sits at x = 50 + 30*j (drifting +2 px per frame) and
    y = 100 + 20*j. ``w`` and ``h`` are accepted but not used.
    """
    kps_per_frame = [
        {
            j: {
                "x": float(50 + j * 30 + i * 2),  # slight movement each frame
                "y": float(100 + j * 20),
                "conf": 0.9,
            }
            for j in range(17)
        }
        for i in range(n)
    ]
    return Pose2DResult(keypoints=kps_per_frame, fps=30.0, confidence=0.9, notes="")
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
class TestComputeJointVelocity:
    """Shape and value checks for compute_joint_velocity on synthetic keypoints."""

    def test_returns_17_joints(self):
        from formscout.agents.visualizer import compute_joint_velocity
        velocities = compute_joint_velocity(_make_pose(n=5).keypoints, fps=30.0)
        assert len(velocities) == 17

    def test_each_list_has_n_frames(self):
        from formscout.agents.visualizer import compute_joint_velocity
        velocities = compute_joint_velocity(_make_pose(n=5).keypoints, fps=30.0)
        for joint_idx in velocities:
            speeds = velocities[joint_idx]
            assert len(speeds) == 5, f"joint {joint_idx} has {len(speeds)} speeds, expected 5"

    def test_speeds_are_non_negative(self):
        from formscout.agents.visualizer import compute_joint_velocity
        velocities = compute_joint_velocity(_make_pose(n=5).keypoints, fps=30.0)
        for speeds in velocities.values():
            assert all(s >= 0.0 for s in speeds)

    def test_missing_keypoints_give_zero_speed(self):
        from formscout.agents.visualizer import compute_joint_velocity
        # Five frames, none of which contains any keypoint.
        no_detections = [{} for _ in range(5)]
        velocities = compute_joint_velocity(no_detections, fps=30.0)
        for speeds in velocities.values():
            assert all(s == 0.0 for s in speeds)
|
| 88 |
+
```
|
| 89 |
+
|
| 90 |
+
- [ ] **Step 2: Run to confirm failure**
|
| 91 |
+
|
| 92 |
+
```bash
|
| 93 |
+
pytest tests/test_visualizer.py::TestComputeJointVelocity -v
|
| 94 |
+
```
|
| 95 |
+
|
| 96 |
+
Expected: FAIL — `ModuleNotFoundError: No module named 'formscout.agents.visualizer'` (the import runs inside each test, so pytest reports 4 failed tests rather than a collection `ERROR`)
|
| 97 |
+
|
| 98 |
+
- [ ] **Step 3: Create `formscout/agents/visualizer.py` with Kalman + velocity**
|
| 99 |
+
|
| 100 |
+
```python
|
| 101 |
+
"""
|
| 102 |
+
PoseVisualizer — annotated overlay video with skeleton, trails, velocity arrows.
|
| 103 |
+
|
| 104 |
+
Input: IngestResult + Pose2DResult
|
| 105 |
+
Output: .mp4 path (or None on failure/empty layers)
|
| 106 |
+
Failure: returns None, never raises.
|
| 107 |
+
"""
|
| 108 |
+
from __future__ import annotations
|
| 109 |
+
|
| 110 |
+
import colorsys
|
| 111 |
+
import logging
|
| 112 |
+
import math
|
| 113 |
+
import tempfile
|
| 114 |
+
from collections import deque
|
| 115 |
+
|
| 116 |
+
import cv2
|
| 117 |
+
import numpy as np
|
| 118 |
+
|
| 119 |
+
logger = logging.getLogger(__name__)
|
| 120 |
+
|
| 121 |
+
# ── COCO constants ────────────────────────────────────────────────────────────
|
| 122 |
+
|
| 123 |
+
# Names of the 17 COCO keypoints, in index order.
COCO_KEYPOINTS = [
    "nose",
    "left_eye",
    "right_eye",
    "left_ear",
    "right_ear",
    "left_shoulder",
    "right_shoulder",
    "left_elbow",
    "right_elbow",
    "left_wrist",
    "right_wrist",
    "left_hip",
    "right_hip",
    "left_knee",
    "right_knee",
    "left_ankle",
    "right_ankle",
]

# Bones as (joint_a, joint_b) index pairs.
COCO_SKELETON = [
    # face
    (0, 1), (0, 2), (1, 3), (2, 4),
    # arms
    (5, 6), (5, 7), (7, 9), (6, 8), (8, 10),
    # torso
    (5, 11), (6, 12), (11, 12),
    # legs
    (11, 13), (13, 15), (12, 14), (14, 16),
]

TRAIL_LENGTH = 10     # positions remembered per joint trail
MAX_ARROW_PX = 40     # upper bound on velocity-arrow length, in pixels
CONF_THRESHOLD = 0.3  # keypoints below this confidence are ignored
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
# ── Kalman filter ─────────────────────────────────────────────────────────────
|
| 143 |
+
|
| 144 |
+
class SimpleKalmanFilter:
    """4-state Kalman filter (x, y, vx, vy) for joint tracking.

    Uses a constant-velocity transition and observes position only; the
    velocity components are inferred from successive position updates.
    """

    def __init__(self, process_noise: float = 0.01, measurement_noise: float = 0.1):
        self.is_initialized = False
        self.state = np.zeros(4)                  # [x, y, vx, vy]
        self.cov = np.eye(4) * 0.1                # state covariance
        self.Q = np.eye(4) * process_noise        # process noise
        self.R = np.eye(2) * measurement_noise    # measurement noise
        # Observation matrix: picks (x, y) out of the state.
        self.H = np.array([[1, 0, 0, 0], [0, 1, 0, 0]], dtype=float)

    def predict(self, dt: float = 1.0) -> None:
        """Advance state and covariance by ``dt`` under constant velocity."""
        F = np.array(
            [[1, 0, dt, 0], [0, 1, 0, dt], [0, 0, 1, 0], [0, 0, 0, 1]],
            dtype=float,
        )
        self.state = F @ self.state
        self.cov = F @ self.cov @ F.T + self.Q

    def update(self, x: float, y: float) -> None:
        """Fold in a position measurement.

        The first finite measurement only seeds the position (velocity stays
        at zero). Non-finite measurements are ignored: a single NaN/inf would
        otherwise propagate into every later state and covariance value.
        """
        if not (math.isfinite(x) and math.isfinite(y)):
            return
        z = np.array([x, y])
        if not self.is_initialized:
            self.state[:2] = z
            self.is_initialized = True
            return
        S = self.H @ self.cov @ self.H.T + self.R
        K = self.cov @ self.H.T @ np.linalg.inv(S)
        self.state = self.state + K @ (z - self.H @ self.state)
        self.cov = (np.eye(4) - K @ self.H) @ self.cov

    def velocity_magnitude(self) -> float:
        """Return the Euclidean norm of the estimated (vx, vy)."""
        return math.hypot(self.state[2], self.state[3])

    def velocity_vector(self) -> tuple[float, float]:
        """Return the estimated (vx, vy) as plain floats."""
        return float(self.state[2]), float(self.state[3])
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
# ── Velocity computation ──────────────────────────────────────────────────────
|
| 180 |
+
|
| 181 |
+
def compute_joint_velocity(
    keypoints_per_frame: list[dict],
    fps: float,
) -> dict[int, list[float]]:
    """
    Compute Kalman-filtered per-joint speed for each frame.

    Returns dict[joint_idx, [speed_frame0, speed_frame1, ...]] with one entry
    per COCO joint. Speeds are in px/s when ``fps`` is positive; a missing,
    zero, or negative ``fps`` falls back to a time step of 1.0 (px/frame).
    Missing/low-confidence keypoints yield speed=0.0 for that frame, but the
    joint's filter is still advanced by ``predict`` so timing stays aligned.
    """
    # `fps and ...` also covers fps=None, which would otherwise raise TypeError.
    dt = 1.0 / fps if fps and fps > 0 else 1.0
    n_joints = len(COCO_KEYPOINTS)
    filters: dict[int, SimpleKalmanFilter] = {j: SimpleKalmanFilter() for j in range(n_joints)}
    result: dict[int, list[float]] = {j: [] for j in range(n_joints)}

    for frame_kps in keypoints_per_frame:
        # Treat a None frame entry like an empty detection.
        frame_kps = frame_kps or {}
        for j, kf in filters.items():
            kf.predict(dt)
            kp = frame_kps.get(j)
            if kp and kp.get("conf", 0.0) >= CONF_THRESHOLD:
                kf.update(kp["x"], kp["y"])
                speed = kf.velocity_magnitude()
            else:
                speed = 0.0
            result[j].append(speed)

    return result
|
| 208 |
+
```
|
| 209 |
+
|
| 210 |
+
- [ ] **Step 4: Run tests**
|
| 211 |
+
|
| 212 |
+
```bash
|
| 213 |
+
pytest tests/test_visualizer.py::TestComputeJointVelocity -v
|
| 214 |
+
```
|
| 215 |
+
|
| 216 |
+
Expected: 4 PASS
|
| 217 |
+
|
| 218 |
+
- [ ] **Step 5: Commit**
|
| 219 |
+
|
| 220 |
+
```bash
|
| 221 |
+
git add formscout/agents/visualizer.py tests/test_visualizer.py
|
| 222 |
+
git commit -m "feat: SimpleKalmanFilter + compute_joint_velocity (4 tests pass)"
|
| 223 |
+
```
|
| 224 |
+
|
| 225 |
+
---
|
| 226 |
+
|
| 227 |
+
## Task 2: `PoseVisualizer._draw_skeleton`
|
| 228 |
+
|
| 229 |
+
**Files:**
|
| 230 |
+
- Modify: `formscout/agents/visualizer.py`
|
| 231 |
+
- Modify: `tests/test_visualizer.py`
|
| 232 |
+
|
| 233 |
+
- [ ] **Step 1: Write failing test**
|
| 234 |
+
|
| 235 |
+
Append to `tests/test_visualizer.py`:
|
| 236 |
+
|
| 237 |
+
```python
|
| 238 |
+
class TestDrawSkeleton:
    """Checks that _draw_skeleton draws confident joints and skips weak ones."""

    def test_skeleton_draws_without_error(self):
        from formscout.agents.visualizer import PoseVisualizer
        blank = np.zeros((480, 640, 3), dtype=np.uint8)
        kps = {}
        for j in range(17):
            kps[j] = {"x": float(50 + j * 30), "y": float(100 + j * 20), "conf": 0.9}
        drawn = PoseVisualizer()._draw_skeleton(blank.copy(), kps)
        assert drawn.shape == blank.shape
        # Drawing must have touched at least one pixel.
        assert not np.array_equal(drawn, blank)

    def test_low_confidence_keypoints_not_drawn(self):
        from formscout.agents.visualizer import PoseVisualizer
        blank = np.zeros((480, 640, 3), dtype=np.uint8)
        # Every keypoint sits below the confidence threshold.
        kps = {}
        for j in range(17):
            kps[j] = {"x": float(50 + j * 30), "y": 100.0, "conf": 0.1}
        drawn = PoseVisualizer()._draw_skeleton(blank.copy(), kps)
        # Nothing qualifies, so the frame must still be all zeros.
        assert np.array_equal(drawn, blank)
|
| 259 |
+
```
|
| 260 |
+
|
| 261 |
+
- [ ] **Step 2: Run to confirm failure**
|
| 262 |
+
|
| 263 |
+
```bash
|
| 264 |
+
pytest tests/test_visualizer.py::TestDrawSkeleton -v
|
| 265 |
+
```
|
| 266 |
+
|
| 267 |
+
Expected: FAIL — `AttributeError: 'PoseVisualizer' object has no attribute '_draw_skeleton'`
|
| 268 |
+
|
| 269 |
+
- [ ] **Step 3: Add `PoseVisualizer` class with `_draw_skeleton` to `visualizer.py`**
|
| 270 |
+
|
| 271 |
+
Append after `compute_joint_velocity`:
|
| 272 |
+
|
| 273 |
+
```python
|
| 274 |
+
# ── Helpers ───────────────────────────────────────────────────────────────────
|
| 275 |
+
|
| 276 |
+
def _conf_to_bgr(conf: float) -> tuple[int, int, int]:
|
| 277 |
+
"""Map confidence 0→1 to BGR color red→green via HSV."""
|
| 278 |
+
hue = conf * 120.0 / 360.0
|
| 279 |
+
r, g, b = colorsys.hsv_to_rgb(hue, 1.0, 1.0)
|
| 280 |
+
return (int(b * 255), int(g * 255), int(r * 255))
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
# ── PoseVisualizer ────────────────────────────────────────────────────────────
|
| 284 |
+
|
| 285 |
+
class PoseVisualizer:
    """Renders skeleton, trails, and velocity arrows onto video frames."""

    def __init__(self):
        # Per-joint speed lists; starts empty.
        self.last_velocities: dict[int, list[float]] = {}

    # ── Skeleton ──────────────────────────────────────────────────────────────

    def _draw_skeleton(self, frame: np.ndarray, kps: dict) -> np.ndarray:
        """Draw bones in white and joints colored by confidence, in place.

        Only keypoints with ``conf`` >= CONF_THRESHOLD are used; a bone is
        drawn only when both of its end joints qualify. Returns ``frame``.
        """
        white = (255, 255, 255)

        def _pixel(point):
            return (int(point["x"]), int(point["y"]))

        confident = {}
        for idx, point in kps.items():
            if point.get("conf", 0.0) >= CONF_THRESHOLD:
                confident[idx] = point

        # Bones first, so the joint markers are painted on top of them.
        for a, b in COCO_SKELETON:
            if a not in confident or b not in confident:
                continue
            cv2.line(frame, _pixel(confident[a]), _pixel(confident[b]), white, 2)

        # Joints: filled confidence-colored dot with a thin white ring.
        for point in confident.values():
            center = _pixel(point)
            fill = _conf_to_bgr(point["conf"])
            cv2.circle(frame, center, 4, fill, -1)
            cv2.circle(frame, center, 5, white, 1)

        return frame
|
| 312 |
+
```
|
| 313 |
+
|
| 314 |
+
- [ ] **Step 4: Run tests**
|
| 315 |
+
|
| 316 |
+
```bash
|
| 317 |
+
pytest tests/test_visualizer.py::TestDrawSkeleton -v
|
| 318 |
+
```
|
| 319 |
+
|
| 320 |
+
Expected: 2 PASS
|
| 321 |
+
|
| 322 |
+
- [ ] **Step 5: Commit**
|
| 323 |
+
|
| 324 |
+
```bash
|
| 325 |
+
git add formscout/agents/visualizer.py tests/test_visualizer.py
|
| 326 |
+
git commit -m "feat: PoseVisualizer._draw_skeleton with confidence-colored joints"
|
| 327 |
+
```
|
| 328 |
+
|
| 329 |
+
---
|
| 330 |
+
|
| 331 |
+
## Task 3: `PoseVisualizer._draw_trails`
|
| 332 |
+
|
| 333 |
+
**Files:**
|
| 334 |
+
- Modify: `formscout/agents/visualizer.py`
|
| 335 |
+
- Modify: `tests/test_visualizer.py`
|
| 336 |
+
|
| 337 |
+
- [ ] **Step 1: Write failing test**
|
| 338 |
+
|
| 339 |
+
Append to `tests/test_visualizer.py`:
|
| 340 |
+
|
| 341 |
+
```python
|
| 342 |
+
class TestDrawTrails:
    """Checks that _draw_trails draws multi-point trails and ignores single points."""

    def test_trails_draw_without_error(self):
        from formscout.agents.visualizer import PoseVisualizer, TRAIL_LENGTH
        from collections import deque
        blank = np.zeros((480, 640, 3), dtype=np.uint8)
        # Joint 0 has a five-position history.
        positions = [(100 + i * 5, 200 + i * 3) for i in range(5)]
        trail_history = {0: deque(positions, maxlen=TRAIL_LENGTH)}
        drawn = PoseVisualizer()._draw_trails(blank.copy(), trail_history)
        assert drawn.shape == blank.shape
        # At least some pixels must differ from the blank frame.
        assert not np.array_equal(drawn, blank)

    def test_short_trail_no_crash(self):
        from formscout.agents.visualizer import PoseVisualizer, TRAIL_LENGTH
        from collections import deque
        blank = np.zeros((480, 640, 3), dtype=np.uint8)
        # A lone point cannot form a segment.
        trail_history = {0: deque([(100, 200)], maxlen=TRAIL_LENGTH)}
        drawn = PoseVisualizer()._draw_trails(blank.copy(), trail_history)
        # No crash, and nothing drawn.
        assert np.array_equal(drawn, blank)
|
| 367 |
+
```
|
| 368 |
+
|
| 369 |
+
- [ ] **Step 2: Run to confirm failure**
|
| 370 |
+
|
| 371 |
+
```bash
|
| 372 |
+
pytest tests/test_visualizer.py::TestDrawTrails -v
|
| 373 |
+
```
|
| 374 |
+
|
| 375 |
+
Expected: FAIL — `AttributeError: 'PoseVisualizer' object has no attribute '_draw_trails'`
|
| 376 |
+
|
| 377 |
+
- [ ] **Step 3: Add `_draw_trails` to `PoseVisualizer`**
|
| 378 |
+
|
| 379 |
+
Inside the `PoseVisualizer` class, after `_draw_skeleton`:
|
| 380 |
+
|
| 381 |
+
```python
|
| 382 |
+
# ── Trails ───────────────────────────────────────────────────────────────
|
| 383 |
+
|
| 384 |
+
def _draw_trails(self, frame: np.ndarray, trail_history: dict) -> np.ndarray:
    """Draw fading motion trails for each joint, in place.

    ``trail_history`` maps a joint to a sequence of (x, y) positions, oldest
    first. Segments brighten and thicken toward the newest position; trails
    with fewer than two points are skipped. Returns ``frame``.
    """
    for trail in trail_history.values():
        pts = list(trail)
        n_segments = len(pts) - 1
        if n_segments < 1:
            continue
        for i in range(1, len(pts)):
            # Divide by the segment count (not the point count) so the
            # newest segment reaches alpha 1.0: full brightness, thickness 3.
            alpha = i / n_segments
            brightness = int(255 * alpha)
            color = (brightness, brightness, brightness)
            thickness = max(1, int(3 * alpha))
            p1 = (int(pts[i - 1][0]), int(pts[i - 1][1]))
            p2 = (int(pts[i][0]), int(pts[i][1]))
            cv2.line(frame, p1, p2, color, thickness)
    return frame
|
| 399 |
+
```
|
| 400 |
+
|
| 401 |
+
- [ ] **Step 4: Run tests**
|
| 402 |
+
|
| 403 |
+
```bash
|
| 404 |
+
pytest tests/test_visualizer.py::TestDrawTrails -v
|
| 405 |
+
```
|
| 406 |
+
|
| 407 |
+
Expected: 2 PASS
|
| 408 |
+
|
| 409 |
+
- [ ] **Step 5: Commit**
|
| 410 |
+
|
| 411 |
+
```bash
|
| 412 |
+
git add formscout/agents/visualizer.py tests/test_visualizer.py
|
| 413 |
+
git commit -m "feat: PoseVisualizer._draw_trails with fading alpha"
|
| 414 |
+
```
|
| 415 |
+
|
| 416 |
+
---
|
| 417 |
+
|
| 418 |
+
## Task 4: `PoseVisualizer._draw_velocity_arrows`
|
| 419 |
+
|
| 420 |
+
**Files:**
|
| 421 |
+
- Modify: `formscout/agents/visualizer.py`
|
| 422 |
+
- Modify: `tests/test_visualizer.py`
|
| 423 |
+
|
| 424 |
+
- [ ] **Step 1: Write failing test**
|
| 425 |
+
|
| 426 |
+
Append to `tests/test_visualizer.py`:
|
| 427 |
+
|
| 428 |
+
```python
|
| 429 |
+
class TestDrawVelocityArrows:
    """Checks that _draw_velocity_arrows draws for moving joints and is a no-op without history."""

    def test_arrows_draw_without_error(self):
        from formscout.agents.visualizer import PoseVisualizer
        vis = PoseVisualizer()
        frame = np.zeros((480, 640, 3), dtype=np.uint8)
        kps = {j: {"x": float(50 + j * 30), "y": float(100 + j * 20), "conf": 0.9}
               for j in range(17)}
        # Previous frame: every joint 2 px up and to the left.
        prev_kps = {j: {"x": float(48 + j * 30), "y": float(98 + j * 20), "conf": 0.9}
                    for j in range(17)}
        # velocities: joint 5 moving fast, all others stationary
        velocities = {j: [0.0] * 5 for j in range(17)}
        velocities[5] = [0.0, 10.0, 50.0, 80.0, 120.0]
        result = vis._draw_velocity_arrows(frame.copy(), kps, prev_kps, velocities, frame_idx=4)
        assert result.shape == frame.shape
        # Joint 5 has non-zero speed and displacement, so an arrow must be drawn.
        assert not np.array_equal(result, frame)

    def test_no_prev_kps_no_crash(self):
        from formscout.agents.visualizer import PoseVisualizer
        vis = PoseVisualizer()
        frame = np.zeros((480, 640, 3), dtype=np.uint8)
        kps = {j: {"x": float(50 + j * 30), "y": 100.0, "conf": 0.9} for j in range(17)}
        velocities = {j: [50.0] * 5 for j in range(17)}
        # prev_kps is None — should skip without crash
        result = vis._draw_velocity_arrows(frame.copy(), kps, None, velocities, frame_idx=0)
        assert result.shape == frame.shape
        # Without a previous frame there is no direction, so nothing is drawn.
        assert np.array_equal(result, frame)
|
| 453 |
+
```
|
| 454 |
+
|
| 455 |
+
- [ ] **Step 2: Run to confirm failure**
|
| 456 |
+
|
| 457 |
+
```bash
|
| 458 |
+
pytest tests/test_visualizer.py::TestDrawVelocityArrows -v
|
| 459 |
+
```
|
| 460 |
+
|
| 461 |
+
Expected: FAIL — `AttributeError: 'PoseVisualizer' object has no attribute '_draw_velocity_arrows'`
|
| 462 |
+
|
| 463 |
+
- [ ] **Step 3: Add `_draw_velocity_arrows` to `PoseVisualizer`**
|
| 464 |
+
|
| 465 |
+
Inside the `PoseVisualizer` class, after `_draw_trails`:
|
| 466 |
+
|
| 467 |
+
```python
|
| 468 |
+
# ── Velocity arrows ───────────────────────────────────────────────────────
|
| 469 |
+
|
| 470 |
+
def _draw_velocity_arrows(
    self,
    frame: np.ndarray,
    kps: dict,
    prev_kps: dict | None,
    velocities: dict[int, list[float]],
    frame_idx: int,
) -> np.ndarray:
    """Draw per-joint velocity arrows scaled by speed, in place.

    Arrow direction comes from the displacement between ``prev_kps`` and
    ``kps``; arrow length and color come from ``velocities[j][frame_idx]``
    relative to the fastest joint in this frame (capped at MAX_ARROW_PX).
    Nothing is drawn when ``prev_kps`` is None or every speed is zero.
    Returns ``frame``.
    """
    if prev_kps is None:
        return frame

    # Speed of every joint that has a sample for this frame.
    frame_speeds: dict[int, float] = {}
    for j in range(17):
        speeds = velocities.get(j, [])
        if frame_idx < len(speeds):
            frame_speeds[j] = speeds[frame_idx]

    peak = max(frame_speeds.values(), default=0.0)
    if peak == 0.0:
        return frame

    for j, speed in frame_speeds.items():
        if speed == 0.0:
            continue
        kp = kps.get(j)
        pk = prev_kps.get(j)
        if not kp or not pk:
            continue
        # Both endpoints must be trustworthy: a low-confidence previous
        # keypoint would give the arrow an arbitrary direction.
        if kp.get("conf", 0.0) < CONF_THRESHOLD:
            continue
        if pk.get("conf", 0.0) < CONF_THRESHOLD:
            continue

        dx = kp["x"] - pk["x"]
        dy = kp["y"] - pk["y"]
        mag = math.hypot(dx, dy)
        if mag < 1e-6:
            continue

        # Normalize direction, scale to arrow length
        ratio = speed / peak
        length = min(ratio * MAX_ARROW_PX, MAX_ARROW_PX)
        nx, ny = dx / mag, dy / mag
        start = (int(kp["x"]), int(kp["y"]))
        end = (int(kp["x"] + nx * length), int(kp["y"] + ny * length))

        if ratio < 0.33:
            color = (0, 200, 0)      # green
        elif ratio < 0.66:
            color = (0, 140, 255)    # orange
        else:
            color = (0, 0, 255)      # red

        cv2.arrowedLine(frame, start, end, color, 2, tipLength=0.35)

    return frame
|
| 524 |
+
```
|
| 525 |
+
|
| 526 |
+
- [ ] **Step 4: Run tests**
|
| 527 |
+
|
| 528 |
+
```bash
|
| 529 |
+
pytest tests/test_visualizer.py::TestDrawVelocityArrows -v
|
| 530 |
+
```
|
| 531 |
+
|
| 532 |
+
Expected: 2 PASS
|
| 533 |
+
|
| 534 |
+
- [ ] **Step 5: Commit**
|
| 535 |
+
|
| 536 |
+
```bash
|
| 537 |
+
git add formscout/agents/visualizer.py tests/test_visualizer.py
|
| 538 |
+
git commit -m "feat: PoseVisualizer._draw_velocity_arrows speed-colored"
|
| 539 |
+
```
|
| 540 |
+
|
| 541 |
+
---
|
| 542 |
+
|
| 543 |
+
## Task 5: `render_video` + `build_velocity_summary`
|
| 544 |
+
|
| 545 |
+
**Files:**
|
| 546 |
+
- Modify: `formscout/agents/visualizer.py`
|
| 547 |
+
- Modify: `tests/test_visualizer.py`
|
| 548 |
+
|
| 549 |
+
- [ ] **Step 1: Write failing tests**
|
| 550 |
+
|
| 551 |
+
Append to `tests/test_visualizer.py`:
|
| 552 |
+
|
| 553 |
+
```python
|
| 554 |
+
class TestRenderVideo:
    """Checks for PoseVisualizer.render_video on synthetic input."""

    def test_creates_mp4_file(self, tmp_path):
        import os
        from formscout.agents.visualizer import PoseVisualizer
        target = str(tmp_path / "out.mp4")
        written = PoseVisualizer().render_video(
            _make_ingest(n=5), _make_pose(n=5), {"skeleton"}, target
        )
        assert written is not None
        assert os.path.exists(written)
        assert os.path.getsize(written) > 0

    def test_empty_layers_returns_none(self, tmp_path):
        from formscout.agents.visualizer import PoseVisualizer
        target = str(tmp_path / "out.mp4")
        no_layers = set()
        written = PoseVisualizer().render_video(_make_ingest(), _make_pose(), no_layers, target)
        assert written is None

    def test_no_detections_returns_none(self, tmp_path):
        from formscout.agents.visualizer import PoseVisualizer
        # Five frames with no keypoints at all.
        empty_pose = Pose2DResult(
            keypoints=[{} for _ in range(5)], fps=30.0, confidence=0.0, notes=""
        )
        target = str(tmp_path / "out.mp4")
        written = PoseVisualizer().render_video(
            _make_ingest(n=5), empty_pose, {"skeleton"}, target
        )
        assert written is None

    def test_last_velocities_set_after_render(self, tmp_path):
        from formscout.agents.visualizer import PoseVisualizer
        vis = PoseVisualizer()
        target = str(tmp_path / "out.mp4")
        vis.render_video(_make_ingest(n=5), _make_pose(n=5), {"skeleton"}, target)
        assert len(vis.last_velocities) == 17
|
| 591 |
+
|
| 592 |
+
|
| 593 |
+
class TestBuildVelocitySummary:
    """Checks the text produced by build_velocity_summary."""

    def test_returns_markdown_table(self):
        from formscout.agents.visualizer import build_velocity_summary, compute_joint_velocity
        keypoints = _make_pose(n=10).keypoints
        velocities = compute_joint_velocity(keypoints, fps=30.0)
        summary = build_velocity_summary(keypoints, velocities)
        assert "|" in summary
        # At least one COCO joint name appears
        expected_names = ["knee", "shoulder", "hip", "ankle"]
        assert any(name in summary for name in expected_names)

    def test_empty_keypoints_returns_empty_string(self):
        from formscout.agents.visualizer import build_velocity_summary
        no_detections = [{} for _ in range(5)]
        zero_speeds = {j: [0.0] * 5 for j in range(17)}
        summary = build_velocity_summary(no_detections, zero_speeds)
        assert summary == ""
|
| 609 |
+
```
|
| 610 |
+
|
| 611 |
+
- [ ] **Step 2: Run to confirm failure**
|
| 612 |
+
|
| 613 |
+
```bash
|
| 614 |
+
pytest tests/test_visualizer.py::TestRenderVideo tests/test_visualizer.py::TestBuildVelocitySummary -v
|
| 615 |
+
```
|
| 616 |
+
|
| 617 |
+
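Expected: FAIL — `TestRenderVideo` fails with `AttributeError: 'PoseVisualizer' object has no attribute 'render_video'`; `TestBuildVelocitySummary` fails with `ImportError: cannot import name 'build_velocity_summary'` until that function is added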
|
| 618 |
+
|
| 619 |
+
- [ ] **Step 3: Add `render_video` to `PoseVisualizer`**
|
| 620 |
+
|
| 621 |
+
Inside the `PoseVisualizer` class, after `_draw_velocity_arrows`:
|
| 622 |
+
|
| 623 |
+
```python
|
| 624 |
+
# ── Public ────────────────────────────────────────────────────────────────
|
| 625 |
+
|
| 626 |
+
def render_video(
|
| 627 |
+
self,
|
| 628 |
+
ingest,
|
| 629 |
+
pose2d,
|
| 630 |
+
layers: set[str],
|
| 631 |
+
output_path: str,
|
| 632 |
+
) -> str | None:
|
| 633 |
+
"""
|
| 634 |
+
Render annotated video. Returns output_path on success, None otherwise.
|
| 635 |
+
layers: subset of {"skeleton", "trails", "velocity_arrows"}
|
| 636 |
+
"""
|
| 637 |
+
if not layers:
|
| 638 |
+
return None
|
| 639 |
+
|
| 640 |
+
# Require at least one detected frame
|
| 641 |
+
if not any(pose2d.keypoints):
|
| 642 |
+
return None
|
| 643 |
+
|
| 644 |
+
try:
|
| 645 |
+
velocities = compute_joint_velocity(pose2d.keypoints, ingest.fps)
|
| 646 |
+
self.last_velocities = velocities
|
| 647 |
+
|
| 648 |
+
frames = ingest.frames
|
| 649 |
+
h, w = frames[0].shape[:2]
|
| 650 |
+
fps = ingest.fps or 30.0
|
| 651 |
+
|
| 652 |
+
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
|
| 653 |
+
writer = cv2.VideoWriter(output_path, fourcc, fps, (w, h))
|
| 654 |
+
if not writer.isOpened():
|
| 655 |
+
logger.warning("VideoWriter failed to open: %s", output_path)
|
| 656 |
+
return None
|
| 657 |
+
|
| 658 |
+
trail_history: dict[int, deque] = {j: deque(maxlen=TRAIL_LENGTH) for j in range(17)}
|
| 659 |
+
prev_kps: dict | None = None
|
| 660 |
+
|
| 661 |
+
for frame_idx, (frame, kps) in enumerate(zip(frames, pose2d.keypoints)):
|
| 662 |
+
out_frame = frame.copy()
|
| 663 |
+
|
| 664 |
+
if "trails" in layers:
|
| 665 |
+
# Update trail history before drawing
|
| 666 |
+
for j, kp in kps.items():
|
| 667 |
+
if kp.get("conf", 0.0) >= CONF_THRESHOLD:
|
| 668 |
+
trail_history[j].append((kp["x"], kp["y"]))
|
| 669 |
+
out_frame = self._draw_trails(out_frame, trail_history)
|
| 670 |
+
|
| 671 |
+
if "skeleton" in layers:
|
| 672 |
+
out_frame = self._draw_skeleton(out_frame, kps)
|
| 673 |
+
|
| 674 |
+
if "velocity_arrows" in layers:
|
| 675 |
+
out_frame = self._draw_velocity_arrows(
|
| 676 |
+
out_frame, kps, prev_kps, velocities, frame_idx
|
| 677 |
+
)
|
| 678 |
+
|
| 679 |
+
writer.write(out_frame)
|
| 680 |
+
prev_kps = kps
|
| 681 |
+
|
| 682 |
+
writer.release()
|
| 683 |
+
return output_path
|
| 684 |
+
|
| 685 |
+
except Exception as e:
|
| 686 |
+
logger.warning("render_video failed: %s", e)
|
| 687 |
+
return None
|
| 688 |
+
```
|
| 689 |
+
|
| 690 |
+
- [ ] **Step 4: Add `build_velocity_summary` after the class**
|
| 691 |
+
|
| 692 |
+
After the `PoseVisualizer` class definition, add:
|
| 693 |
+
|
| 694 |
+
```python
|
| 695 |
+
# ── Velocity summary ──────────────────────────────────────────────────────────
|
| 696 |
+
|
| 697 |
+
def build_velocity_summary(
|
| 698 |
+
keypoints_per_frame: list[dict],
|
| 699 |
+
velocities: dict[int, list[float]],
|
| 700 |
+
) -> str:
|
| 701 |
+
"""Return markdown table of per-joint avg/peak velocity. Empty string if no valid joints."""
|
| 702 |
+
n_frames = len(keypoints_per_frame)
|
| 703 |
+
if n_frames == 0:
|
| 704 |
+
return ""
|
| 705 |
+
|
| 706 |
+
rows = []
|
| 707 |
+
for j in range(17):
|
| 708 |
+
# Count frames where this joint is detected
|
| 709 |
+
detected = sum(
|
| 710 |
+
1 for kps in keypoints_per_frame
|
| 711 |
+
if kps.get(j, {}).get("conf", 0.0) >= CONF_THRESHOLD
|
| 712 |
+
)
|
| 713 |
+
if detected < n_frames * 0.5:
|
| 714 |
+
continue # skip joints present in <50% of frames
|
| 715 |
+
|
| 716 |
+
speeds = velocities.get(j, [])
|
| 717 |
+
if not speeds:
|
| 718 |
+
continue
|
| 719 |
+
|
| 720 |
+
avg_speed = sum(speeds) / len(speeds)
|
| 721 |
+
peak_speed = max(speeds)
|
| 722 |
+
rows.append((COCO_KEYPOINTS[j], avg_speed, peak_speed))
|
| 723 |
+
|
| 724 |
+
if not rows:
|
| 725 |
+
return ""
|
| 726 |
+
|
| 727 |
+
rows.sort(key=lambda r: r[2], reverse=True) # sort by peak descending
|
| 728 |
+
lines = [
|
| 729 |
+
"| Joint | Avg (px/s) | Peak (px/s) |",
|
| 730 |
+
"|---|---|---|",
|
| 731 |
+
]
|
| 732 |
+
for name, avg, peak in rows:
|
| 733 |
+
lines.append(f"| {name} | {avg:.1f} | {peak:.1f} |")
|
| 734 |
+
return "\n".join(lines)
|
| 735 |
+
```
|
| 736 |
+
|
| 737 |
+
- [ ] **Step 5: Run all visualizer tests**
|
| 738 |
+
|
| 739 |
+
```bash
|
| 740 |
+
pytest tests/test_visualizer.py -v
|
| 741 |
+
```
|
| 742 |
+
|
| 743 |
+
Expected: all tests PASS (4 + 2 + 2 + 2 + 4 + 2 = 16 total)
|
| 744 |
+
|
| 745 |
+
- [ ] **Step 6: Commit**
|
| 746 |
+
|
| 747 |
+
```bash
|
| 748 |
+
git add formscout/agents/visualizer.py tests/test_visualizer.py
|
| 749 |
+
git commit -m "feat: PoseVisualizer.render_video + build_velocity_summary (16 tests pass)"
|
| 750 |
+
```
|
| 751 |
+
|
| 752 |
+
---
|
| 753 |
+
|
| 754 |
+
## Task 6: Wire `app.py`
|
| 755 |
+
|
| 756 |
+
**Files:**
|
| 757 |
+
- Modify: `app.py`
|
| 758 |
+
|
| 759 |
+
- [ ] **Step 1: Add `import tempfile` if not present (the visualizer itself is imported inside `process_video` in Step 2)**
|
| 760 |
+
|
| 761 |
+
Check the top of `app.py` for `import tempfile`. If missing, add it alongside the other stdlib imports. (Look at the existing import block and add `import tempfile` there.)
|
| 762 |
+
|
| 763 |
+
- [ ] **Step 2: Update `process_video()` signature and body**
|
| 764 |
+
|
| 765 |
+
Replace the existing `process_video` function (lines 46–83) with:
|
| 766 |
+
|
| 767 |
+
```python
|
| 768 |
+
def process_video(video_path: str, test_name: str, side: str, model_key: str, layers: list[str]):
|
| 769 |
+
"""Process an uploaded video through the FormScout pipeline."""
|
| 770 |
+
if not video_path:
|
| 771 |
+
return (
|
| 772 |
+
_render_empty_state(),
|
| 773 |
+
"Upload a video to begin analysis.",
|
| 774 |
+
"",
|
| 775 |
+
"",
|
| 776 |
+
None,
|
| 777 |
+
"",
|
| 778 |
+
)
|
| 779 |
+
|
| 780 |
+
director = Director()
|
| 781 |
+
state = director.run(video_path, test_name=test_name, side=side, model_key=model_key)
|
| 782 |
+
|
| 783 |
+
# ─── Score card ───
|
| 784 |
+
score_html = _render_empty_state()
|
| 785 |
+
score_details = ""
|
| 786 |
+
|
| 787 |
+
if state.features:
|
| 788 |
+
result = score_test(state.features)
|
| 789 |
+
judge = state.judge
|
| 790 |
+
if judge and judge.score is not None:
|
| 791 |
+
score_html = _render_score_card(judge.score, judge.confidence, judge.needs_human)
|
| 792 |
+
score_details = _render_score_details_judge(judge, result, state.features)
|
| 793 |
+
elif judge and judge.needs_human:
|
| 794 |
+
score_html = _render_score_card(0, 0, True)
|
| 795 |
+
score_details = f"### Needs Clinician Review\n{judge.rationale}"
|
| 796 |
+
else:
|
| 797 |
+
score_html = _render_score_card(result.score, result.confidence, result.needs_human)
|
| 798 |
+
score_details = _render_score_details(result, state.features)
|
| 799 |
+
|
| 800 |
+
# ─── Pipeline info ───
|
| 801 |
+
pipeline_md = _render_pipeline_status(state)
|
| 802 |
+
|
| 803 |
+
# ─── Warnings/errors ───
|
| 804 |
+
alerts = _render_alerts(state)
|
| 805 |
+
|
| 806 |
+
# ─── Overlay video ───
|
| 807 |
+
overlay_path = None
|
| 808 |
+
vel_summary = ""
|
| 809 |
+
layer_set = {lbl.lower().replace(" ", "_") for lbl in (layers or [])}
|
| 810 |
+
if layer_set and state.ingest and state.pose2d:
|
| 811 |
+
try:
|
| 812 |
+
from formscout.agents.visualizer import PoseVisualizer, build_velocity_summary
|
| 813 |
+
vis = PoseVisualizer()
|
| 814 |
+
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
|
| 815 |
+
out_path = f.name
|
| 816 |
+
overlay_path = vis.render_video(state.ingest, state.pose2d, layer_set, out_path)
|
| 817 |
+
if overlay_path:
|
| 818 |
+
vel_summary = build_velocity_summary(state.pose2d.keypoints, vis.last_velocities)
|
| 819 |
+
except Exception as e:
|
| 820 |
+
alerts = (alerts or "") + f"\n⚠️ Visualizer error: {e}"
|
| 821 |
+
|
| 822 |
+
return score_html, pipeline_md, score_details, alerts, overlay_path, vel_summary
|
| 823 |
+
```
|
| 824 |
+
|
| 825 |
+
- [ ] **Step 3: Add `overlay_layers` CheckboxGroup in `build_app()`**
|
| 826 |
+
|
| 827 |
+
After the `pose_model_dropdown` block (around line 270), and before `submit_btn`:
|
| 828 |
+
|
| 829 |
+
```python
|
| 830 |
+
overlay_layers = gr.CheckboxGroup(
|
| 831 |
+
choices=["Skeleton", "Trails", "Velocity arrows"],
|
| 832 |
+
value=["Skeleton", "Trails"],
|
| 833 |
+
label="Overlay Layers",
|
| 834 |
+
)
|
| 835 |
+
```
|
| 836 |
+
|
| 837 |
+
- [ ] **Step 4: Add overlay tab in the results panel**
|
| 838 |
+
|
| 839 |
+
Inside the `with gr.Tabs():` block (after the `⚠️ Alerts` tab):
|
| 840 |
+
|
| 841 |
+
```python
|
| 842 |
+
with gr.TabItem("🎬 Overlay Video"):
|
| 843 |
+
overlay_video = gr.Video(label="Annotated Movement")
|
| 844 |
+
velocity_md = gr.Markdown("")
|
| 845 |
+
```
|
| 846 |
+
|
| 847 |
+
- [ ] **Step 5: Update `_map_inputs` and `submit_btn.click`**
|
| 848 |
+
|
| 849 |
+
Replace the `_map_inputs` closure and `submit_btn.click` call:
|
| 850 |
+
|
| 851 |
+
```python
|
| 852 |
+
def _map_inputs(video, test_display_name, side_display, pose_model_key, overlay_layers):
|
| 853 |
+
"""Map UI display values to internal values."""
|
| 854 |
+
test_map = {name: val for name, val in FMS_TESTS}
|
| 855 |
+
test_name = test_map.get(test_display_name, "deep_squat")
|
| 856 |
+
side = {"N/A": "na", "Left": "left", "Right": "right"}.get(side_display, "na")
|
| 857 |
+
return process_video(video, test_name, side, pose_model_key, overlay_layers)
|
| 858 |
+
|
| 859 |
+
submit_btn.click(
|
| 860 |
+
fn=_map_inputs,
|
| 861 |
+
inputs=[video_input, test_dropdown, side_dropdown, pose_model_dropdown, overlay_layers],
|
| 862 |
+
outputs=[score_html, pipeline_md, score_details, alerts_md, overlay_video, velocity_md],
|
| 863 |
+
)
|
| 864 |
+
```
|
| 865 |
+
|
| 866 |
+
- [ ] **Step 6: Smoke-test the app builds**
|
| 867 |
+
|
| 868 |
+
```bash
|
| 869 |
+
python3 -c "from app import build_app; build_app(); print('ok')"
|
| 870 |
+
```
|
| 871 |
+
|
| 872 |
+
Expected: `ok` (Gradio UserWarning about theme is fine, not an error)
|
| 873 |
+
|
| 874 |
+
- [ ] **Step 7: Run full test suite to check for regressions**
|
| 875 |
+
|
| 876 |
+
```bash
|
| 877 |
+
pytest tests/ -v --tb=short 2>&1 | tail -15
|
| 878 |
+
```
|
| 879 |
+
|
| 880 |
+
Expected: all previous tests still pass (62 passing, 1 pre-existing failure in biomechanics), plus 16 new visualizer tests = 78 passing.
|
| 881 |
+
|
| 882 |
+
- [ ] **Step 8: Commit**
|
| 883 |
+
|
| 884 |
+
```bash
|
| 885 |
+
git add app.py
|
| 886 |
+
git commit -m "feat: overlay video tab + velocity summary wired in Gradio UI"
|
| 887 |
+
```
|
| 888 |
+
|
| 889 |
+
---
|
| 890 |
+
|
| 891 |
+
## Self-review
|
| 892 |
+
|
| 893 |
+
**Spec coverage:**
|
| 894 |
+
- ✅ `SimpleKalmanFilter` 4-state (Task 1)
|
| 895 |
+
- ✅ `compute_joint_velocity` Kalman-filtered px/s (Task 1)
|
| 896 |
+
- ✅ `_draw_skeleton` COCO bones, confidence-colored joints (Task 2)
|
| 897 |
+
- ✅ `_draw_trails` fading deque-based trails (Task 3)
|
| 898 |
+
- ✅ `_draw_velocity_arrows` speed-colored, direction from consecutive frames (Task 4)
|
| 899 |
+
- ✅ `render_video` layer dispatch, trail history, VideoWriter (Task 5)
|
| 900 |
+
- ✅ `build_velocity_summary` markdown table, ≥50% detection filter (Task 5)
|
| 901 |
+
- ✅ `overlay_layers` CheckboxGroup in UI (Task 6)
|
| 902 |
+
- ✅ New `🎬 Overlay Video` tab with `gr.Video` + `gr.Markdown` (Task 6)
|
| 903 |
+
- ✅ `process_video` wired with layers param (Task 6)
|
| 904 |
+
- ✅ `vis.last_velocities` stored on instance after `render_video` (Task 5)
|
| 905 |
+
- ✅ Error handling: empty layers → None, empty detections → None, exception → alerts (Task 5 + 6)
|
| 906 |
+
- ✅ All 5 spec test cases covered across Tasks 1–5
|
| 907 |
+
|
| 908 |
+
**Placeholder scan:** None found. All code blocks are complete.
|
| 909 |
+
|
| 910 |
+
**Type consistency:**
|
| 911 |
+
- `compute_joint_velocity` returns `dict[int, list[float]]` — used identically in `render_video`, `_draw_velocity_arrows`, and `build_velocity_summary`. ✓
|
| 912 |
+
- `layers: set[str]` in `render_video`; converted from `list[str]` in `process_video` via set comprehension. ✓
|
| 913 |
+
- `vis.last_velocities` set in `render_video`, read in `process_video`. ✓
|
| 914 |
+
- `_draw_velocity_arrows(frame, kps, prev_kps, velocities, frame_idx)` — signature matches call in `render_video`. ✓
|
docs/superpowers/specs/2026-06-09-pose-model-selector-design.md
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Pose Model Selector — Design Spec
|
| 2 |
+
|
| 3 |
+
**Date:** 2026-06-09
|
| 4 |
+
**Status:** Approved
|
| 5 |
+
|
| 6 |
+
## Goal
|
| 7 |
+
|
| 8 |
+
Expose all available pose estimation models as a selectable dropdown in the Gradio UI, replacing the hard-coded YOLO26l default. Supported families: MediaPipe (Qualcomm HF/ONNX), YOLO26 n→x (local), Sapiens2 0.4B→5B (HF/transformers).
|
| 9 |
+
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
## Architecture
|
| 13 |
+
|
| 14 |
+
### Unified model registry (`config.py`)
|
| 15 |
+
|
| 16 |
+
Replace `YOLO_POSE_MODELS` with a single `POSE_MODELS` dict. Each entry:
|
| 17 |
+
|
| 18 |
+
```python
|
| 19 |
+
{
|
| 20 |
+
"backend": "yolo" | "mediapipe" | "sapiens2",
|
| 21 |
+
"path": str, # yolo only — absolute path to local .pt
|
| 22 |
+
"hf_id": str, # mediapipe + sapiens2 — HuggingFace repo id
|
| 23 |
+
"params_m": float, # millions of parameters
|
| 24 |
+
}
|
| 25 |
+
```
|
| 26 |
+
|
| 27 |
+
Ordered as displayed in the UI:
|
| 28 |
+
|
| 29 |
+
| Label | backend | source |
|
| 30 |
+
|---|---|---|
|
| 31 |
+
| `MediaPipe-Pose ⬇ ~16 MB, CPU-friendly` | mediapipe | `qualcomm/MediaPipe-Pose-Estimation` |
|
| 32 |
+
| `YOLO26n — nano (0.7M, fastest)` ★ default | yolo | local checkpoint |
|
| 33 |
+
| `YOLO26s — small (3.5M)` | yolo | local checkpoint |
|
| 34 |
+
| `YOLO26m — medium (9M)` | yolo | local checkpoint |
|
| 35 |
+
| `YOLO26l — large (25.9M)` | yolo | local checkpoint |
|
| 36 |
+
| `YOLO26x — extra-large (57.6M)` | yolo | local checkpoint |
|
| 37 |
+
| `Sapiens2-0.4B ⬇ ~1.6 GB` | sapiens2 | `facebook/sapiens2-pose-0.4b` |
|
| 38 |
+
| `Sapiens2-0.8B ⬇ ~3.2 GB` | sapiens2 | `facebook/sapiens2-pose-0.8b` |
|
| 39 |
+
| `Sapiens2-1B ⬇ ~4 GB` | sapiens2 | `facebook/sapiens2-pose-1b` |
|
| 40 |
+
| `Sapiens2-5B ⬇ ~20 GB, large GPU` | sapiens2 | `facebook/sapiens2-pose-5b` |
|
| 41 |
+
|
| 42 |
+
```python
|
| 43 |
+
DEFAULT_POSE_MODEL = "YOLO26n — nano (0.7M, fastest)"
|
| 44 |
+
```
|
| 45 |
+
|
| 46 |
+
Keep `YOLO_POSE_MODEL` and `YOLO_POSE_MODEL_HQ` as string aliases for backward compat with any direct references outside the agent.
|
| 47 |
+
|
| 48 |
+
---
|
| 49 |
+
|
| 50 |
+
### Pose2DAgent (`formscout/agents/pose2d.py`)
|
| 51 |
+
|
| 52 |
+
Three private sub-runners, all returning `list[dict[int, dict]]` (COCO 17 keypoints per frame, same format as today):
|
| 53 |
+
|
| 54 |
+
#### `_run_yolo(frames, path) -> list[dict]`
|
| 55 |
+
Existing logic, lifted into a named function. Model cached in `_model_cache[path]`.
|
| 56 |
+
|
| 57 |
+
#### `_run_mediapipe(frames, hf_id) -> list[dict]`
|
| 58 |
+
- Download repo snapshot via `huggingface_hub.snapshot_download(hf_id)`
|
| 59 |
+
- Locate the pose landmark `.onnx` file in the snapshot
|
| 60 |
+
- Load with `onnxruntime.InferenceSession`
|
| 61 |
+
- Preprocess each frame: resize to 256×256, normalize
|
| 62 |
+
- Run inference → 33 BlazePose landmarks
|
| 63 |
+
- Map BlazePose 33 → COCO 17 via fixed index table:
|
| 64 |
+
```
|
| 65 |
+
COCO 0=nose → BlazePose 0
|
| 66 |
+
COCO 1=left_eye → BlazePose 2
|
| 67 |
+
COCO 2=right_eye → BlazePose 5
|
| 68 |
+
COCO 3=left_ear → BlazePose 7
|
| 69 |
+
COCO 4=right_ear → BlazePose 8
|
| 70 |
+
COCO 5=left_shld → BlazePose 11
|
| 71 |
+
COCO 6=right_shld → BlazePose 12
|
| 72 |
+
COCO 7=left_elbow → BlazePose 13
|
| 73 |
+
COCO 8=right_elbow → BlazePose 14
|
| 74 |
+
COCO 9=left_wrist → BlazePose 15
|
| 75 |
+
COCO 10=right_wrist → BlazePose 16
|
| 76 |
+
COCO 11=left_hip → BlazePose 23
|
| 77 |
+
COCO 12=right_hip → BlazePose 24
|
| 78 |
+
COCO 13=left_knee → BlazePose 25
|
| 79 |
+
COCO 14=right_knee → BlazePose 26
|
| 80 |
+
COCO 15=left_ankle → BlazePose 27
|
| 81 |
+
COCO 16=right_ankle → BlazePose 28
|
| 82 |
+
```
|
| 83 |
+
- Session cached in `_model_cache[hf_id]`
|
| 84 |
+
|
| 85 |
+
#### `_run_sapiens2(frames, hf_id) -> list[dict]`
|
| 86 |
+
- Load via `transformers.pipeline("pose-estimation", model=hf_id)`
|
| 87 |
+
- Sapiens2 outputs 308 whole-body keypoints; map first 17 (indices 0–16) to COCO 17 — Sapiens2 preserves COCO ordering for the body subset
|
| 88 |
+
- Pipeline cached in `_model_cache[hf_id]`
|
| 89 |
+
|
| 90 |
+
#### `Pose2DAgent.run(ingest, model_key)`
|
| 91 |
+
- `model_key: str` replaces `model_path: str` (old param)
|
| 92 |
+
- Looks up `config.POSE_MODELS[model_key]` (falls back to `DEFAULT_POSE_MODEL` if key missing)
|
| 93 |
+
- Dispatches to the appropriate sub-runner
|
| 94 |
+
- Returns `Pose2DResult` — the same contract as today
|
| 95 |
+
|
| 96 |
+
---
|
| 97 |
+
|
| 98 |
+
### UI (`app.py`)
|
| 99 |
+
|
| 100 |
+
Add `gr.Dropdown` for pose model in the input column, below the test/side row:
|
| 101 |
+
|
| 102 |
+
```python
|
| 103 |
+
pose_model_dropdown = gr.Dropdown(
|
| 104 |
+
choices=list(config.POSE_MODELS.keys()),
|
| 105 |
+
value=config.DEFAULT_POSE_MODEL,
|
| 106 |
+
label="Pose Model",
|
| 107 |
+
)
|
| 108 |
+
```
|
| 109 |
+
|
| 110 |
+
Update `_map_inputs` to accept and forward `pose_model_key`:
|
| 111 |
+
|
| 112 |
+
```python
|
| 113 |
+
def _map_inputs(video, test_display_name, side_display, pose_model_key):
|
| 114 |
+
...
|
| 115 |
+
return process_video(video, test_name, side, pose_model_key)
|
| 116 |
+
```
|
| 117 |
+
|
| 118 |
+
Update `submit_btn.click` inputs to include `pose_model_dropdown`.
|
| 119 |
+
|
| 120 |
+
`process_video(video_path, test_name, side, pose_model_key)` passes `pose_model_key` through to `director.run()`, which passes it to `Pose2DAgent.run()`. Remove the old `YOLO_POSE_MODELS.get()` lookup from `process_video`.
|
| 121 |
+
|
| 122 |
+
---
|
| 123 |
+
|
| 124 |
+
## Data flow
|
| 125 |
+
|
| 126 |
+
```
|
| 127 |
+
UI dropdown (pose_model_key: str)
|
| 128 |
+
→ process_video()
|
| 129 |
+
→ Director.run(pose_model_key=...)
|
| 130 |
+
→ Pose2DAgent.run(ingest, model_key=pose_model_key)
|
| 131 |
+
→ config.POSE_MODELS[model_key] → {backend, path|hf_id}
|
| 132 |
+
→ _run_yolo / _run_mediapipe / _run_sapiens2
|
| 133 |
+
→ list[dict[int, {x, y, conf}]] (COCO 17, same contract)
|
| 134 |
+
→ Pose2DResult
|
| 135 |
+
```
|
| 136 |
+
|
| 137 |
+
---
|
| 138 |
+
|
| 139 |
+
## Error handling
|
| 140 |
+
|
| 141 |
+
- Unknown `model_key`: log warning, fall back to `DEFAULT_POSE_MODEL`
|
| 142 |
+
- ONNX file not found in MediaPipe snapshot: `Pose2DResult(confidence=0.0, notes="mediapipe onnx not found")`
|
| 143 |
+
- Sapiens2 / MediaPipe download failure: `Pose2DResult(confidence=0.0, notes=str(e))`
|
| 144 |
+
- All failures are non-fatal; pipeline continues with 0-confidence result and surfaces alert in UI
|
| 145 |
+
|
| 146 |
+
---
|
| 147 |
+
|
| 148 |
+
## Dependencies to add (`requirements.txt`)
|
| 149 |
+
|
| 150 |
+
- `onnxruntime` — MediaPipe ONNX inference
|
| 151 |
+
- `huggingface_hub` — snapshot download for MediaPipe (likely already present via transformers)
|
| 152 |
+
|
| 153 |
+
Sapiens2 uses `transformers`, already a dependency.
|
| 154 |
+
|
| 155 |
+
---
|
| 156 |
+
|
| 157 |
+
## Testing
|
| 158 |
+
|
| 159 |
+
Each new backend gets a pytest in `tests/test_pose2d.py` that:
|
| 160 |
+
- Mocks the model load (no actual HF download in CI)
|
| 161 |
+
- Passes a 3-frame synthetic IngestResult
|
| 162 |
+
- Asserts `Pose2DResult.keypoints` has 3 entries, each a dict with at most 17 int keys
|
| 163 |
+
- Asserts `confidence` is a float in [0, 1]
|
| 164 |
+
|
| 165 |
+
---
|
| 166 |
+
|
| 167 |
+
## Out of scope
|
| 168 |
+
|
| 169 |
+
- Sapiens2 / MediaPipe accuracy benchmarking
|
| 170 |
+
- Automatic backend selection based on hardware
|
| 171 |
+
- Downloading Sapiens2/MediaPipe checkpoints to local `checkpoints/` directory
|
docs/superpowers/specs/2026-06-09-pose-visualizer-design.md
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Pose Overlay Visualizer — Design Spec
|
| 2 |
+
|
| 3 |
+
**Date:** 2026-06-09
|
| 4 |
+
**Status:** Approved
|
| 5 |
+
|
| 6 |
+
## Goal
|
| 7 |
+
|
| 8 |
+
Add an annotated overlay video output to the FormScout UI showing skeleton, motion trails, and velocity arrows on top of the original footage, alongside a per-joint velocity summary table. Overlay layers are user-selectable via checkboxes. Adapted from the Laban Movement Analysis project.
|
| 9 |
+
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
## Architecture
|
| 13 |
+
|
| 14 |
+
Three files change or are created. No changes to `pipeline.py`, `types.py`, or any existing agent.
|
| 15 |
+
|
| 16 |
+
```
|
| 17 |
+
formscout/agents/visualizer.py ← new
|
| 18 |
+
tests/test_visualizer.py ← new
|
| 19 |
+
app.py ← overlay_layers checkbox, new tab, wiring
|
| 20 |
+
```
|
| 21 |
+
|
| 22 |
+
The visualizer runs **after** `director.run()` returns in `process_video()` — it is a pure post-processing step, never on the critical scoring path.
|
| 23 |
+
|
| 24 |
+
---
|
| 25 |
+
|
| 26 |
+
## Module: `formscout/agents/visualizer.py`
|
| 27 |
+
|
| 28 |
+
### `compute_joint_velocity(keypoints_per_frame, fps) → dict[int, list[float]]`
|
| 29 |
+
|
| 30 |
+
- Input: `list[dict[int, {x, y, conf}]]` (COCO-17 pixel coords per frame), `fps: float`
|
| 31 |
+
- Output: `dict[int, list[float]]` — per-joint per-frame speed in **px/s**
|
| 32 |
+
- Method: for each joint index, run one `SimpleKalmanFilter` (a single 4-state constant-velocity filter tracking x, y, vx, vy; same structure as Laban's engine) over the (x, y) series. Speed = `sqrt(vx² + vy²)` from the filter's velocity state.
|
| 33 |
+
- Missing keypoints (conf < 0.3 or absent) → speed = 0.0 for that frame, filter state held.
|
| 34 |
+
|
| 35 |
+
### `SimpleKalmanFilter`
|
| 36 |
+
|
| 37 |
+
Minimal 4-state Kalman (x, y, vx, vy), identical in structure to the Laban `SimpleKalmanFilter`:
|
| 38 |
+
- Transition: constant-velocity model
|
| 39 |
+
- Measurement: position only (x, y)
|
| 40 |
+
- One instance per joint per video run
|
| 41 |
+
|
| 42 |
+
### `PoseVisualizer`
|
| 43 |
+
|
| 44 |
+
#### Constants
|
| 45 |
+
```python
|
| 46 |
+
COCO_SKELETON = [
|
| 47 |
+
(0,1),(0,2),(1,3),(2,4), # face
|
| 48 |
+
(5,6),(5,7),(7,9),(6,8),(8,10), # arms
|
| 49 |
+
(5,11),(6,12),(11,12), # torso
|
| 50 |
+
(11,13),(13,15),(12,14),(14,16), # legs
|
| 51 |
+
]
|
| 52 |
+
TRAIL_LENGTH = 10 # frames of trail history
|
| 53 |
+
MAX_ARROW_PX = 40 # arrow scaled so peak velocity → 40px length
|
| 54 |
+
CONF_THRESHOLD = 0.3 # min confidence to draw a keypoint
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
#### Private methods
|
| 58 |
+
|
| 59 |
+
**`_draw_skeleton(frame, kps)`**
|
| 60 |
+
- Draw each COCO bone as a line if both endpoints have conf > CONF_THRESHOLD
|
| 61 |
+
- Joint dots: color green→red by confidence using HSV (same as Laban `_confidence_to_color`)
|
| 62 |
+
- Bone color: white
|
| 63 |
+
|
| 64 |
+
**`_draw_trails(frame, trail_history)`**
|
| 65 |
+
- `trail_history: dict[int, deque(maxlen=TRAIL_LENGTH)]` keyed by joint index
|
| 66 |
+
- Each deque holds `(x, y)` pixel positions from previous frames
|
| 67 |
+
- Draw fading line segments: alpha = segment_position / TRAIL_LENGTH, color white
|
| 68 |
+
|
| 69 |
+
**`_draw_velocity_arrows(frame, kps, prev_kps, velocities, frame_idx)`**
|
| 70 |
+
- `velocities: dict[int, list[float]]` — speeds per joint per frame
|
| 71 |
+
- Direction vector from consecutive keypoint positions (x[t] - x[t-1], y[t] - y[t-1])
|
| 72 |
+
- Arrow length = `speed / peak_speed * MAX_ARROW_PX` (clamped)
|
| 73 |
+
- Drawn only for joints with conf > CONF_THRESHOLD and speed > 0
|
| 74 |
+
- Color: green=slow, orange=medium, red=fast (same thresholds as Laban intensity)
|
| 75 |
+
|
| 76 |
+
#### Public method
|
| 77 |
+
|
| 78 |
+
**`render_video(ingest, pose2d, layers: set[str], output_path: str) → str | None`**
|
| 79 |
+
- `layers`: subset of `{"skeleton", "trails", "velocity_arrows"}`
|
| 80 |
+
- If `layers` is empty → return `None` immediately
|
| 81 |
+
- Pre-computes `compute_joint_velocity(pose2d.keypoints, ingest.fps)`
|
| 82 |
+
- Iterates frames, updates `trail_history`, calls selected `_draw_*` methods
|
| 83 |
+
- Writes output via `cv2.VideoWriter` (codec: `mp4v`, same fps as ingest, falling back to 30 fps when ingest reports none)
|
| 84 |
+
- Returns output path on success; `None` on any exception (logs warning)
|
| 85 |
+
|
| 86 |
+
#### Velocity summary
|
| 87 |
+
|
| 88 |
+
**`build_velocity_summary(keypoints_per_frame, velocities) → str`**
|
| 89 |
+
- For each joint with conf ≥ 0.3 (`CONF_THRESHOLD`) in at least 50% of frames:
|
| 90 |
+
- Compute avg and peak speed (px/s)
|
| 91 |
+
- Return markdown table sorted by peak speed descending:
|
| 92 |
+
```
|
| 93 |
+
| Joint | Avg (px/s) | Peak (px/s) |
|
| 94 |
+
|---------------|-----------|-------------|
|
| 95 |
+
| left_knee | 42.3 | 118.7 |
|
| 96 |
+
```
|
| 97 |
+
- Returns empty string if no valid joints
|
| 98 |
+
|
| 99 |
+
---
|
| 100 |
+
|
| 101 |
+
## UI changes: `app.py`
|
| 102 |
+
|
| 103 |
+
### Input column — overlay layer checkboxes
|
| 104 |
+
|
| 105 |
+
Below `pose_model_dropdown`, add:
|
| 106 |
+
|
| 107 |
+
```python
|
| 108 |
+
overlay_layers = gr.CheckboxGroup(
|
| 109 |
+
choices=["Skeleton", "Trails", "Velocity arrows"],
|
| 110 |
+
value=["Skeleton", "Trails"],
|
| 111 |
+
label="Overlay Layers",
|
| 112 |
+
)
|
| 113 |
+
```
|
| 114 |
+
|
| 115 |
+
### Results panel — new tab
|
| 116 |
+
|
| 117 |
+
Inside the existing `gr.Tabs()` block, add a fourth tab:
|
| 118 |
+
|
| 119 |
+
```python
|
| 120 |
+
with gr.TabItem("🎬 Overlay Video"):
|
| 121 |
+
overlay_video = gr.Video(label="Annotated Movement")
|
| 122 |
+
velocity_md = gr.Markdown("")
|
| 123 |
+
```
|
| 124 |
+
|
| 125 |
+
### `process_video()` signature
|
| 126 |
+
|
| 127 |
+
```python
|
| 128 |
+
def process_video(video_path, test_name, side, model_key, layers: list[str]):
|
| 129 |
+
```
|
| 130 |
+
|
| 131 |
+
After `director.run()`:
|
| 132 |
+
```python
|
| 133 |
+
from formscout.agents.visualizer import PoseVisualizer, build_velocity_summary
|
| 134 |
+
layer_set = {l.lower().replace(" ", "_") for l in layers}
|
| 135 |
+
# map UI labels to internal names:
|
| 136 |
+
# "Skeleton" → "skeleton", "Trails" → "trails", "Velocity arrows" → "velocity_arrows"
|
| 137 |
+
overlay_path = None
|
| 138 |
+
vel_summary = ""
|
| 139 |
+
if layer_set and state.ingest and state.pose2d:
|
| 140 |
+
try:
|
| 141 |
+
vis = PoseVisualizer()
|
| 142 |
+
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
|
| 143 |
+
out_path = f.name
|
| 144 |
+
overlay_path = vis.render_video(state.ingest, state.pose2d, layer_set, out_path)
|
| 145 |
+
if overlay_path:
|
| 146 |
+
vel_summary = build_velocity_summary(state.pose2d.keypoints, vis.last_velocities)
|
| 147 |
+
except Exception as e:
|
| 148 |
+
alerts += f"\n⚠️ Visualizer error: {e}"
|
| 149 |
+
return score_html, pipeline_md, score_details, alerts, overlay_path, vel_summary
|
| 150 |
+
```
|
| 151 |
+
|
| 152 |
+
`vis.last_velocities` is stored on the instance after `render_video()` to avoid recomputing.
|
| 153 |
+
|
| 154 |
+
### Event wiring
|
| 155 |
+
|
| 156 |
+
```python
|
| 157 |
+
submit_btn.click(
|
| 158 |
+
fn=_map_inputs,
|
| 159 |
+
inputs=[video_input, test_dropdown, side_dropdown, pose_model_dropdown, overlay_layers],
|
| 160 |
+
outputs=[score_html, pipeline_md, score_details, alerts_md, overlay_video, velocity_md],
|
| 161 |
+
)
|
| 162 |
+
```
|
| 163 |
+
|
| 164 |
+
`_map_inputs` gains `overlay_layers` as fifth parameter.
|
| 165 |
+
|
| 166 |
+
---
|
| 167 |
+
|
| 168 |
+
## Error handling
|
| 169 |
+
|
| 170 |
+
| Failure | Behaviour |
|
| 171 |
+
|---|---|
|
| 172 |
+
| All frames have no detections | `render_video()` returns `None`, tab empty, no crash |
|
| 173 |
+
| `cv2.VideoWriter` fails | logs warning, returns `None` |
|
| 174 |
+
| Any exception in visualizer | caught in `process_video()`, appended to alerts, `overlay_path = None` |
|
| 175 |
+
| `layers` is empty | returns `None` immediately, no processing |
|
| 176 |
+
|
| 177 |
+
The score is always returned regardless of visualizer outcome.
|
| 178 |
+
|
| 179 |
+
---
|
| 180 |
+
|
| 181 |
+
## Testing: `tests/test_visualizer.py`
|
| 182 |
+
|
| 183 |
+
- Synthetic `IngestResult`: 5 blank 480×640 BGR frames, fps=30
|
| 184 |
+
- Synthetic `Pose2DResult`: 17 keypoints per frame at fixed positions with conf=0.9
|
| 185 |
+
- `test_render_video_creates_file`: assert output `.mp4` exists and size > 0
|
| 186 |
+
- `test_compute_joint_velocity_shape`: assert 17-key dict, each list length == 5
|
| 187 |
+
- `test_empty_layers_returns_none`: assert `render_video(..., layers=set())` returns `None`
|
| 188 |
+
- `test_no_detections_returns_none`: all-empty keypoints → `None`
|
| 189 |
+
- `test_velocity_summary_markdown`: assert output contains `|` (table) and at least one joint name
|
| 190 |
+
|
| 191 |
+
---
|
| 192 |
+
|
| 193 |
+
## Out of scope
|
| 194 |
+
|
| 195 |
+
- Frame-by-frame metrics synced to video playback (Phase 4 / custom Svelte)
|
| 196 |
+
- Multi-person tracking
|
| 197 |
+
- Saving overlay video to Hugging Face Hub (tracing feature, Phase 4)
|
formscout.egg-info/PKG-INFO
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
Metadata-Version: 2.4
|
| 2 |
-
Name: formscout
|
| 3 |
-
Version: 0.1.0
|
| 4 |
-
Requires-Python: >=3.11
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: formscout
|
| 3 |
+
Version: 0.1.0
|
| 4 |
+
Requires-Python: >=3.11
|
formscout.egg-info/SOURCES.txt
CHANGED
|
@@ -1,26 +1,38 @@
|
|
| 1 |
-
README.md
|
| 2 |
-
pyproject.toml
|
| 3 |
-
formscout/__init__.py
|
| 4 |
-
formscout/config.py
|
| 5 |
-
formscout/pipeline.py
|
| 6 |
-
formscout/run.py
|
| 7 |
-
formscout/tracing.py
|
| 8 |
-
formscout/types.py
|
| 9 |
-
formscout.egg-info/PKG-INFO
|
| 10 |
-
formscout.egg-info/SOURCES.txt
|
| 11 |
-
formscout.egg-info/dependency_links.txt
|
| 12 |
-
formscout.egg-info/top_level.txt
|
| 13 |
-
formscout/agents/__init__.py
|
| 14 |
-
formscout/agents/biomechanics.py
|
| 15 |
-
formscout/agents/body3d.py
|
| 16 |
-
formscout/agents/
|
| 17 |
-
formscout/agents/
|
| 18 |
-
formscout/
|
| 19 |
-
formscout/
|
| 20 |
-
formscout/
|
| 21 |
-
formscout/
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
tests/test_types.py
|
|
|
|
| 1 |
+
README.md
|
| 2 |
+
pyproject.toml
|
| 3 |
+
formscout/__init__.py
|
| 4 |
+
formscout/config.py
|
| 5 |
+
formscout/pipeline.py
|
| 6 |
+
formscout/run.py
|
| 7 |
+
formscout/tracing.py
|
| 8 |
+
formscout/types.py
|
| 9 |
+
formscout.egg-info/PKG-INFO
|
| 10 |
+
formscout.egg-info/SOURCES.txt
|
| 11 |
+
formscout.egg-info/dependency_links.txt
|
| 12 |
+
formscout.egg-info/top_level.txt
|
| 13 |
+
formscout/agents/__init__.py
|
| 14 |
+
formscout/agents/biomechanics.py
|
| 15 |
+
formscout/agents/body3d.py
|
| 16 |
+
formscout/agents/classifier.py
|
| 17 |
+
formscout/agents/ingest.py
|
| 18 |
+
formscout/agents/judge.py
|
| 19 |
+
formscout/agents/pose2d.py
|
| 20 |
+
formscout/agents/report.py
|
| 21 |
+
formscout/rubric/__init__.py
|
| 22 |
+
formscout/rubric/active_slr.py
|
| 23 |
+
formscout/rubric/deep_squat.py
|
| 24 |
+
formscout/rubric/hurdle_step.py
|
| 25 |
+
formscout/rubric/inline_lunge.py
|
| 26 |
+
formscout/rubric/rotary_stability.py
|
| 27 |
+
formscout/rubric/shoulder_mobility.py
|
| 28 |
+
formscout/rubric/trunk_stability_pushup.py
|
| 29 |
+
formscout/serving/__init__.py
|
| 30 |
+
formscout/serving/llama_cpp.py
|
| 31 |
+
formscout/ui/__init__.py
|
| 32 |
+
formscout/ui/theme.py
|
| 33 |
+
tests/test_biomechanics.py
|
| 34 |
+
tests/test_body3d.py
|
| 35 |
+
tests/test_ingest.py
|
| 36 |
+
tests/test_phase2.py
|
| 37 |
+
tests/test_pose2d.py
|
| 38 |
tests/test_types.py
|
formscout.egg-info/dependency_links.txt
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
|
formscout.egg-info/top_level.txt
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
formscout
|
|
|
|
| 1 |
+
formscout
|
formscout/agents/pose2d.py
CHANGED
|
@@ -1,95 +1,232 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Pose2DAgent — 2D per-frame keypoint extraction
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
"
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Pose2DAgent — 2D per-frame keypoint extraction.
|
| 3 |
+
|
| 4 |
+
Backends: yolo (local checkpoints, ultralytics), mediapipe (official Tasks API,
|
| 5 |
+
local .task checkpoint), sapiens2 (Meta HF/transformers).
|
| 6 |
+
All backends output COCO-17 keypoints: dict[int, {x, y, conf}] per frame.
|
| 7 |
+
|
| 8 |
+
Input: IngestResult
|
| 9 |
+
Output: Pose2DResult(keypoints per frame, fps, confidence)
|
| 10 |
+
Failure: Pose2DResult(confidence=0.0, notes=<reason>) — never raises.
|
| 11 |
+
Gated: yolo=no; mediapipe=no (local checkpoint); sapiens2=yes (access accepted).
|
| 12 |
+
"""
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
import logging
|
| 16 |
+
import numpy as np
|
| 17 |
+
|
| 18 |
+
from formscout import config
|
| 19 |
+
from formscout.types import IngestResult, Pose2DResult
|
| 20 |
+
|
| 21 |
+
logger = logging.getLogger(__name__)
|
| 22 |
+
|
| 23 |
+
COCO_KEYPOINTS = [
|
| 24 |
+
"nose", "left_eye", "right_eye", "left_ear", "right_ear",
|
| 25 |
+
"left_shoulder", "right_shoulder", "left_elbow", "right_elbow",
|
| 26 |
+
"left_wrist", "right_wrist", "left_hip", "right_hip",
|
| 27 |
+
"left_knee", "right_knee", "left_ankle", "right_ankle",
|
| 28 |
+
]
|
| 29 |
+
|
| 30 |
+
# BlazePose-33 source indices → COCO-17 target indices
|
| 31 |
+
# BlazePose: 0=nose, 2=left_eye, 5=right_eye, 7=left_ear, 8=right_ear,
|
| 32 |
+
# 11=left_shoulder, 12=right_shoulder, 13=left_elbow, 14=right_elbow,
|
| 33 |
+
# 15=left_wrist, 16=right_wrist, 23=left_hip, 24=right_hip,
|
| 34 |
+
# 25=left_knee, 26=right_knee, 27=left_ankle, 28=right_ankle
|
| 35 |
+
_BP_SRC = [0, 2, 5, 7, 8, 11, 12, 13, 14, 15, 16, 23, 24, 25, 26, 27, 28]
|
| 36 |
+
_BP_DST = list(range(17)) # COCO indices 0..16
|
| 37 |
+
|
| 38 |
+
_model_cache: dict[str, object] = {}
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
# ── YOLO backend ──────────────────────────────────────────────────────────────
|
| 42 |
+
|
| 43 |
+
def _get_yolo(path: str) -> object:
    """Return the ultralytics ``YOLO`` model for ``path``, loading it on first use.

    Models are memoised in the module-level ``_model_cache`` under the
    checkpoint path itself, so each path is constructed at most once.
    """
    if path in _model_cache:
        return _model_cache[path]

    # Deferred import: ultralytics is only imported when a model has to be built.
    from ultralytics import YOLO

    model = YOLO(path)
    _model_cache[path] = model
    return model
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def _run_yolo(frames: list, path: str) -> list[dict]:
    """Run the YOLO pose model stored at ``path`` on every frame.

    Args:
        frames: Images passed one at a time to the model.
        path: Checkpoint path handed to ``_get_yolo``.

    Returns:
        One dict per input frame, in order. Each dict maps a keypoint index
        (at most 0..16) to ``{"x", "y", "conf"}`` for the first detection in
        that frame. A frame with no detection, or whose inference raised,
        contributes an empty dict so the output stays aligned with ``frames``.

    Raises:
        Whatever ``_get_yolo`` raises — model loading is not guarded here.
    """
    model = _get_yolo(path)
    out: list[dict] = []
    for frame_idx, frame in enumerate(frames):
        kps: dict[int, dict] = {}
        try:
            results = model(frame, verbose=False)
            if results and results[0].keypoints is not None:
                kp = results[0].keypoints
                if kp.xy is not None and len(kp.xy) > 0:
                    # Only the first detection (index 0) is used.
                    xy = kp.xy[0].cpu().numpy()
                    conf = kp.conf[0].cpu().numpy()
                    for j in range(min(len(xy), 17)):
                        kps[j] = {
                            "x": float(xy[j, 0]),
                            "y": float(xy[j, 1]),
                            "conf": float(conf[j]),
                        }
        except Exception:
            # Best effort: one bad frame must not abort the whole clip, but
            # leave a trace so the failure can be diagnosed.
            logger.debug("yolo inference failed on frame %d", frame_idx, exc_info=True)
            kps = {}
        out.append(kps)
    return out
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
# ── MediaPipe backend (official Tasks API, local .task checkpoint) ────────────
|
| 71 |
+
|
| 72 |
+
def _get_mediapipe_landmarker(path: str) -> object:
    """Return a MediaPipe ``PoseLandmarker`` for the ``.task`` file at ``path``.

    Instances are memoised in ``_model_cache`` under the key ``"mp:<path>"``.
    The landmarker is created in IMAGE running mode for a single pose, with
    all three confidence thresholds set to 0.4.
    """
    cache_key = f"mp:{path}"
    if cache_key in _model_cache:
        return _model_cache[cache_key]

    # Deferred imports: mediapipe is only imported when a landmarker is built.
    from mediapipe.tasks import python as mp_tasks
    from mediapipe.tasks.python import vision

    base_options = mp_tasks.BaseOptions(model_asset_path=path)
    options = vision.PoseLandmarkerOptions(
        base_options=base_options,
        running_mode=vision.RunningMode.IMAGE,
        num_poses=1,
        min_pose_detection_confidence=0.4,
        min_pose_presence_confidence=0.4,
        min_tracking_confidence=0.4,
    )
    landmarker = vision.PoseLandmarker.create_from_options(options)
    _model_cache[cache_key] = landmarker
    return landmarker
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def _run_mediapipe(frames: list, path: str) -> list[dict]:
    """Run the MediaPipe pose landmarker on every frame and emit COCO-17 keypoints.

    Args:
        frames: Images converted with ``cv2.COLOR_BGR2RGB`` before detection,
            i.e. they are treated as BGR arrays.
        path: ``.task`` checkpoint path handed to ``_get_mediapipe_landmarker``.

    Returns:
        One dict per input frame, in order, mapping COCO index to
        ``{"x", "y", "conf"}``. Landmark x/y are multiplied by the frame
        width/height (presumably they are normalised to [0, 1] — confirm
        against the MediaPipe docs) and ``conf`` is the landmark visibility.
        If the landmarker cannot be loaded, or a frame fails, the affected
        frames get an empty dict.
    """
    import cv2
    import mediapipe as mp

    try:
        landmarker = _get_mediapipe_landmarker(path)
    except Exception as e:
        logger.warning("mediapipe load failed: %s", e)
        return [{} for _ in frames]

    out: list[dict] = []
    for frame_idx, frame in enumerate(frames):
        kps: dict[int, dict] = {}
        try:
            h, w = frame.shape[:2]
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
            detection = landmarker.detect(mp_image)

            if detection.pose_landmarks:
                # Only the first pose is used.
                lms = detection.pose_landmarks[0]
                # _BP_SRC[i] is the source landmark index for COCO index _BP_DST[i].
                for coco_idx, bp_idx in zip(_BP_DST, _BP_SRC):
                    if bp_idx < len(lms):
                        lm = lms[bp_idx]
                        kps[coco_idx] = {
                            "x": float(lm.x * w),
                            "y": float(lm.y * h),
                            "conf": float(lm.visibility),
                        }
        except Exception:
            # Best effort: keep going, but record why this frame was dropped.
            logger.debug("mediapipe inference failed on frame %d", frame_idx, exc_info=True)
            kps = {}
        out.append(kps)
    return out
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
# ── Sapiens2 backend (Meta HF, transformers) ──────────────────────────────────
|
| 127 |
+
|
| 128 |
+
def _get_sapiens2(hf_id: str) -> object:
    """Return a transformers ``"pose-estimation"`` pipeline for ``hf_id``.

    Pipelines are memoised in ``_model_cache`` under the model id itself.
    """
    if hf_id in _model_cache:
        return _model_cache[hf_id]

    # Deferred import: transformers is only imported when a pipeline is built.
    from transformers import pipeline as hf_pipeline

    pipe = hf_pipeline("pose-estimation", model=hf_id)
    _model_cache[hf_id] = pipe
    return pipe
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def _run_sapiens2(frames: list, hf_id: str) -> list[dict]:
    """Run the Sapiens2 pipeline on every frame and emit COCO-17 keypoints.

    Args:
        frames: Image arrays. Three-channel frames are treated as BGR (the
            same convention the MediaPipe backend in this module uses) and are
            flipped to RGB before being handed to PIL.
        hf_id: Model id handed to ``_get_sapiens2``.

    Returns:
        One dict per input frame, in order, mapping COCO index to
        ``{"x", "y", "conf"}``. Only keypoints whose ``"label"`` matches a
        name in ``COCO_KEYPOINTS`` are kept; keypoints without a label are
        ignored. If the pipeline cannot be loaded, a frame yields no result,
        or a frame fails, the affected frames get an empty dict.
    """
    try:
        pipe = _get_sapiens2(hf_id)
    except Exception as e:
        logger.warning("sapiens2 load failed: %s", e)
        return [{} for _ in frames]

    from PIL import Image

    out: list[dict] = []
    for frame_idx, frame in enumerate(frames):
        try:
            # PIL interprets a 3-channel array as RGB, so reverse the channel
            # axis of BGR frames; other layouts are passed through untouched.
            if getattr(frame, "ndim", 0) == 3 and frame.shape[2] == 3:
                rgb = np.ascontiguousarray(frame[:, :, ::-1])
            else:
                rgb = frame
            pil_img = Image.fromarray(rgb)
            result = pipe(pil_img)

            if not result:
                out.append({})
                continue

            # Only the first entry of the pipeline output is used.
            person = result[0]
            keypoints = person.get("keypoints", [])
            scores = person.get("keypoint_scores", [])

            # Build name -> (x, y, score) lookup from the pipeline output.
            kp_lookup: dict[str, tuple] = {}
            for i, kp in enumerate(keypoints):
                if isinstance(kp, dict):
                    name = kp.get("label", "")
                    x, y = kp.get("x", 0.0), kp.get("y", 0.0)
                else:
                    name = ""
                    x, y = float(kp[0]), float(kp[1])
                score = float(scores[i]) if i < len(scores) else 0.0
                if name:
                    kp_lookup[name] = (x, y, score)

            kps: dict[int, dict] = {}
            for coco_idx, name in enumerate(COCO_KEYPOINTS):
                if name in kp_lookup:
                    x, y, s = kp_lookup[name]
                    kps[coco_idx] = {"x": x, "y": y, "conf": s}
            out.append(kps)
        except Exception:
            # Best effort: keep going, but record why this frame was dropped.
            logger.debug("sapiens2 inference failed on frame %d", frame_idx, exc_info=True)
            out.append({})
    return out
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
# ── Agent ─────────────────────────────────────────────────────────────────────
|
| 184 |
+
|
| 185 |
+
class Pose2DAgent:
    """Per-frame COCO-17 keypoint extractor backed by YOLO, MediaPipe, or Sapiens2."""

    def run(self, ingest: IngestResult, model_key: str | None = None) -> Pose2DResult:
        """Extract keypoints for every frame of ``ingest``.

        ``model_key`` selects an entry of ``config.POSE_MODELS``; a falsy key
        means ``config.DEFAULT_POSE_MODEL`` and an unknown key falls back to
        the default with a warning. Backend errors are converted into a
        zero-confidence result whose ``notes`` carries the reason, so this
        method does not propagate them.

        The reported confidence is the mean, over frames with a detection, of
        each frame's mean keypoint confidence.
        """
        frames = ingest.frames
        if not frames:
            return Pose2DResult(keypoints=[], fps=ingest.fps, confidence=0.0, notes="no frames in ingest")

        key = model_key or config.DEFAULT_POSE_MODEL
        spec = config.POSE_MODELS.get(key)
        if spec is None:
            logger.warning("Unknown model_key %r — falling back to %s", key, config.DEFAULT_POSE_MODEL)
            spec = config.POSE_MODELS[config.DEFAULT_POSE_MODEL]

        def _failure(reason: str) -> Pose2DResult:
            # One empty keypoint dict per frame keeps the result frame-aligned.
            return Pose2DResult(
                keypoints=[{} for _ in ingest.frames],
                fps=ingest.fps, confidence=0.0,
                notes=reason,
            )

        backend = spec["backend"]
        # (backend name, runner, spec key holding the runner's second argument)
        runners = (
            ("yolo", _run_yolo, "path"),
            ("mediapipe", _run_mediapipe, "path"),
            ("sapiens2", _run_sapiens2, "hf_id"),
        )
        try:
            for name, runner, arg_key in runners:
                if backend == name:
                    kps_per_frame = runner(ingest.frames, spec[arg_key])
                    break
            else:
                return _failure(f"unknown backend: {backend}")
        except Exception as e:
            return _failure(str(e))

        # Mean keypoint confidence of each frame that has at least one keypoint.
        frame_means = [
            sum(kp["conf"] for kp in frame_kps.values()) / len(frame_kps)
            for frame_kps in kps_per_frame if frame_kps
        ]
        if frame_means:
            overall_conf = sum(frame_means) / len(frame_means)
            notes = ""
        else:
            overall_conf = 0.0
            notes = "no person detected in any frame"

        return Pose2DResult(
            keypoints=kps_per_frame,
            fps=ingest.fps,
            confidence=overall_conf,
            notes=notes,
        )
|
formscout/agents/visualizer.py
ADDED
|
@@ -0,0 +1,371 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PoseVisualizer — annotated overlay video with skeleton, trails, velocity arrows.
|
| 3 |
+
|
| 4 |
+
Input: IngestResult + Pose2DResult
|
| 5 |
+
Output: .mp4 path (or None on failure/empty layers)
|
| 6 |
+
Failure: returns None, never raises.
|
| 7 |
+
"""
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import colorsys
|
| 11 |
+
import logging
|
| 12 |
+
import math
|
| 13 |
+
import tempfile
|
| 14 |
+
from collections import deque
|
| 15 |
+
|
| 16 |
+
import cv2
|
| 17 |
+
import numpy as np
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
# ── COCO constants ────────────────────────────────────────────────────────────
|
| 22 |
+
|
| 23 |
+
COCO_KEYPOINTS = [
|
| 24 |
+
"nose", "left_eye", "right_eye", "left_ear", "right_ear",
|
| 25 |
+
"left_shoulder", "right_shoulder", "left_elbow", "right_elbow",
|
| 26 |
+
"left_wrist", "right_wrist", "left_hip", "right_hip",
|
| 27 |
+
"left_knee", "right_knee", "left_ankle", "right_ankle",
|
| 28 |
+
]
|
| 29 |
+
|
| 30 |
+
COCO_SKELETON = [
|
| 31 |
+
(0, 1), (0, 2), (1, 3), (2, 4), # face
|
| 32 |
+
(5, 6), (5, 7), (7, 9), (6, 8), (8, 10), # arms
|
| 33 |
+
(5, 11), (6, 12), (11, 12), # torso
|
| 34 |
+
(11, 13), (13, 15), (12, 14), (14, 16), # legs
|
| 35 |
+
]
|
| 36 |
+
|
| 37 |
+
TRAIL_LENGTH = 10
|
| 38 |
+
MAX_ARROW_PX = 40
|
| 39 |
+
CONF_THRESHOLD = 0.3
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# ── Kalman filter ─────────────────────────────────────────────────────────────
|
| 43 |
+
|
| 44 |
+
class SimpleKalmanFilter:
    """Kalman filter over the 4-element state (x, y, vx, vy) of a single joint."""

    def __init__(self, process_noise: float = 0.01, measurement_noise: float = 0.1):
        # Observation matrix: only the position components (x, y) are measured.
        observation = np.zeros((2, 4), dtype=float)
        observation[0, 0] = 1.0
        observation[1, 1] = 1.0
        self.H = observation
        self.R = np.eye(2) * measurement_noise
        self.Q = np.eye(4) * process_noise
        self.cov = np.eye(4) * 0.1
        self.state = np.zeros(4)
        self.is_initialized = False

    def predict(self, dt: float = 1.0):
        """Advance the state by ``dt`` with position += velocity * dt; add ``Q`` to the covariance."""
        transition = np.eye(4)
        transition[0, 2] = dt
        transition[1, 3] = dt
        self.state = transition @ self.state
        self.cov = transition @ self.cov @ transition.T + self.Q

    def update(self, x: float, y: float):
        """Fold in a position measurement.

        The first measurement only seeds the position (velocity and covariance
        are left as they are); later ones apply the standard Kalman correction.
        """
        measurement = np.array([x, y])
        if not self.is_initialized:
            self.state[:2] = measurement
            self.is_initialized = True
            return

        obs = self.H
        innovation_cov = obs @ self.cov @ obs.T + self.R
        gain = self.cov @ obs.T @ np.linalg.inv(innovation_cov)
        self.state = self.state + gain @ (measurement - obs @ self.state)
        self.cov = (np.eye(4) - gain @ obs) @ self.cov

    def velocity_magnitude(self) -> float:
        """Return the Euclidean norm of the velocity part of the state."""
        vx, vy = self.state[2], self.state[3]
        return math.sqrt(vx * vx + vy * vy)

    def velocity_vector(self) -> tuple[float, float]:
        """Return ``(vx, vy)`` as plain floats."""
        vx, vy = self.state[2], self.state[3]
        return (float(vx), float(vy))
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
# ── Velocity computation ──────────────────────────────────────────────────────
|
| 80 |
+
|
| 81 |
+
def compute_joint_velocity(
    keypoints_per_frame: list[dict],
    fps: float,
) -> dict[int, list[float]]:
    """
    Track each of the 17 joints with its own Kalman filter and report its speed.

    The filter time step is ``1 / fps`` (or 1.0 when ``fps`` is not positive),
    so the reported speeds are in pixels per second when ``fps`` is positive.

    Returns dict[joint_idx, [speed_frame0, ...]] with one entry per frame for
    every joint index 0..16. A frame where the joint is missing or below
    ``CONF_THRESHOLD`` records 0.0; the filter is still advanced for that frame.
    """
    dt = 1.0 / fps if fps > 0 else 1.0

    speeds_by_joint: dict[int, list[float]] = {}
    # The per-joint filters never interact, so each joint is processed on its own.
    for joint in range(17):
        tracker = SimpleKalmanFilter()
        series: list[float] = []
        for frame_kps in keypoints_per_frame:
            observation = frame_kps.get(joint)
            tracker.predict(dt)
            if observation and observation.get("conf", 0.0) >= CONF_THRESHOLD:
                tracker.update(observation["x"], observation["y"])
                series.append(tracker.velocity_magnitude())
            else:
                series.append(0.0)
        speeds_by_joint[joint] = series

    return speeds_by_joint
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
# ── Helpers ───────────────────────────────────────────────────────────────────
|
| 111 |
+
|
| 112 |
+
def _conf_to_bgr(conf: float) -> tuple[int, int, int]:
|
| 113 |
+
"""Map confidence 0→1 to BGR color red→green via HSV."""
|
| 114 |
+
hue = conf * 120.0 / 360.0
|
| 115 |
+
r, g, b = colorsys.hsv_to_rgb(hue, 1.0, 1.0)
|
| 116 |
+
return (int(b * 255), int(g * 255), int(r * 255))
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
# ── PoseVisualizer ────────────────────────────────────────────────────────────
|
| 120 |
+
|
| 121 |
+
class PoseVisualizer:
    """Renders skeleton, trail, and velocity-arrow overlays onto video frames.

    ``last_velocities`` holds the per-joint speed series computed by the most
    recent ``render_video`` call that got past its early exits; it is empty
    until then.
    """

    def __init__(self):
        self.last_velocities: dict[int, list[float]] = {}

    # ── Skeleton ──────────────────────────────────────────────────────────────

    def _draw_skeleton(self, frame: np.ndarray, kps: dict) -> np.ndarray:
        """Draw COCO-17 bones (white) and joints (confidence-coloured) onto ``frame``.

        Only keypoints at or above ``CONF_THRESHOLD`` are drawn, and a bone is
        drawn only when both of its endpoints are. ``frame`` is modified in
        place and returned.
        """
        visible = {j: kp for j, kp in kps.items() if kp.get("conf", 0.0) >= CONF_THRESHOLD}

        # Bones
        for j1, j2 in COCO_SKELETON:
            if j1 in visible and j2 in visible:
                p1 = (int(visible[j1]["x"]), int(visible[j1]["y"]))
                p2 = (int(visible[j2]["x"]), int(visible[j2]["y"]))
                cv2.line(frame, p1, p2, (255, 255, 255), 2)

        # Joints: filled confidence-coloured disc with a thin white outline.
        for kp in visible.values():
            pt = (int(kp["x"]), int(kp["y"]))
            cv2.circle(frame, pt, 4, _conf_to_bgr(kp["conf"]), -1)
            cv2.circle(frame, pt, 5, (255, 255, 255), 1)

        return frame

    # ── Trails ───────────────────────────────────────────────────────────────

    def _draw_trails(self, frame: np.ndarray, trail_history: dict) -> np.ndarray:
        """Draw a fading polyline through each joint's recent positions.

        ``trail_history`` maps joint index to a sequence of ``(x, y)`` points,
        oldest first. Later segments are drawn brighter and thicker. ``frame``
        is modified in place and returned.
        """
        for trail in trail_history.values():
            pts = list(trail)
            if len(pts) < 2:
                continue
            for i in range(1, len(pts)):
                alpha = i / len(pts)
                brightness = int(255 * alpha)
                color = (brightness, brightness, brightness)
                thickness = max(1, int(3 * alpha))
                p1 = (int(pts[i - 1][0]), int(pts[i - 1][1]))
                p2 = (int(pts[i][0]), int(pts[i][1]))
                cv2.line(frame, p1, p2, color, thickness)
        return frame

    # ── Velocity arrows ───────────────────────────────────────────────────────

    def _draw_velocity_arrows(
        self,
        frame: np.ndarray,
        kps: dict,
        prev_kps: dict | None,
        velocities: dict[int, list[float]],
        frame_idx: int,
    ) -> np.ndarray:
        """Draw one arrow per joint, scaled by that joint's speed in this frame.

        The arrow direction is the displacement from ``prev_kps`` to ``kps``.
        Its length is the joint's speed relative to the fastest joint in this
        frame, capped at ``MAX_ARROW_PX``. The colour is green / orange / red
        for the lower / middle / upper third of that ratio. Nothing is drawn
        for the first frame (``prev_kps`` is None) or when every speed is 0.
        ``frame`` is modified in place and returned.
        """
        if prev_kps is None:
            return frame

        all_speeds = [
            velocities[j][frame_idx]
            for j in range(17)
            if frame_idx < len(velocities.get(j, []))
        ]
        peak = max(all_speeds) if all_speeds else 1.0
        if peak == 0.0:
            return frame

        for j in range(17):
            kp = kps.get(j)
            pk = prev_kps.get(j)
            if not kp or not pk:
                continue
            if kp.get("conf", 0.0) < CONF_THRESHOLD:
                continue
            speeds = velocities.get(j, [])
            if frame_idx >= len(speeds):
                continue
            speed = speeds[frame_idx]
            if speed == 0.0:
                continue

            dx = kp["x"] - pk["x"]
            dy = kp["y"] - pk["y"]
            mag = math.sqrt(dx * dx + dy * dy)
            if mag < 1e-6:
                # No measurable displacement, hence no direction to draw.
                continue

            ratio = speed / peak
            length = min(ratio * MAX_ARROW_PX, MAX_ARROW_PX)
            nx, ny = dx / mag, dy / mag
            start = (int(kp["x"]), int(kp["y"]))
            end = (int(kp["x"] + nx * length), int(kp["y"] + ny * length))

            if ratio < 0.33:
                color = (0, 200, 0)      # green
            elif ratio < 0.66:
                color = (0, 140, 255)    # orange
            else:
                color = (0, 0, 255)      # red

            cv2.arrowedLine(frame, start, end, color, 2, tipLength=0.35)

        return frame

    # ── Public ────────────────────────────────────────────────────────────────

    def render_video(
        self,
        ingest,
        pose2d,
        layers: set[str],
        output_path: str,
    ) -> str | None:
        """
        Render the annotated video and return ``output_path``, or None on failure.

        Args:
            ingest: Provides ``frames`` (image arrays) and ``fps``.
            pose2d: Provides ``keypoints``, one dict per frame.
            layers: Subset of {"skeleton", "trails", "velocity_arrows"}.
            output_path: Destination of the finished ``.mp4``.

        Returns None without doing any work when ``layers`` is empty or no
        frame has keypoints. Frames wider than 1280 px are downscaled (and
        keypoints with them). The video is written with the ``mp4v`` codec to
        a temporary file, then remuxed by ffmpeg with ``+faststart``; if
        ffmpeg fails, the raw file is moved to ``output_path`` instead. Any
        other error is logged and turned into None. The temporary file and the
        writer are always cleaned up.
        """
        if not layers:
            return None

        if not any(pose2d.keypoints):
            return None

        import os
        import shutil
        import subprocess

        tmp_path: str | None = None
        writer = None
        try:
            velocities = compute_joint_velocity(pose2d.keypoints, ingest.fps)
            self.last_velocities = velocities

            frames = ingest.frames
            if not frames:
                logger.warning("render_video failed: no frames to render")
                return None
            orig_h, orig_w = frames[0].shape[:2]
            fps = ingest.fps or 30.0

            # Cap at 1280px wide — big frames are slow and don't need to be HQ
            max_w = 1280
            if orig_w > max_w:
                scale = max_w / orig_w
                out_w = max_w
                out_h = int(orig_h * scale)
            else:
                scale = 1.0
                out_w, out_h = orig_w, orig_h

            # Scale keypoint coordinates to match resized frames
            def _scale_kps(kps: dict) -> dict:
                if scale == 1.0:
                    return kps
                return {
                    j: {**kp, "x": kp["x"] * scale, "y": kp["y"] * scale}
                    for j, kp in kps.items()
                }

            scaled_keypoints = [_scale_kps(k) for k in pose2d.keypoints]

            # Write raw mp4v to a temp file, then remux with ffmpeg faststart.
            # Only the name is needed; the handle is closed right away.
            with tempfile.NamedTemporaryFile(suffix="_raw.mp4", delete=False) as tmp:
                tmp_path = tmp.name

            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            writer = cv2.VideoWriter(tmp_path, fourcc, fps, (out_w, out_h))
            if not writer.isOpened():
                logger.warning("VideoWriter failed to open: %s", tmp_path)
                return None

            trail_history: dict[int, deque] = {j: deque(maxlen=TRAIL_LENGTH) for j in range(17)}
            prev_kps: dict | None = None

            for frame_idx, (frame, kps) in enumerate(zip(frames, scaled_keypoints)):
                if scale != 1.0:
                    out_frame = cv2.resize(frame, (out_w, out_h), interpolation=cv2.INTER_AREA)
                else:
                    # Copy so the overlays never touch the caller's frames.
                    out_frame = frame.copy()

                if "trails" in layers:
                    for j, kp in kps.items():
                        if kp.get("conf", 0.0) >= CONF_THRESHOLD:
                            trail_history[j].append((kp["x"], kp["y"]))
                    out_frame = self._draw_trails(out_frame, trail_history)

                if "skeleton" in layers:
                    out_frame = self._draw_skeleton(out_frame, kps)

                if "velocity_arrows" in layers:
                    out_frame = self._draw_velocity_arrows(
                        out_frame, kps, prev_kps, velocities, frame_idx
                    )

                writer.write(out_frame)
                prev_kps = kps

            # Finalise the raw file before ffmpeg reads it.
            writer.release()
            writer = None

            # Remux with faststart so browsers can seek without downloading the whole file
            try:
                subprocess.run(
                    ["ffmpeg", "-y", "-i", tmp_path, "-c", "copy",
                     "-movflags", "+faststart", output_path],
                    check=True, capture_output=True,
                )
            except (OSError, subprocess.SubprocessError) as ffmpeg_err:
                logger.warning("ffmpeg remux failed (%s) — using raw mp4v", ffmpeg_err)
                shutil.move(tmp_path, output_path)
                tmp_path = None  # moved into place; nothing left to delete

            return output_path

        except Exception as e:
            logger.warning("render_video failed: %s", e)
            return None
        finally:
            if writer is not None:
                writer.release()
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    # Cleanup is best effort; the render result stands either way.
                    pass
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
# ── Velocity summary ──────────────────────────────────────────────────────────
|
| 334 |
+
|
| 335 |
+
def build_velocity_summary(
    keypoints_per_frame: list[dict],
    velocities: dict[int, list[float]],
) -> str:
    """Render per-joint average and peak speed as a markdown table.

    A joint is listed only if it is at or above ``CONF_THRESHOLD`` in at least
    half of the frames and has a non-empty speed series. Rows are ordered by
    peak speed, highest first. Returns "" when there are no frames or no joint
    qualifies.
    """
    total_frames = len(keypoints_per_frame)
    if total_frames == 0:
        return ""

    stats = []
    for joint in range(17):
        seen = 0
        for frame_kps in keypoints_per_frame:
            if frame_kps.get(joint, {}).get("conf", 0.0) >= CONF_THRESHOLD:
                seen += 1
        if seen < total_frames * 0.5:
            continue

        series = velocities.get(joint, [])
        if not series:
            continue

        stats.append((COCO_KEYPOINTS[joint], sum(series) / len(series), max(series)))

    if not stats:
        return ""

    ordered = sorted(stats, key=lambda row: row[2], reverse=True)
    table = ["| Joint | Avg (px/s) | Peak (px/s) |", "|---|---|---|"]
    table.extend(f"| {name} | {avg:.1f} | {peak:.1f} |" for name, avg, peak in ordered)
    return "\n".join(table)
|
formscout/config.py
CHANGED
|
@@ -8,8 +8,89 @@ from pathlib import Path
|
|
| 8 |
ROOT = Path(__file__).parent.parent
|
| 9 |
|
| 10 |
# ─── Model IDs ───────────────────────────────────────────────────────────────
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
SAM_CHECKPOINT = "sam2.1_hiera_base_plus.pt"
|
| 14 |
SAM_3D_CHECKPOINT = ROOT / "checkpoints" / "sam-3d-body-dinov3" / "model.ckpt"
|
| 15 |
SAM_3D_HF_REPO = "facebook/sam-3d-body-dinov3"
|
|
|
|
| 8 |
ROOT = Path(__file__).parent.parent
|
| 9 |
|
| 10 |
# ─── Model IDs ───────────────────────────────────────────────────────────────
|
| 11 |
+
_YOLO_DIR = ROOT / "checkpoints" / "yolo26"
|
| 12 |
+
|
| 13 |
+
POSE_MODELS: dict[str, dict] = {
|
| 14 |
+
# ── MediaPipe (official Tasks API, local checkpoint) ───────────────────
|
| 15 |
+
"MediaPipe-Pose — full (~9 MB, CPU-friendly)": {
|
| 16 |
+
"backend": "mediapipe",
|
| 17 |
+
"path": str(ROOT / "checkpoints" / "mediapipe" / "pose_landmarker_full.task"),
|
| 18 |
+
"params_m": 4.2,
|
| 19 |
+
},
|
| 20 |
+
# ── YOLO26 (local checkpoints) ─────────────────────────────────────────
|
| 21 |
+
"YOLO26n — nano (0.7M, fastest)": {
|
| 22 |
+
"backend": "yolo",
|
| 23 |
+
"path": str(_YOLO_DIR / "yolo26n-pose.pt"),
|
| 24 |
+
"params_m": 0.7,
|
| 25 |
+
},
|
| 26 |
+
"YOLO26s — small (3.5M)": {
|
| 27 |
+
"backend": "yolo",
|
| 28 |
+
"path": str(_YOLO_DIR / "yolo26s-pose.pt"),
|
| 29 |
+
"params_m": 3.5,
|
| 30 |
+
},
|
| 31 |
+
"YOLO26m — medium (9M)": {
|
| 32 |
+
"backend": "yolo",
|
| 33 |
+
"path": str(_YOLO_DIR / "yolo26m-pose.pt"),
|
| 34 |
+
"params_m": 9.0,
|
| 35 |
+
},
|
| 36 |
+
"YOLO26l — large (25.9M)": {
|
| 37 |
+
"backend": "yolo",
|
| 38 |
+
"path": str(_YOLO_DIR / "yolo26l-pose.pt"),
|
| 39 |
+
"params_m": 25.9,
|
| 40 |
+
},
|
| 41 |
+
"YOLO26x — extra-large (57.6M)": {
|
| 42 |
+
"backend": "yolo",
|
| 43 |
+
"path": str(_YOLO_DIR / "yolo26x-pose.pt"),
|
| 44 |
+
"params_m": 57.6,
|
| 45 |
+
},
|
| 46 |
+
# ── Sapiens2 (Phase 3 — needs custom repo + detector, 308-kp Sociopticon) ─
|
| 47 |
+
"Sapiens2-0.4B [Phase 3, ~1.6 GB]": {
|
| 48 |
+
"backend": "sapiens2",
|
| 49 |
+
"hf_id": "facebook/sapiens2-pose-0.4b",
|
| 50 |
+
"params_m": 400,
|
| 51 |
+
},
|
| 52 |
+
"Sapiens2-0.8B [Phase 3, ~3.2 GB]": {
|
| 53 |
+
"backend": "sapiens2",
|
| 54 |
+
"hf_id": "facebook/sapiens2-pose-0.8b",
|
| 55 |
+
"params_m": 800,
|
| 56 |
+
},
|
| 57 |
+
"Sapiens2-1B [Phase 3, ~6 GB]": {
|
| 58 |
+
"backend": "sapiens2",
|
| 59 |
+
"hf_id": "facebook/sapiens2-pose-1b",
|
| 60 |
+
"params_m": 1000,
|
| 61 |
+
},
|
| 62 |
+
"Sapiens2-5B [Phase 3, ~20 GB, large GPU]": {
|
| 63 |
+
"backend": "sapiens2",
|
| 64 |
+
"hf_id": "facebook/sapiens2-pose-5b",
|
| 65 |
+
"params_m": 5000,
|
| 66 |
+
},
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
DEFAULT_POSE_MODEL = "YOLO26n — nano (0.7M, fastest)"
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def _is_model_available(spec: dict) -> bool:
|
| 73 |
+
"""Return True if the model checkpoint is present and the backend is importable."""
|
| 74 |
+
backend = spec["backend"]
|
| 75 |
+
if backend in ("yolo", "mediapipe"):
|
| 76 |
+
return Path(spec["path"]).exists()
|
| 77 |
+
if backend == "sapiens2":
|
| 78 |
+
try:
|
| 79 |
+
import sapiens # noqa: F401 — custom repo must be installed
|
| 80 |
+
return True
|
| 81 |
+
except ImportError:
|
| 82 |
+
return False
|
| 83 |
+
return False
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def available_pose_models() -> dict[str, dict]:
    """Return the POSE_MODELS entries that pass ``_is_model_available``.

    Keys and spec dicts are taken from POSE_MODELS unchanged, in the same order.
    """
    ready: dict[str, dict] = {}
    for label, entry in POSE_MODELS.items():
        if _is_model_available(entry):
            ready[label] = entry
    return ready
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
# Backward-compat aliases: legacy constant names kept so existing imports keep working
|
| 92 |
+
YOLO_POSE_MODEL = str(_YOLO_DIR / "yolo26l-pose.pt")
|
| 93 |
+
YOLO_POSE_MODEL_HQ = str(_YOLO_DIR / "yolo26x-pose.pt")
|
| 94 |
SAM_CHECKPOINT = "sam2.1_hiera_base_plus.pt"
|
| 95 |
SAM_3D_CHECKPOINT = ROOT / "checkpoints" / "sam-3d-body-dinov3" / "model.ckpt"
|
| 96 |
SAM_3D_HF_REPO = "facebook/sam-3d-body-dinov3"
|
formscout/pipeline.py
CHANGED
|
@@ -37,10 +37,11 @@ class Director:
|
|
| 37 |
self._judge = JudgeAgent()
|
| 38 |
self._report = ReportAgent()
|
| 39 |
|
| 40 |
-
def run(self, video_path: str, test_name: str = "deep_squat", side: str = "na") -> PipelineState:
|
| 41 |
"""
|
| 42 |
Run the full pipeline on a single video.
|
| 43 |
test_name/side serve as manual override when provided (skips classifier).
|
|
|
|
| 44 |
"""
|
| 45 |
state = PipelineState(video_path=video_path)
|
| 46 |
|
|
@@ -51,7 +52,7 @@ class Director:
|
|
| 51 |
return state
|
| 52 |
|
| 53 |
# ─── Pose 2D ───
|
| 54 |
-
state.pose2d = self._pose2d.run(state.ingest)
|
| 55 |
if state.pose2d.confidence < config.MIN_CONFIDENCE:
|
| 56 |
state.warnings.append("pose2d: low confidence — no clear person detected")
|
| 57 |
|
|
|
|
| 37 |
self._judge = JudgeAgent()
|
| 38 |
self._report = ReportAgent()
|
| 39 |
|
| 40 |
+
def run(self, video_path: str, test_name: str = "deep_squat", side: str = "na", model_key: str | None = None) -> PipelineState:
|
| 41 |
"""
|
| 42 |
Run the full pipeline on a single video.
|
| 43 |
test_name/side serve as manual override when provided (skips classifier).
|
| 44 |
+
model_key selects the pose backend (see config.POSE_MODELS).
|
| 45 |
"""
|
| 46 |
state = PipelineState(video_path=video_path)
|
| 47 |
|
|
|
|
| 52 |
return state
|
| 53 |
|
| 54 |
# ─── Pose 2D ───
|
| 55 |
+
state.pose2d = self._pose2d.run(state.ingest, model_key=model_key)
|
| 56 |
if state.pose2d.confidence < config.MIN_CONFIDENCE:
|
| 57 |
state.warnings.append("pose2d: low confidence — no clear person detected")
|
| 58 |
|
formscout/startup.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Checkpoint bootstrap — downloads missing model files from HF model repo on first run.
|
| 3 |
+
Called once at app startup before build_app(); no-ops if files already present.
|
| 4 |
+
"""
|
| 5 |
+
from __future__ import annotations
|
| 6 |
+
|
| 7 |
+
import logging
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
+
# Hugging Face repo id passed to hf_hub_download as the source of checkpoint files.
CHECKPOINT_REPO = "silas-therapy/formscout-checkpoints"
|
| 13 |
+
ROOT = Path(__file__).parent.parent
|
| 14 |
+
|
| 15 |
+
# Checkpoint paths, relative to ROOT and also used as filenames inside CHECKPOINT_REPO.
_CHECKPOINTS = [
|
| 16 |
+
"checkpoints/yolo26/yolo26n-pose.pt",
|
| 17 |
+
"checkpoints/yolo26/yolo26s-pose.pt",
|
| 18 |
+
"checkpoints/yolo26/yolo26m-pose.pt",
|
| 19 |
+
"checkpoints/yolo26/yolo26l-pose.pt",
|
| 20 |
+
"checkpoints/yolo26/yolo26x-pose.pt",
|
| 21 |
+
"checkpoints/mediapipe/pose_landmarker_full.task",
|
| 22 |
+
]
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def ensure_checkpoints() -> None:
    """Fetch every checkpoint in ``_CHECKPOINTS`` that is not yet under ``ROOT``.

    Files are downloaded from ``CHECKPOINT_REPO`` with ``hf_hub_download``.
    The function is best-effort: if ``huggingface_hub`` is not installed it
    logs a warning and returns, and a failed download is logged as a warning
    without stopping the remaining downloads.
    """
    try:
        from huggingface_hub import hf_hub_download
    except ImportError:
        logger.warning("huggingface_hub not installed — skipping checkpoint download")
        return

    for rel_path in _CHECKPOINTS:
        target = ROOT / rel_path
        if not target.exists():
            logger.info("Downloading %s ...", rel_path)
            try:
                target.parent.mkdir(parents=True, exist_ok=True)
                hf_hub_download(
                    repo_id=CHECKPOINT_REPO,
                    filename=rel_path,
                    local_dir=str(ROOT),
                )
            except Exception as err:
                # Deliberately broad: one bad file must not block the others.
                logger.warning("Failed to download %s: %s", rel_path, err)
            else:
                logger.info("Downloaded %s", rel_path)
|
requirements.txt
CHANGED
|
@@ -11,3 +11,5 @@ ruff>=0.4
|
|
| 11 |
black>=24.4
|
| 12 |
huggingface_hub>=0.23
|
| 13 |
transformers>=4.44
|
|
|
|
|
|
|
|
|
| 11 |
black>=24.4
|
| 12 |
huggingface_hub>=0.23
|
| 13 |
transformers>=4.44
|
| 14 |
+
onnxruntime>=1.18
|
| 15 |
+
mediapipe>=0.10
|
tests/test_pose2d.py
CHANGED
|
@@ -1,6 +1,9 @@
|
|
| 1 |
"""Tests for Pose2DAgent — model-dependent, skips if YOLO unavailable."""
|
| 2 |
-
import
|
|
|
|
|
|
|
| 3 |
import numpy as np
|
|
|
|
| 4 |
|
| 5 |
from formscout.types import IngestResult, Pose2DResult
|
| 6 |
|
|
@@ -46,3 +49,60 @@ class TestPose2DAgent:
|
|
| 46 |
result = pose2d_agent.run(empty)
|
| 47 |
assert result.confidence == 0.0
|
| 48 |
assert "no frames" in result.notes.lower()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""Tests for Pose2DAgent — model-dependent, skips if YOLO unavailable."""
|
| 2 |
+
import inspect
|
| 3 |
+
import unittest.mock as mock
|
| 4 |
+
|
| 5 |
import numpy as np
|
| 6 |
+
import pytest
|
| 7 |
|
| 8 |
from formscout.types import IngestResult, Pose2DResult
|
| 9 |
|
|
|
|
| 49 |
result = pose2d_agent.run(empty)
|
| 50 |
assert result.confidence == 0.0
|
| 51 |
assert "no frames" in result.notes.lower()
|
| 52 |
+
|
| 53 |
+
def test_run_accepts_model_key(self, pose2d_agent):
    """``run`` must take ``model_key`` and must not take ``model_path``."""
    param_names = inspect.signature(pose2d_agent.run).parameters
    assert "model_key" in param_names
    assert "model_path" not in param_names
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def _blank_ingest_3():
    """Build an IngestResult holding three all-black 640x480 frames at 30 fps."""
    blank_frames = []
    for _ in range(3):
        blank_frames.append(np.zeros((480, 640, 3), dtype=np.uint8))
    return IngestResult(
        frames=blank_frames,
        fps=30.0,
        duration=0.1,
        n_people=1,
        width=640,
        height=480,
    )
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class TestPose2DBackendsMocked:
    """Backend dispatch tests — each backend runner is patched with a mock."""

    @staticmethod
    def _seventeen_joint_frames(conf):
        """Three frames of 17 joints, joint ``i`` at (i, i) with the given confidence."""
        return [
            {i: {"x": float(i), "y": float(i), "conf": conf} for i in range(17)}
            for _ in range(3)
        ]

    def test_yolo_backend_dispatches(self):
        from formscout.agents.pose2d import Pose2DAgent

        canned = [{0: {"x": 10.0, "y": 20.0, "conf": 0.9}} for _ in range(3)]
        with mock.patch("formscout.agents.pose2d._run_yolo", return_value=canned) as runner:
            out = Pose2DAgent().run(
                _blank_ingest_3(), model_key="YOLO26n — nano (0.7M, fastest)"
            )
        runner.assert_called_once()
        assert isinstance(out, Pose2DResult)
        assert len(out.keypoints) == 3
        assert out.confidence > 0.0

    def test_mediapipe_backend_dispatches(self):
        from formscout.agents.pose2d import Pose2DAgent

        canned = self._seventeen_joint_frames(0.8)
        with mock.patch("formscout.agents.pose2d._run_mediapipe", return_value=canned) as runner:
            out = Pose2DAgent().run(
                _blank_ingest_3(), model_key="MediaPipe-Pose — full (~9 MB, CPU-friendly)"
            )
        runner.assert_called_once()
        assert isinstance(out, Pose2DResult)
        assert len(out.keypoints) == 3
        assert all(len(f) == 17 for f in out.keypoints)

    def test_sapiens2_backend_dispatches(self):
        from formscout.agents.pose2d import Pose2DAgent

        canned = self._seventeen_joint_frames(0.85)
        with mock.patch("formscout.agents.pose2d._run_sapiens2", return_value=canned) as runner:
            out = Pose2DAgent().run(
                _blank_ingest_3(), model_key="Sapiens2-0.4B [Phase 3, ~1.6 GB]"
            )
        runner.assert_called_once()
        assert isinstance(out, Pose2DResult)
        assert len(out.keypoints) == 3

    def test_unknown_model_key_falls_back(self):
        from formscout.agents.pose2d import Pose2DAgent

        canned = [{0: {"x": 1.0, "y": 2.0, "conf": 0.7}} for _ in range(3)]
        # Only the YOLO runner is patched; an unrecognised key must still yield a result.
        with mock.patch("formscout.agents.pose2d._run_yolo", return_value=canned):
            out = Pose2DAgent().run(_blank_ingest_3(), model_key="nonexistent-model-xyz")
        assert isinstance(out, Pose2DResult)

    def test_confidence_zero_on_empty_keypoints(self):
        from formscout.agents.pose2d import Pose2DAgent

        no_detections = [{}, {}, {}]
        with mock.patch("formscout.agents.pose2d._run_yolo", return_value=no_detections):
            out = Pose2DAgent().run(
                _blank_ingest_3(), model_key="YOLO26n — nano (0.7M, fastest)"
            )
        assert out.confidence == 0.0
        assert "no person" in out.notes.lower()
|
tests/test_visualizer.py
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for PoseVisualizer — no GPU, no model downloads."""
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pytest
|
| 4 |
+
from formscout.types import IngestResult, Pose2DResult
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def _make_ingest(n=5, h=480, w=640, fps=30.0):
    """Build an IngestResult of ``n`` all-black ``h`` x ``w`` frames at ``fps``."""
    black_frames = []
    for _ in range(n):
        black_frames.append(np.zeros((h, w, 3), dtype=np.uint8))
    return IngestResult(
        frames=black_frames,
        fps=fps,
        duration=n / fps,
        n_people=1,
        width=w,
        height=h,
    )
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def _make_pose(n=5, w=640, h=480):
    """Synthetic Pose2DResult: ``n`` frames of 17 joints, every joint at conf=0.9.

    Joint ``j`` in frame ``i`` sits at x = 50 + 30*j + 2*i and y = 100 + 20*j,
    so each joint drifts two pixels to the right per frame.
    ``w`` and ``h`` are accepted but not used by this helper.
    """
    frames = [
        {
            joint: {
                "x": float(50 + joint * 30 + frame_no * 2),
                "y": float(100 + joint * 20),
                "conf": 0.9,
            }
            for joint in range(17)
        }
        for frame_no in range(n)
    ]
    return Pose2DResult(keypoints=frames, fps=30.0, confidence=0.9, notes="")
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class TestComputeJointVelocity:
    """Shape and value checks for ``compute_joint_velocity``."""

    @staticmethod
    def _speeds_for(keypoints):
        """Run compute_joint_velocity on ``keypoints`` at 30 fps."""
        from formscout.agents.visualizer import compute_joint_velocity

        return compute_joint_velocity(keypoints, fps=30.0)

    def test_returns_17_joints(self):
        per_joint = self._speeds_for(_make_pose(n=5).keypoints)
        assert len(per_joint) == 17

    def test_each_list_has_n_frames(self):
        per_joint = self._speeds_for(_make_pose(n=5).keypoints)
        for joint_idx, speeds in per_joint.items():
            assert len(speeds) == 5, f"joint {joint_idx} has {len(speeds)} speeds, expected 5"

    def test_speeds_are_non_negative(self):
        per_joint = self._speeds_for(_make_pose(n=5).keypoints)
        for speeds in per_joint.values():
            assert all(s >= 0.0 for s in speeds)

    def test_missing_keypoints_give_zero_speed(self):
        # Five frames with no detections at all.
        per_joint = self._speeds_for([{} for _ in range(5)])
        for speeds in per_joint.values():
            assert all(s == 0.0 for s in speeds)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class TestDrawSkeleton:
    """Pixel-level checks for ``PoseVisualizer._draw_skeleton``."""

    def test_skeleton_draws_without_error(self):
        from formscout.agents.visualizer import PoseVisualizer

        painter = PoseVisualizer()
        canvas = np.zeros((480, 640, 3), dtype=np.uint8)
        joints = {}
        for j in range(17):
            joints[j] = {"x": float(50 + j * 30), "y": float(100 + j * 20), "conf": 0.9}
        drawn = painter._draw_skeleton(canvas.copy(), joints)
        assert drawn.shape == canvas.shape
        # Confident joints must change at least one pixel of the black canvas.
        assert not np.array_equal(drawn, canvas)

    def test_low_confidence_keypoints_not_drawn(self):
        from formscout.agents.visualizer import PoseVisualizer

        painter = PoseVisualizer()
        canvas = np.zeros((480, 640, 3), dtype=np.uint8)
        joints = {}
        for j in range(17):
            joints[j] = {"x": float(50 + j * 30), "y": 100.0, "conf": 0.1}
        drawn = painter._draw_skeleton(canvas.copy(), joints)
        # With conf=0.1 on every joint the canvas is expected to stay untouched.
        assert np.array_equal(drawn, canvas)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
class TestDrawTrails:
    """Pixel-level checks for ``PoseVisualizer._draw_trails``."""

    def test_trails_draw_without_error(self):
        from collections import deque

        from formscout.agents.visualizer import TRAIL_LENGTH, PoseVisualizer

        painter = PoseVisualizer()
        canvas = np.zeros((480, 640, 3), dtype=np.uint8)
        points = [(100 + i * 5, 200 + i * 3) for i in range(5)]
        history = {0: deque(points, maxlen=TRAIL_LENGTH)}
        drawn = painter._draw_trails(canvas.copy(), history)
        assert drawn.shape == canvas.shape
        assert not np.array_equal(drawn, canvas)

    def test_short_trail_no_crash(self):
        from collections import deque

        from formscout.agents.visualizer import TRAIL_LENGTH, PoseVisualizer

        painter = PoseVisualizer()
        canvas = np.zeros((480, 640, 3), dtype=np.uint8)
        # A single-point trail: the canvas is expected to come back unchanged.
        history = {0: deque([(100, 200)], maxlen=TRAIL_LENGTH)}
        drawn = painter._draw_trails(canvas.copy(), history)
        assert np.array_equal(drawn, canvas)
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
class TestDrawVelocityArrows:
    """Smoke tests for ``PoseVisualizer._draw_velocity_arrows``."""

    def test_arrows_draw_without_error(self):
        from formscout.agents.visualizer import PoseVisualizer

        painter = PoseVisualizer()
        canvas = np.zeros((480, 640, 3), dtype=np.uint8)
        current = {}
        previous = {}
        for j in range(17):
            current[j] = {"x": float(50 + j * 30), "y": float(100 + j * 20), "conf": 0.9}
            # Previous frame: every joint two pixels up and to the left.
            previous[j] = {"x": float(48 + j * 30), "y": float(98 + j * 20), "conf": 0.9}
        # Only joint 5 has non-zero speeds.
        speeds = {
            j: ([0.0, 10.0, 50.0, 80.0, 120.0] if j == 5 else [0.0] * 5)
            for j in range(17)
        }
        drawn = painter._draw_velocity_arrows(
            canvas.copy(), current, previous, speeds, frame_idx=4
        )
        assert drawn.shape == canvas.shape

    def test_no_prev_kps_no_crash(self):
        from formscout.agents.visualizer import PoseVisualizer

        painter = PoseVisualizer()
        canvas = np.zeros((480, 640, 3), dtype=np.uint8)
        current = {}
        for j in range(17):
            current[j] = {"x": float(50 + j * 30), "y": 100.0, "conf": 0.9}
        speeds = {j: [50.0] * 5 for j in range(17)}
        # No previous frame is available at frame 0.
        drawn = painter._draw_velocity_arrows(canvas.copy(), current, None, speeds, frame_idx=0)
        assert drawn.shape == canvas.shape
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
class TestRenderVideo:
    """End-to-end checks for ``PoseVisualizer.render_video``."""

    def test_creates_mp4_file(self, tmp_path):
        import os

        from formscout.agents.visualizer import PoseVisualizer

        painter = PoseVisualizer()
        clip = _make_ingest(n=5)
        poses = _make_pose(n=5)
        target = str(tmp_path / "out.mp4")
        written = painter.render_video(clip, poses, {"skeleton"}, target)
        assert written is not None
        assert os.path.exists(written)
        assert os.path.getsize(written) > 0

    def test_empty_layers_returns_none(self, tmp_path):
        from formscout.agents.visualizer import PoseVisualizer

        painter = PoseVisualizer()
        target = str(tmp_path / "out.mp4")
        # An empty layer set is passed, so None is expected back.
        written = painter.render_video(_make_ingest(), _make_pose(), set(), target)
        assert written is None

    def test_no_detections_returns_none(self, tmp_path):
        from formscout.agents.visualizer import PoseVisualizer

        painter = PoseVisualizer()
        clip = _make_ingest(n=5)
        undetected = Pose2DResult(
            keypoints=[{} for _ in range(5)],
            fps=30.0,
            confidence=0.0,
            notes="",
        )
        target = str(tmp_path / "out.mp4")
        written = painter.render_video(clip, undetected, {"skeleton"}, target)
        assert written is None

    def test_last_velocities_set_after_render(self, tmp_path):
        from formscout.agents.visualizer import PoseVisualizer

        painter = PoseVisualizer()
        target = str(tmp_path / "out.mp4")
        painter.render_video(_make_ingest(n=5), _make_pose(n=5), {"skeleton"}, target)
        assert len(painter.last_velocities) == 17
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
class TestBuildVelocitySummary:
    """Output-format checks for ``build_velocity_summary``."""

    def test_returns_markdown_table(self):
        from formscout.agents.visualizer import build_velocity_summary, compute_joint_velocity

        keypoints = _make_pose(n=10).keypoints
        speeds = compute_joint_velocity(keypoints, fps=30.0)
        summary = build_velocity_summary(keypoints, speeds)
        assert "|" in summary
        joint_words = ["knee", "shoulder", "hip", "ankle"]
        assert any(name in summary for name in joint_words)

    def test_empty_keypoints_returns_empty_string(self):
        from formscout.agents.visualizer import build_velocity_summary

        no_detections = [{} for _ in range(5)]
        zero_speeds = {j: [0.0] * 5 for j in range(17)}
        summary = build_velocity_summary(no_detections, zero_speeds)
        assert summary == ""
|