Spaces:
Sleeping
Sleeping
Commit ·
cf93910
0
Parent(s):
Initial commit: SanketSetu - Sign Language Recognition System
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .dockerignore +29 -0
- .gitattributes +2 -0
- .github/workflows/deploy-backend.yml +61 -0
- .github/workflows/deploy-frontend.yml +45 -0
- CNN_Autoencoder_LightGBM/autoencoder_model.pkl +3 -0
- CNN_Autoencoder_LightGBM/autoencoder_model.pth +3 -0
- CNN_Autoencoder_LightGBM/lgbm_model.pkl +3 -0
- CNN_Autoencoder_LightGBM/lgbm_model.pth +3 -0
- CNN_PreTrained/cnn_model.pkl +3 -0
- CNN_PreTrained/cnn_model.pth +3 -0
- CNN_PreTrained/svm_model.pkl +3 -0
- CNN_PreTrained/svm_model.pth +3 -0
- Dockerfile +48 -0
- Mediapipe_XGBoost/model.pkl +3 -0
- Mediapipe_XGBoost/model.pth +3 -0
- README.md +108 -0
- SanketSetu_ Production-Grade Implementation Plan.md +99 -0
- TASKS.md +284 -0
- backend/.env.example +32 -0
- backend/app/__init__.py +1 -0
- backend/app/__pycache__/__init__.cpython-312.pyc +0 -0
- backend/app/__pycache__/config.cpython-312.pyc +0 -0
- backend/app/__pycache__/main.cpython-312.pyc +0 -0
- backend/app/__pycache__/schemas.cpython-312.pyc +0 -0
- backend/app/config.py +64 -0
- backend/app/inference/__init__.py +1 -0
- backend/app/inference/__pycache__/__init__.cpython-312.pyc +0 -0
- backend/app/inference/__pycache__/ensemble.cpython-312.pyc +0 -0
- backend/app/inference/__pycache__/pipeline_a.cpython-312.pyc +0 -0
- backend/app/inference/__pycache__/pipeline_b.cpython-312.pyc +0 -0
- backend/app/inference/__pycache__/pipeline_c.cpython-312.pyc +0 -0
- backend/app/inference/ensemble.py +138 -0
- backend/app/inference/pipeline_a.py +57 -0
- backend/app/inference/pipeline_b.py +59 -0
- backend/app/inference/pipeline_c.py +86 -0
- backend/app/main.py +280 -0
- backend/app/models/__init__.py +1 -0
- backend/app/models/__pycache__/__init__.cpython-312.pyc +0 -0
- backend/app/models/__pycache__/label_map.cpython-312.pyc +0 -0
- backend/app/models/__pycache__/loader.cpython-312.pyc +0 -0
- backend/app/models/label_map.py +58 -0
- backend/app/models/loader.py +188 -0
- backend/app/schemas.py +74 -0
- backend/requirements-dev.txt +4 -0
- backend/requirements.txt +12 -0
- backend/tests/__init__.py +0 -0
- backend/tests/__pycache__/__init__.cpython-312.pyc +0 -0
- backend/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc +0 -0
- backend/tests/__pycache__/test_pipeline_a.cpython-312-pytest-9.0.2.pyc +0 -0
- backend/tests/__pycache__/test_pipeline_b.cpython-312-pytest-9.0.2.pyc +0 -0
.dockerignore
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ── Python ────────────────────────────────────────────────────────────────────
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*.pyo
|
| 5 |
+
.venv/
|
| 6 |
+
*.egg-info/
|
| 7 |
+
.pytest_cache/
|
| 8 |
+
.mypy_cache/
|
| 9 |
+
dist/
|
| 10 |
+
|
| 11 |
+
# ── Node / Frontend ───────────────────────────────────────────────────────────
|
| 12 |
+
frontend/node_modules/
|
| 13 |
+
frontend/dist/
|
| 14 |
+
frontend/.env.local
|
| 15 |
+
|
| 16 |
+
# ── Git / Editor ──────────────────────────────────────────────────────────────
|
| 17 |
+
.git/
|
| 18 |
+
.gitignore
|
| 19 |
+
.vscode/
|
| 20 |
+
*.md
|
| 21 |
+
TASKS.md
|
| 22 |
+
|
| 23 |
+
# ── OS ────────────────────────────────────────────────────────────────────────
|
| 24 |
+
.DS_Store
|
| 25 |
+
Thumbs.db
|
| 26 |
+
|
| 27 |
+
# ── Model binary variants (keep only .pkl, not duplicate .pth) ───────────────
|
| 28 |
+
# Both extensions are identical — Docker only needs .pkl
|
| 29 |
+
**/*.pth
|
.gitattributes
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
.github/workflows/deploy-backend.yml
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Deploy Backend → Fly.io
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches: [main]
|
| 6 |
+
paths:
|
| 7 |
+
- 'backend/**'
|
| 8 |
+
- 'Dockerfile'
|
| 9 |
+
- '.dockerignore'
|
| 10 |
+
- 'fly.toml'
|
| 11 |
+
- 'Mediapipe_XGBoost/**'
|
| 12 |
+
- 'CNN_Autoencoder_LightGBM/**'
|
| 13 |
+
- 'CNN_PreTrained/**'
|
| 14 |
+
|
| 15 |
+
jobs:
|
| 16 |
+
test:
|
| 17 |
+
name: Run backend tests
|
| 18 |
+
runs-on: ubuntu-latest
|
| 19 |
+
steps:
|
| 20 |
+
- uses: actions/checkout@v4
|
| 21 |
+
|
| 22 |
+
- uses: actions/setup-python@v5
|
| 23 |
+
with:
|
| 24 |
+
python-version: '3.12'
|
| 25 |
+
cache: pip
|
| 26 |
+
cache-dependency-path: backend/requirements.txt
|
| 27 |
+
|
| 28 |
+
- name: Install deps
|
| 29 |
+
run: |
|
| 30 |
+
pip install -r backend/requirements.txt
|
| 31 |
+
pip install -r backend/requirements-dev.txt
|
| 32 |
+
|
| 33 |
+
- name: Run tests
|
| 34 |
+
working-directory: backend
|
| 35 |
+
env:
|
| 36 |
+
KERAS_BACKEND: tensorflow
|
| 37 |
+
TF_CPP_MIN_LOG_LEVEL: "3"
|
| 38 |
+
CUDA_VISIBLE_DEVICES: ""
|
| 39 |
+
TF_ENABLE_ONEDNN_OPTS: "0"
|
| 40 |
+
run: pytest tests/ -v --tb=short -q
|
| 41 |
+
# Note: tests will be skipped automatically if model .pkl files are absent
|
| 42 |
+
# (model artefacts are gitignored). Add them as GitHub Actions artifacts
|
| 43 |
+
# or use DVC/GCS to restore them in CI if you want full test coverage.
|
| 44 |
+
|
| 45 |
+
deploy:
|
| 46 |
+
name: Deploy to Fly.io
|
| 47 |
+
needs: test
|
| 48 |
+
runs-on: ubuntu-latest
|
| 49 |
+
environment: production
|
| 50 |
+
concurrency:
|
| 51 |
+
group: fly-deploy
|
| 52 |
+
cancel-in-progress: true
|
| 53 |
+
steps:
|
| 54 |
+
- uses: actions/checkout@v4
|
| 55 |
+
|
| 56 |
+
- uses: superfly/flyctl-actions/setup-flyctl@master
|
| 57 |
+
|
| 58 |
+
- name: Deploy
|
| 59 |
+
run: flyctl deploy --remote-only
|
| 60 |
+
env:
|
| 61 |
+
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
|
.github/workflows/deploy-frontend.yml
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Deploy Frontend → Vercel
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches: [main]
|
| 6 |
+
paths:
|
| 7 |
+
- 'frontend/**'
|
| 8 |
+
|
| 9 |
+
jobs:
|
| 10 |
+
build-and-deploy:
|
| 11 |
+
name: Build & Deploy
|
| 12 |
+
runs-on: ubuntu-latest
|
| 13 |
+
environment: production
|
| 14 |
+
steps:
|
| 15 |
+
- uses: actions/checkout@v4
|
| 16 |
+
|
| 17 |
+
- uses: actions/setup-node@v4
|
| 18 |
+
with:
|
| 19 |
+
node-version: '22'
|
| 20 |
+
cache: npm
|
| 21 |
+
cache-dependency-path: frontend/package-lock.json
|
| 22 |
+
|
| 23 |
+
- name: Install dependencies
|
| 24 |
+
working-directory: frontend
|
| 25 |
+
run: npm ci
|
| 26 |
+
|
| 27 |
+
- name: Type-check
|
| 28 |
+
working-directory: frontend
|
| 29 |
+
run: npx tsc --project tsconfig.app.json --noEmit
|
| 30 |
+
|
| 31 |
+
- name: Build
|
| 32 |
+
working-directory: frontend
|
| 33 |
+
env:
|
| 34 |
+
VITE_WS_URL: ${{ vars.VITE_WS_URL }}
|
| 35 |
+
VITE_API_URL: ${{ vars.VITE_API_URL }}
|
| 36 |
+
run: npm run build
|
| 37 |
+
|
| 38 |
+
- name: Deploy to Vercel
|
| 39 |
+
uses: amondnet/vercel-action@v25
|
| 40 |
+
with:
|
| 41 |
+
vercel-token: ${{ secrets.VERCEL_TOKEN }}
|
| 42 |
+
vercel-org-id: ${{ secrets.VERCEL_ORG_ID }}
|
| 43 |
+
vercel-project-id: ${{ secrets.VERCEL_PROJECT_ID }}
|
| 44 |
+
working-directory: frontend
|
| 45 |
+
vercel-args: '--prod'
|
CNN_Autoencoder_LightGBM/autoencoder_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5843688f059d26851774e553c4afddbc7c0f2f7fc048401b8447f290a63d2cbe
|
| 3 |
+
size 92934
|
CNN_Autoencoder_LightGBM/autoencoder_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3593536edda0328121d5f92fd186a8e40c341799bd9bb703e0e2ad155b6e7aeb
|
| 3 |
+
size 121321
|
CNN_Autoencoder_LightGBM/lgbm_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e83d2bb3a18da0b3ccdd7afc5d044fa52c6e70c4e6090b312a622a866ee0008
|
| 3 |
+
size 3623126
|
CNN_Autoencoder_LightGBM/lgbm_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a09f7b712da5f0e6b63e222e4ea938029567bd8cf496da7ad93752d54219b57
|
| 3 |
+
size 3626367
|
CNN_PreTrained/cnn_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:464df17407edea99db1b69c20e7ff718f6ceafb05f1bbeaacc889499e4cd920a
|
| 3 |
+
size 97136794
|
CNN_PreTrained/cnn_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6eb8e3419763c47b5ba2480ccaf9907e8d748602b26fe59c009b6112fa840ae5
|
| 3 |
+
size 146278905
|
CNN_PreTrained/svm_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf21a17c1340b84359c3431fc4ae8eb05239e4e1ef58dd34ab775f53b9bc7f53
|
| 3 |
+
size 929927
|
CNN_PreTrained/svm_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e706ebf4588c580d0e6ac6f1554f9fd2eaef5564ee02f8022e3ca5f13bb8985b
|
| 3 |
+
size 1079865
|
Dockerfile
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 2 |
+
# SanketSetu Backend — Dockerfile
|
| 3 |
+
# Build context: repo root (SanketSetu/)
|
| 4 |
+
#
|
| 5 |
+
# docker build -t sanketsetu-backend .
|
| 6 |
+
# docker run -p 8000:8000 sanketsetu-backend
|
| 7 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 8 |
+
|
| 9 |
+
FROM python:3.12-slim AS base
|
| 10 |
+
|
| 11 |
+
# System libraries needed by OpenCV headless + Pillow
|
| 12 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 13 |
+
libgl1 libglib2.0-0 libgomp1 \
|
| 14 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 15 |
+
|
| 16 |
+
# ── Python dependencies (cached layer) ───────────────────────────────────────
|
| 17 |
+
WORKDIR /app
|
| 18 |
+
COPY backend/requirements.txt ./
|
| 19 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 20 |
+
|
| 21 |
+
# ── Application source ────────────────────────────────────────────────────────
|
| 22 |
+
COPY backend/app/ ./app/
|
| 23 |
+
|
| 24 |
+
# ── Model artefacts ───────────────────────────────────────────────────────────
|
| 25 |
+
# Copied to /models so the container is fully self-contained.
|
| 26 |
+
# Override at runtime with -e WEIGHTS_DIR=/mnt/models + bind-mount if preferred.
|
| 27 |
+
COPY Mediapipe_XGBoost/ /models/Mediapipe_XGBoost/
|
| 28 |
+
COPY CNN_Autoencoder_LightGBM/ /models/CNN_Autoencoder_LightGBM/
|
| 29 |
+
COPY CNN_PreTrained/ /models/CNN_PreTrained/
|
| 30 |
+
|
| 31 |
+
# ── Runtime environment ───────────────────────────────────────────────────────
|
| 32 |
+
ENV WEIGHTS_DIR=/models \
|
| 33 |
+
KERAS_BACKEND=tensorflow \
|
| 34 |
+
TF_CPP_MIN_LOG_LEVEL=3 \
|
| 35 |
+
CUDA_VISIBLE_DEVICES="" \
|
| 36 |
+
TF_ENABLE_ONEDNN_OPTS=0 \
|
| 37 |
+
OMP_NUM_THREADS=4 \
|
| 38 |
+
PYTHONDONTWRITEBYTECODE=1 \
|
| 39 |
+
PYTHONUNBUFFERED=1
|
| 40 |
+
|
| 41 |
+
EXPOSE 8000
|
| 42 |
+
|
| 43 |
+
# ── Health-check ──────────────────────────────────────────────────────────────
|
| 44 |
+
# Wait up to 3 minutes for models to load before marking the container healthy.
|
| 45 |
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=180s --retries=3 \
|
| 46 |
+
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health', timeout=5)"
|
| 47 |
+
|
| 48 |
+
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
Mediapipe_XGBoost/model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a736b83df3e56b69b0f1c11f018257760746969d6598d90ea2a60c78f8305883
|
| 3 |
+
size 1711525
|
Mediapipe_XGBoost/model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ff5f1cbc121be57f2a7fe04b38925ea740fe79602a6205ca09a748cb0f20b81
|
| 3 |
+
size 1895969
|
README.md
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SanketSetu
|
| 2 |
+
|
| 3 |
+
A real-time sign language recognition system using machine learning and computer vision.
|
| 4 |
+
|
| 5 |
+
## Overview
|
| 6 |
+
|
| 7 |
+
SanketSetu is an intelligent sign language interpretation system that provides real-time recognition and translation of sign language gestures using advanced machine learning models and MediaPipe hand tracking.
|
| 8 |
+
|
| 9 |
+
## Project Structure
|
| 10 |
+
|
| 11 |
+
```
|
| 12 |
+
├── backend/ # FastAPI backend server
|
| 13 |
+
│ ├── app/ # Main application code
|
| 14 |
+
│ │ ├── inference/ # ML inference pipelines
|
| 15 |
+
│ │ └── models/ # Model loading and management
|
| 16 |
+
│ └── tests/ # Backend tests
|
| 17 |
+
├── frontend/ # React + TypeScript frontend
|
| 18 |
+
│ └── src/
|
| 19 |
+
│ ├── components/ # React components
|
| 20 |
+
│ ├── hooks/ # Custom React hooks
|
| 21 |
+
│ └── lib/ # Utility libraries
|
| 22 |
+
├── CNN_Autoencoder_LightGBM/ # CNN Autoencoder + LightGBM model
|
| 23 |
+
├── CNN_PreTrained/ # CNN + SVM model
|
| 24 |
+
└── Mediapipe_XGBoost/ # MediaPipe + XGBoost model
|
| 25 |
+
```
|
| 26 |
+
|
| 27 |
+
## Features
|
| 28 |
+
|
| 29 |
+
- Real-time sign language gesture recognition
|
| 30 |
+
- Multiple ML model ensemble approach
|
| 31 |
+
- WebSocket-based real-time communication
|
| 32 |
+
- MediaPipe hand landmark tracking
|
| 33 |
+
- Interactive webcam feed with visual feedback
|
| 34 |
+
- Prediction confidence display
|
| 35 |
+
|
| 36 |
+
## Tech Stack
|
| 37 |
+
|
| 38 |
+
### Backend
|
| 39 |
+
- FastAPI
|
| 40 |
+
- Python 3.x
|
| 41 |
+
- PyTorch
|
| 42 |
+
- LightGBM
|
| 43 |
+
- XGBoost
|
| 44 |
+
- MediaPipe
|
| 45 |
+
|
| 46 |
+
### Frontend
|
| 47 |
+
- React
|
| 48 |
+
- TypeScript
|
| 49 |
+
- Vite
|
| 50 |
+
- TailwindCSS
|
| 51 |
+
|
| 52 |
+
## Getting Started
|
| 53 |
+
|
| 54 |
+
### Prerequisites
|
| 55 |
+
- Python 3.8+
|
| 56 |
+
- Node.js 16+
|
| 57 |
+
- npm or yarn
|
| 58 |
+
|
| 59 |
+
### Backend Setup
|
| 60 |
+
|
| 61 |
+
```bash
|
| 62 |
+
cd backend
|
| 63 |
+
pip install -r requirements.txt
|
| 64 |
+
python -m app.main
|
| 65 |
+
```
|
| 66 |
+
|
| 67 |
+
### Frontend Setup
|
| 68 |
+
|
| 69 |
+
```bash
|
| 70 |
+
cd frontend
|
| 71 |
+
npm install
|
| 72 |
+
npm run dev
|
| 73 |
+
```
|
| 74 |
+
|
| 75 |
+
## Development
|
| 76 |
+
|
| 77 |
+
Run the development servers:
|
| 78 |
+
|
| 79 |
+
```bash
|
| 80 |
+
# Start both frontend and backend
|
| 81 |
+
.\start.ps1
|
| 82 |
+
```
|
| 83 |
+
|
| 84 |
+
## Docker
|
| 85 |
+
|
| 86 |
+
Build and run using Docker:
|
| 87 |
+
|
| 88 |
+
```bash
|
| 89 |
+
docker build -t sanketsetu .
|
| 90 |
+
docker run -p 8000:8000 sanketsetu
|
| 91 |
+
```
|
| 92 |
+
|
| 93 |
+
## Testing
|
| 94 |
+
|
| 95 |
+
Run backend tests:
|
| 96 |
+
|
| 97 |
+
```bash
|
| 98 |
+
cd backend
|
| 99 |
+
pytest
|
| 100 |
+
```
|
| 101 |
+
|
| 102 |
+
## License
|
| 103 |
+
|
| 104 |
+
All rights reserved.
|
| 105 |
+
|
| 106 |
+
## Author
|
| 107 |
+
|
| 108 |
+
Devrajsinh Gohil (devrajsinh2012)
|
SanketSetu_ Production-Grade Implementation Plan.md
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SanketSetu: Production-Grade Implementation Plan
|
| 2 |
+
|
| 3 |
+
## 1. Executive Summary
|
| 4 |
+
**SanketSetu** (Bridge of Signs) is a high-performance, real-time Gujarati Sign Language (GSL) recognition system. This document outlines a production-ready architecture designed to run entirely on **free-tier cloud services**. The system leverages a decoupled architecture with a React-based interactive frontend and a FastAPI backend, ensuring low-latency inference and a seamless user experience.
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
## 2. High-Level System Architecture
|
| 9 |
+
The system follows a modern microservices-inspired pattern to ensure scalability and ease of updates.
|
| 10 |
+
|
| 11 |
+
| Component | Technology | Role | Hosting (Free Tier) |
|
| 12 |
+
| :--- | :--- | :--- | :--- |
|
| 13 |
+
| **Frontend** | React + Vite + TS | User interface, webcam capture, real-time feedback | **Vercel** |
|
| 14 |
+
| **Backend API** | FastAPI (Python) | WebSocket management, API gateway, logic | **Fly.io** |
|
| 15 |
+
| **Inference Engine** | ONNX Runtime / XGBoost | High-speed model execution | **Fly.io** (Internal) |
|
| 16 |
+
| **Storage** | Cloudflare R2 | S3-compatible storage for model weights | **Cloudflare** |
|
| 17 |
+
| **Real-time** | WebSockets (WSS) | Low-latency frame-by-frame data transfer | N/A |
|
| 18 |
+
|
| 19 |
+
---
|
| 20 |
+
|
| 21 |
+
## 3. Backend Implementation Details
|
| 22 |
+
|
| 23 |
+
### 3.1 API Design (FastAPI)
|
| 24 |
+
The backend is built for speed. It handles binary data from WebSockets to minimize overhead.
|
| 25 |
+
|
| 26 |
+
* **WebSocket Protocol**: The client sends a stream of normalized hand landmark coordinates (63 points per frame) extracted locally via MediaPipe. This reduces bandwidth significantly compared to sending raw video frames.
|
| 27 |
+
* **Concurrency**: Uses `asyncio` to handle multiple simultaneous user connections without blocking the event loop.
|
| 28 |
+
* **Model Loading**: Models are loaded into memory at startup using a Singleton pattern to ensure zero-latency on the first request.
|
| 29 |
+
|
| 30 |
+
### 3.2 Model Serving Strategy
|
| 31 |
+
1. **Primary Model**: The **XGBoost** model is used as the default due to its sub-millisecond inference time.
|
| 32 |
+
2. **Backup/Ensemble**: The system can optionally query the **CNN+SVM** or **LGBM** models for high-confidence verification if the XGBoost score is below a certain threshold.
|
| 33 |
+
3. **Optimization**: Models are converted to **ONNX** format to leverage the ONNX Runtime's hardware-specific optimizations, even on free-tier CPU instances.
|
| 34 |
+
|
| 35 |
+
---
|
| 36 |
+
|
| 37 |
+
## 4. Frontend & Interactive UI/UX
|
| 38 |
+
|
| 39 |
+
The frontend is designed to be "cool," responsive, and highly interactive, providing users with a "futuristic" feel.
|
| 40 |
+
|
| 41 |
+
### 4.1 Tech Stack
|
| 42 |
+
* **Styling**: Tailwind CSS for rapid, modern UI development.
|
| 43 |
+
* **Animations**: Framer Motion for smooth transitions, layout changes, and interactive elements.
|
| 44 |
+
* **Icons**: Lucide React for a clean, consistent icon set.
|
| 45 |
+
|
| 46 |
+
### 4.2 Key UI Features
|
| 47 |
+
* **Glassmorphism Design**: Use of semi-transparent backgrounds with blur effects for a modern look.
|
| 48 |
+
* **Interactive Landmark Overlay**: A canvas overlay on the webcam feed that draws the 21 hand landmarks in real-time. Landmarks will "glow" when a sign is successfully recognized.
|
| 49 |
+
* **Dynamic Prediction HUD**: A Head-Up Display (HUD) style interface that shows the current prediction, confidence level, and a history of recently detected signs.
|
| 50 |
+
* **Responsive Layout**: Fully functional on mobile and desktop, with optimized camera controls for both.
|
| 51 |
+
|
| 52 |
+
### 4.3 User Experience Flow
|
| 53 |
+
1. **Onboarding**: A quick, animated guide on how to position the hand for best results.
|
| 54 |
+
2. **Calibration**: A brief "Ready?" state that ensures the lighting and hand distance are optimal.
|
| 55 |
+
3. **Real-time Translation**: Instant feedback as the user signs, with the translated Gujarati text appearing in a stylized "speech bubble" or text box.
|
| 56 |
+
|
| 57 |
+
---
|
| 58 |
+
|
| 59 |
+
## 4. Deployment & DevOps
|
| 60 |
+
|
| 61 |
+
### 4.1 Continuous Integration/Deployment (CI/CD)
|
| 62 |
+
Using **GitHub Actions**, the project will follow a strict deployment pipeline:
|
| 63 |
+
1. **Lint & Test**: Ensure code quality and run unit tests for ML logic.
|
| 64 |
+
2. **Build**: Create optimized production builds for the React app and Dockerize the FastAPI backend.
|
| 65 |
+
3. **Deploy**:
|
| 66 |
+
* Frontend automatically pushes to **Vercel**.
|
| 67 |
+
* Backend pushes to **Fly.io** using `flyctl`.
|
| 68 |
+
|
| 69 |
+
### 4.2 Scalability & Cost Management
|
| 70 |
+
* **Scale-to-Zero**: The backend on Fly.io can be configured to sleep when not in use to preserve free-tier resources.
|
| 71 |
+
* **CDN Caching**: Vercel's Edge Network will cache all static assets, ensuring fast load times globally.
|
| 72 |
+
|
| 73 |
+
---
|
| 74 |
+
|
| 75 |
+
## 5. Implementation Roadmap
|
| 76 |
+
|
| 77 |
+
### Phase 1: Core Backend & ML Integration
|
| 78 |
+
- [ ] Set up FastAPI project structure.
|
| 79 |
+
- [ ] Implement WebSocket handler for landmark data.
|
| 80 |
+
- [ ] Integrate the trained XGBoost model for real-time inference.
|
| 81 |
+
|
| 82 |
+
### Phase 2: Advanced Frontend Development
|
| 83 |
+
- [ ] Initialize Vite + React project with Tailwind.
|
| 84 |
+
- [ ] Implement webcam capture and MediaPipe landmark extraction (client-side).
|
| 85 |
+
- [ ] Create the interactive HUD and glassmorphism UI.
|
| 86 |
+
|
| 87 |
+
### Phase 3: Production Hardening
|
| 88 |
+
- [ ] Set up GitHub Actions for automated deployment.
|
| 89 |
+
- [ ] Implement error handling for low-bandwidth scenarios.
|
| 90 |
+
- [ ] Finalize documentation and user guide.
|
| 91 |
+
|
| 92 |
+
---
|
| 93 |
+
|
| 94 |
+
## 6. References
|
| 95 |
+
[1] [FastAPI Documentation](https://fastapi.tiangolo.com/) - High-performance web framework for building APIs.
|
| 96 |
+
[2] [MediaPipe Hands](https://developers.google.com/mediapipe/solutions/vision/hand_landmarker) - Real-time hand landmark detection.
|
| 97 |
+
[3] [Framer Motion](https://www.framer.com/motion/) - A production-ready motion library for React.
|
| 98 |
+
[4] [Fly.io Free Tier](https://fly.io/docs/about/pricing/) - Details on free-tier resource allocation.
|
| 99 |
+
[5] [Vercel Deployment](https://vercel.com/docs/deployments/overview) - Global CDN and hosting for frontend applications.
|
TASKS.md
ADDED
|
@@ -0,0 +1,284 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SanketSetu — Execution TODO & Implementation Tracker
|
| 2 |
+
|
| 3 |
+
## Model Analysis (Reviewed 2026-03-02)
|
| 4 |
+
|
| 5 |
+
All 5 model files inspected. Three distinct inference pipelines exist:
|
| 6 |
+
|
| 7 |
+
| Pipeline | Files | Input | Process | Output |
|
| 8 |
+
|---|---|---|---|---|
|
| 9 |
+
| **A — Primary (Fastest)** | `Mediapipe_XGBoost/model.pkl` | 63 MediaPipe coords (21 landmarks × x,y,z) | XGBClassifier (50 trees) | 34-class probability |
|
| 10 |
+
| **B — Autoencoder + LGBM** | `CNN_Autoencoder_LightGBM/autoencoder_model.pkl` + `lgbm_model.pkl` | 63 MediaPipe coords | Encoder (63→32→**16** bottleneck) + LGBMClassifier | 34-class probability |
|
| 11 |
+
| **C — Vision CNN + SVM** | `CNN_PreTrained/cnn_model.pkl` + `svm_model.pkl` | 128×128×3 RGB image | ResNet50-based CNN (179 layers) → 256 features + SVC(C=10) | 34-class probability w/ probability=True |
|
| 12 |
+
|
| 13 |
+
### Key Architecture Facts
|
| 14 |
+
- **34 classes** (Gujarati Sign Language alphabet + digits, labels 0–33)
|
| 15 |
+
- **Pipeline A** input: 63 floats — directly from MediaPipe `hand_landmarks` (x, y, z per landmark, flattened)
|
| 16 |
+
- **Pipeline B** input: same 63 floats → takes only the encoder half (first 3 Dense layers, output of `dense_1` layer = 16 features)
|
| 17 |
+
- **Pipeline C** input: 128×128 BGR/RGB cropped hand image, normalized to [0,1]
|
| 18 |
+
- All `.pth` files are identical copies of the `.pkl` files (same objects, different extension)
|
| 19 |
+
- Model quality strategy: A is primary (sub-ms); if confidence < threshold, query B or C for ensemble
|
| 20 |
+
|
| 21 |
+
---
|
| 22 |
+
|
| 23 |
+
## Project Folder Structure to Create
|
| 24 |
+
|
| 25 |
+
```
|
| 26 |
+
SanketSetu/
|
| 27 |
+
├── backend/ ← FastAPI server
|
| 28 |
+
│ ├── app/
|
| 29 |
+
│ │ ├── main.py ← FastAPI entry, WebSocket + REST
|
| 30 |
+
│ │ ├── models/
|
| 31 |
+
│ │ │ ├── loader.py ← Singleton model loader
|
| 32 |
+
│ │ │ └── label_map.py ← 0–33 → Gujarati sign name mapping
|
| 33 |
+
│ │ ├── inference/
|
| 34 |
+
│ │ │ ├── pipeline_a.py ← XGBoost inference (63 landmarks)
|
| 35 |
+
│ │ │ ├── pipeline_b.py ← Autoencoder encoder + LightGBM
|
| 36 |
+
│ │ │ ├── pipeline_c.py ← ResNet CNN + SVM (image-based)
|
| 37 |
+
│ │ │ └── ensemble.py ← Confidence-weighted ensemble logic
|
| 38 |
+
│ │ ├── schemas.py ← Pydantic request/response models
|
| 39 |
+
│ │ └── config.py ← Settings (confidence threshold, etc.)
|
| 40 |
+
│ ├── weights/ ← Symlink or copy of model pkl files
|
| 41 |
+
│ ├── requirements.txt
|
| 42 |
+
│ ├── Dockerfile
|
| 43 |
+
│ └── fly.toml
|
| 44 |
+
│
|
| 45 |
+
├── frontend/ ← Vite + React + TS
|
| 46 |
+
│ ├── src/
|
| 47 |
+
│ │ ├── components/
|
| 48 |
+
│ │ │ ├── WebcamFeed.tsx ← Webcam + canvas landmark overlay
|
| 49 |
+
│ │ │ ├── LandmarkCanvas.tsx ← Draws 21 hand points + connections
|
| 50 |
+
│ │ │ ├── PredictionHUD.tsx ← Live sign, confidence bar, history
|
| 51 |
+
│ │ │ ├── OnboardingGuide.tsx ← Animated intro wizard
|
| 52 |
+
│ │ │ └── Calibration.tsx ← Lighting/distance check UI
|
| 53 |
+
│ │ ├── hooks/
|
| 54 |
+
│ │ │ ├── useWebSocket.ts ← WS connection, send/receive
|
| 55 |
+
│ │ │ ├── useMediaPipe.ts ← MediaPipe Hands JS integration
|
| 56 |
+
│ │ │ └── useWebcam.ts ← Camera permissions + stream
|
| 57 |
+
│ │ ├── lib/
|
| 58 |
+
│ │ │ └── landmarkUtils.ts ← Landmark normalization (mirror XGBoost preprocessing)
|
| 59 |
+
│ │ ├── App.tsx
|
| 60 |
+
│ │ └── main.tsx
|
| 61 |
+
│ ├── public/
|
| 62 |
+
│ ├── index.html
|
| 63 |
+
│ ├── tailwind.config.ts
|
| 64 |
+
│ ├── vite.config.ts
|
| 65 |
+
│ └── package.json
|
| 66 |
+
│
|
| 67 |
+
├── CNN_Autoencoder_LightGBM/ ← (existing)
|
| 68 |
+
├── CNN_PreTrained/ ← (existing)
|
| 69 |
+
├── Mediapipe_XGBoost/ ← (existing)
|
| 70 |
+
└── .github/
|
| 71 |
+
└── workflows/
|
| 72 |
+
├── deploy-backend.yml
|
| 73 |
+
└── deploy-frontend.yml
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
---
|
| 77 |
+
|
| 78 |
+
## Phase 1 — Backend Core (FastAPI + Model Integration)
|
| 79 |
+
|
| 80 |
+
### 1.1 Project Bootstrap
|
| 81 |
+
- [x] Create `backend/` folder and `app/` package structure
|
| 82 |
+
- [x] Create `backend/requirements.txt` with: `fastapi`, `uvicorn[standard]`, `websockets`, `xgboost`, `lightgbm`, `scikit-learn`, `keras==3.13.2`, `tensorflow-cpu`, `numpy`, `opencv-python-headless`, `pillow`, `python-dotenv`
|
| 83 |
+
- [x] Create `backend/app/config.py` — confidence threshold (default 0.7), WebSocket max connections, pipeline mode (A/B/C/ensemble)
|
| 84 |
+
- [x] Create `backend/app/models/label_map.py` — map class indices 0–33 to Gujarati sign names
|
| 85 |
+
|
| 86 |
+
### 1.2 Model Loader (Singleton)
|
| 87 |
+
- [x] Create `backend/app/models/loader.py`
|
| 88 |
+
- Load `model.pkl` (XGBoost) at startup
|
| 89 |
+
- Load `autoencoder_model.pkl` (extract encoder layers only: input → dense → dense_1) and `lgbm_model.pkl`
|
| 90 |
+
- Load `cnn_model.pkl` (full ResNet50 feature extractor, strip any classification head) and `svm_model.pkl`
|
| 91 |
+
- Expose `ModelStore` singleton accessed via `get_model_store()` dependency
|
| 92 |
+
- Log load times for each model
|
| 93 |
+
|
| 94 |
+
### 1.3 Pipeline A — XGBoost (Primary, Landmarks)
|
| 95 |
+
- [x] Create `backend/app/inference/pipeline_a.py`
|
| 96 |
+
- Input: `List[float]` of length 63 (x,y,z per landmark, already normalized by MediaPipe)
|
| 97 |
+
- Output: `{"sign": str, "confidence": float, "probabilities": List[float]}`
|
| 98 |
+
- Use `model.predict_proba(np.array(landmarks).reshape(1,-1))[0]`
|
| 99 |
+
- Return `classes_[argmax]` and `max(probabilities)` as confidence
|
| 100 |
+
|
| 101 |
+
### 1.4 Pipeline B — Autoencoder Encoder + LightGBM
|
| 102 |
+
- [x] Create `backend/app/inference/pipeline_b.py`
|
| 103 |
+
- Build encoder-only submodel: `encoder = keras.Model(inputs=model.input, outputs=model.layers[2].output)` (output of `dense_1`, the 16-D bottleneck)
|
| 104 |
+
- Input: 63 MediaPipe coords
|
| 105 |
+
- Encode: `features = encoder.predict(np.array(landmarks).reshape(1,-1))[0]` → shape (16,)
|
| 106 |
+
- Classify: `lgbm.predict_proba(features.reshape(1,-1))[0]`
|
| 107 |
+
|
| 108 |
+
### 1.5 Pipeline C — CNN + SVM (Image-based)
|
| 109 |
+
- [x] Create `backend/app/inference/pipeline_c.py`
|
| 110 |
+
- Input: base64-encoded JPEG or raw bytes of the cropped hand region (128×128 px)
|
| 111 |
+
- Decode → numpy array (128,128,3) uint8 → normalize to float32 [0,1]
|
| 112 |
+
- `features = cnn_model.predict(img[np.newaxis])[0]` → shape (256,)
|
| 113 |
+
- `proba = svm.predict_proba(features.reshape(1,-1))[0]`
|
| 114 |
+
- Note: CNN inference is slower (~50–200ms on CPU); only call when Pipeline A confidence < threshold
|
| 115 |
+
|
| 116 |
+
### 1.6 Ensemble Logic
|
| 117 |
+
- [x] Create `backend/app/inference/ensemble.py`
|
| 118 |
+
- Call Pipeline A first
|
| 119 |
+
- If `confidence < config.THRESHOLD` (default 0.7), call Pipeline B
|
| 120 |
+
- If still below threshold and image data available, call Pipeline C
|
| 121 |
+
- Final result: weighted average of probabilities from each pipeline that was called
|
| 122 |
+
- Return the top predicted class and ensemble confidence score
|
| 123 |
+
|
| 124 |
+
### 1.7 WebSocket Handler
|
| 125 |
+
- [x] Create `backend/app/main.py` with FastAPI app
|
| 126 |
+
- [x] Implement `GET /health` — returns `{"status": "ok", "models_loaded": true}`
|
| 127 |
+
- [x] Implement `WS /ws/landmarks` — primary endpoint
|
| 128 |
+
- Client sends JSON: `{"landmarks": [63 floats], "session_id": "..."}`
|
| 129 |
+
- Server responds: `{"sign": "...", "confidence": 0.95, "pipeline": "A", "label_index": 12}`
|
| 130 |
+
- Handle disconnect gracefully
|
| 131 |
+
- [x] Implement `WS /ws/image` — optional image-based endpoint for Pipeline C
|
| 132 |
+
- Client sends JSON: `{"image_b64": "...", "session_id": "..."}`
|
| 133 |
+
- [x] Implement `POST /api/predict` — REST fallback for non-WS clients
|
| 134 |
+
- Body: `{"landmarks": [63 floats]}`
|
| 135 |
+
- Returns same response schema as WS
|
| 136 |
+
|
| 137 |
+
### 1.8 Schemas & Validation
|
| 138 |
+
- [x] Create `backend/app/schemas.py`
|
| 139 |
+
- `LandmarkMessage(BaseModel)`: `landmarks: List[float]` (must be length 63), `session_id: str`
|
| 140 |
+
- `ImageMessage(BaseModel)`: `image_b64: str`, `session_id: str`
|
| 141 |
+
- `PredictionResponse(BaseModel)`: `sign: str`, `confidence: float`, `pipeline: str`, `label_index: int`, `probabilities: Optional[List[float]]`
|
| 142 |
+
|
| 143 |
+
### 1.9 CORS & Middleware
|
| 144 |
+
- [x] Configure CORS for Vercel frontend domain + localhost:5173
|
| 145 |
+
- [x] Add request logging middleware (log session_id, pipeline used, latency ms)
|
| 146 |
+
- [x] Add global exception handler returning proper JSON errors
|
| 147 |
+
|
| 148 |
+
---
|
| 149 |
+
|
| 150 |
+
## Phase 2 — Frontend (React + Vite + Tailwind + Framer Motion)
|
| 151 |
+
|
| 152 |
+
### 2.1 Project Bootstrap
|
| 153 |
+
- [x] Run `npm create vite@latest frontend -- --template react-ts` inside `SanketSetu/`
|
| 154 |
+
- [x] Install deps: `tailwindcss`, `framer-motion`, `lucide-react`, `@mediapipe/tasks-vision`
|
| 155 |
+
- [x] Configure Tailwind with custom palette (dark neon-cyan glassmorphism theme)
|
| 156 |
+
- [x] Set up `vite.config.ts` proxy: `/api` → backend URL, `/ws` → backend WS URL
|
| 157 |
+
|
| 158 |
+
### 2.2 Webcam Hook (`useWebcam.ts`)
|
| 159 |
+
- [x] Request `getUserMedia({ video: { width: 1280, height: 720 } })`
|
| 160 |
+
- [x] Expose `videoRef`, `isReady`, `error`, `switchCamera()` (for mobile front/back toggle)
|
| 161 |
+
- [x] Handle permission denied state with instructional UI
|
| 162 |
+
|
| 163 |
+
### 2.3 MediaPipe Hook (`useMediaPipe.ts`)
|
| 164 |
+
- [x] Initialize `HandLandmarker` from `@mediapipe/tasks-vision` (WASM backend)
|
| 165 |
+
- [x] Process video frames at target 30fps using `requestAnimationFrame`
|
| 166 |
+
- [x] Extract `landmarks[0]` (first hand) → flatten to 63 floats `[x0,y0,z0, x1,y1,z1, ...]`
|
| 167 |
+
- [x] Normalize: subtract wrist (landmark 0) position to make translation-invariant — **must match training preprocessing**
|
| 168 |
+
- [x] Expose `landmarks: number[] | null`, `handedness: string`, `isDetecting: boolean`
|
| 169 |
+
|
| 170 |
+
### 2.4 WebSocket Hook (`useWebSocket.ts`)
|
| 171 |
+
- [x] Connect to `wss://backend-url/ws/landmarks` on mount
|
| 172 |
+
- [x] Auto-reconnect with exponential backoff on disconnect
|
| 173 |
+
- [x] `sendLandmarks(landmarks: number[])` — throttled to max 15 sends/sec
|
| 174 |
+
- [x] Expose `lastPrediction: PredictionResponse | null`, `isConnected: boolean`, `latency: number`
|
| 175 |
+
|
| 176 |
+
### 2.5 Landmark Canvas (`LandmarkCanvas.tsx`)
|
| 177 |
+
- [x] Overlay `<canvas>` on top of `<video>` with `position: absolute`
|
| 178 |
+
- [x] Draw 21 hand landmark dots (cyan glow: `shadowBlur`, `shadowColor`)
|
| 179 |
+
- [x] Draw 21 bone connections following MediaPipe hand topology (finger segments)
|
| 180 |
+
- [x] On successful prediction: animate landmarks to pulse/glow with Framer Motion spring
|
| 181 |
+
- [x] Use `requestAnimationFrame` for smooth 60fps rendering
|
| 182 |
+
|
| 183 |
+
### 2.6 Prediction HUD (`PredictionHUD.tsx`)
|
| 184 |
+
- [x] Glassmorphism card: `backdrop-blur`, `bg-white/10`, `border-white/20`
|
| 185 |
+
- [x] Large Gujarati sign name (mapped from label index)
|
| 186 |
+
- [x] Confidence bar: animated width transition via Framer Motion `animate={{ width: confidence% }}`
|
| 187 |
+
- [x] Color coding: green (>85%), yellow (60–85%), red (<60%)
|
| 188 |
+
- [x] Rolling history list: last 10 recognized signs (Framer Motion `AnimatePresence` for enter/exit)
|
| 189 |
+
- [x] Pipeline badge: shows which pipeline (A/B/C) produced the result
|
| 190 |
+
- [x] Latency display: shows WS round-trip time in ms
|
| 191 |
+
|
| 192 |
+
### 2.7 Onboarding Guide (`OnboardingGuide.tsx`)
|
| 193 |
+
- [x] 3-step animated wizard using Framer Motion page transitions
|
| 194 |
+
1. "Position your hand 30–60cm from camera"
|
| 195 |
+
2. "Ensure good lighting, avoid dark backgrounds"
|
| 196 |
+
3. "Show signs clearly — palm facing camera"
|
| 197 |
+
- [x] Skip button + "Don't show again" (localStorage)
|
| 198 |
+
|
| 199 |
+
### 2.8 Calibration Screen (`Calibration.tsx`)
|
| 200 |
+
- [x] Brief 2-second "Ready?" screen after onboarding
|
| 201 |
+
- [x] Check: hand detected by MediaPipe → show green checkmark animation
|
| 202 |
+
- [x] Auto-transitions to main translation view when hand is stable for 1 second
|
| 203 |
+
|
| 204 |
+
### 2.9 Main App Layout (`App.tsx`)
|
| 205 |
+
- [x] Full-screen dark background with subtle animated gradient
|
| 206 |
+
- [x] Three-panel layout (desktop): webcam | HUD | history
|
| 207 |
+
- [x] Mobile: stacked layout with webcam top, HUD bottom
|
| 208 |
+
- [x] Header: "SanketSetu | સંકેત-સેતુ" with glowing text effect
|
| 209 |
+
- [x] Settings gear icon → modal for pipeline selection (A / B / C / Ensemble), confidence threshold slider
|
| 210 |
+
|
| 211 |
+
---
|
| 212 |
+
|
| 213 |
+
## Phase 3 — Dockerization & Deployment
|
| 214 |
+
|
| 215 |
+
### 3.1 Backend Dockerfile
|
| 216 |
+
- [x] Create `Dockerfile` (repo root, build context includes models)
|
| 217 |
+
- [x] Add `.dockerignore` (excludes `.venv`, `node_modules`, `*.pth`, tests)
|
| 218 |
+
- [ ] Test locally: `docker build -t sanketsetu-backend . && docker run -p 8000:8000 sanketsetu-backend`
|
| 219 |
+
|
| 220 |
+
### 3.2 Fly.io Configuration
|
| 221 |
+
- [x] Create `fly.toml` (repo root, region=maa, port 8000, shared-cpu-2x)
|
| 222 |
+
- [x] Note: Keras/TF will increase Docker image size — use `tensorflow-cpu` to keep slim
|
| 223 |
+
- [ ] Set secrets via `flyctl secrets set` for any API keys
|
| 224 |
+
- [ ] Run: `flyctl deploy --dockerfile Dockerfile`
|
| 225 |
+
|
| 226 |
+
### 3.3 Vercel Frontend Deployment
|
| 227 |
+
- [x] Create `frontend/vercel.json` with SPA rewrite + WASM Content-Type header
|
| 228 |
+
- [x] Add `VITE_WS_URL` and `VITE_API_URL` to Vercel environment variables (via CI vars)
|
| 229 |
+
- [ ] Ensure `@mediapipe/tasks-vision` WASM files are served correctly (add to `public/`)
|
| 230 |
+
|
| 231 |
+
### 3.4 GitHub Actions CI/CD
|
| 232 |
+
- [x] Create `.github/workflows/deploy-backend.yml`
|
| 233 |
+
- Triggers on push to `main` when `backend/**` changes
|
| 234 |
+
- Steps: checkout → setup Python → run tests → `flyctl deploy`
|
| 235 |
+
- [x] Create `.github/workflows/deploy-frontend.yml`
|
| 236 |
+
- Triggers on push to `main` when `frontend/**` changes
|
| 237 |
+
- Steps: checkout → `npm ci` → tsc → `npm run build` → Vercel CLI deploy
|
| 238 |
+
|
| 239 |
+
---
|
| 240 |
+
|
| 241 |
+
## Phase 4 — Testing & Hardening
|
| 242 |
+
|
| 243 |
+
### 4.1 Backend Tests
|
| 244 |
+
- [x] `tests/test_pipeline_a.py` — 8 unit tests, XGBoost inference (4s)
|
| 245 |
+
- [x] `tests/test_pipeline_b.py` — 6 unit tests, encoder + LightGBM (49s)
|
| 246 |
+
- [x] `tests/test_pipeline_c.py` — 7 unit tests, CNN + SVM with real 128×128 images (14s)
|
| 247 |
+
- [x] `tests/test_websocket.py` — 7 integration tests, health + REST + WS round-trip
|
| 248 |
+
|
| 249 |
+
### 4.2 Frontend Error Handling
|
| 250 |
+
- [ ] No-camera fallback UI (file upload for image mode)
|
| 251 |
+
- [x] WS reconnecting banner (red banner when `!isConnected && stage === 'running'`)
|
| 252 |
+
- [x] Low-bandwidth mode: reduce send rate to 5fps if latency > 500ms + yellow "LB" badge in HUD
|
| 253 |
+
- [x] MediaPipe WASM load failure fallback message (shown in header via `mpError`)
|
| 254 |
+
|
| 255 |
+
### 4.3 Label Map (Critical)
|
| 256 |
+
- [ ] Create `backend/app/models/label_map.py` mapping classes 0–33 to actual Gujarati signs
|
| 257 |
+
- You need to confirm the exact mapping used during training (check your original dataset/notebook)
|
| 258 |
+
- Placeholder: `LABEL_MAP = { 0: "ક", 1: "ખ", ... , 33: "?" }`
|
| 259 |
+
- This file must exactly mirror what was used in training
|
| 260 |
+
|
| 261 |
+
---
|
| 262 |
+
|
| 263 |
+
## Execution Order (Start Here)
|
| 264 |
+
|
| 265 |
+
```
|
| 266 |
+
Week 1: Phase 1.1 → 1.3 → 1.7 (get WS working with Pipeline A alone, test in browser)
|
| 267 |
+
Week 2: Phase 1.4 → 1.5 → 1.6 (add other pipelines + ensemble)
|
| 268 |
+
Week 3: Phase 2.1 → 2.2 → 2.3 → 2.4 (React skeleton + WS connected)
|
| 269 |
+
Week 4: Phase 2.5 → 2.6 → 2.7 → 2.8 → 2.9 (full UI)
|
| 270 |
+
Week 5: Phase 3 + 4 (deploy + tests)
|
| 271 |
+
```
|
| 272 |
+
|
| 273 |
+
---
|
| 274 |
+
|
| 275 |
+
## Critical Decision Points
|
| 276 |
+
|
| 277 |
+
| Decision | Default | Notes |
|
| 278 |
+
|---|---|---|
|
| 279 |
+
| Primary pipeline | **A (XGBoost)** | Sub-ms inference, uses MediaPipe landmarks already extracted client-side |
|
| 280 |
+
| Confidence threshold for fallback | **0.70** | Tune after testing — if XGBoost confidence < 70%, call Pipeline B |
|
| 281 |
+
| Enable Pipeline C (CNN) | **Optional / off by default** | Adds ~150ms latency and requires image upload, not just landmarks |
|
| 282 |
+
| MediaPipe model variant | **lite** | Use `hand_landmarker_lite.task` for mobile performance |
|
| 283 |
+
| WebSocket frame rate | **15fps** | Sufficient for sign recognition, avoids server overload |
|
| 284 |
+
| Gujarati label map | **CONFIRM WITH DATASET** | Classes 0–33 must match training data exactly |
|
backend/.env.example
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SanketSetu Backend — environment variables
|
| 2 |
+
# Copy this file to .env and edit as needed.
|
| 3 |
+
# All values below are the defaults; remove a line to keep the default.
|
| 4 |
+
|
| 5 |
+
# ── Model paths (default: resolved from repo root) ─────────────────────────
|
| 6 |
+
# WEIGHTS_DIR=/absolute/path/to/model/dir
|
| 7 |
+
|
| 8 |
+
# ── Inference ───────────────────────────────────────────────────────────────
|
| 9 |
+
# Pipeline A confidence below this → also run Pipeline B
|
| 10 |
+
CONFIDENCE_THRESHOLD=0.70
|
| 11 |
+
|
| 12 |
+
# Pipeline A+B ensemble confidence below this → also run Pipeline C (if image)
|
| 13 |
+
SECONDARY_THRESHOLD=0.60
|
| 14 |
+
|
| 15 |
+
# Which pipeline to run: A | B | C | ensemble
|
| 16 |
+
PIPELINE_MODE=ensemble
|
| 17 |
+
|
| 18 |
+
# ── Server ──────────────────────────────────────────────────────────────────
|
| 19 |
+
MAX_WS_CONNECTIONS=100
|
| 20 |
+
|
| 21 |
+
# Comma-separated list of allowed CORS origins
|
| 22 |
+
CORS_ORIGINS=http://localhost:5173,http://localhost:3000
|
| 23 |
+
|
| 24 |
+
# ── TensorFlow / Keras ──────────────────────────────────────────────────────
|
| 25 |
+
KERAS_BACKEND=tensorflow
|
| 26 |
+
TF_CPP_MIN_LOG_LEVEL=3
|
| 27 |
+
# Empty = CPU-only, skip GPU scan (faster startup). Keep the comment on its own
# line: docker --env-file and shell sourcing do not strip inline comments.
CUDA_VISIBLE_DEVICES=
|
| 28 |
+
TF_ENABLE_ONEDNN_OPTS=0
|
| 29 |
+
OMP_NUM_THREADS=4
|
| 30 |
+
|
| 31 |
+
# ── Logging ─────────────────────────────────────────────────────────────────
|
| 32 |
+
LOG_LEVEL=INFO
|
backend/app/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# backend/app/__init__.py
|
backend/app/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (162 Bytes). View file
|
|
|
backend/app/__pycache__/config.cpython-312.pyc
ADDED
|
Binary file (2.97 kB). View file
|
|
|
backend/app/__pycache__/main.cpython-312.pyc
ADDED
|
Binary file (12.1 kB). View file
|
|
|
backend/app/__pycache__/schemas.cpython-312.pyc
ADDED
|
Binary file (3.93 kB). View file
|
|
|
backend/app/config.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Application-wide settings for SanketSetu backend.
|
| 3 |
+
Override any value by setting the corresponding environment variable.
|
| 4 |
+
"""
|
| 5 |
+
from __future__ import annotations
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
# ---------------------------------------------------------------------------
|
| 11 |
+
# TensorFlow / Keras startup optimisations
|
| 12 |
+
# Set these BEFORE any import that might pull in tensorflow.
|
| 13 |
+
# ---------------------------------------------------------------------------
|
| 14 |
+
os.environ.setdefault("KERAS_BACKEND", "tensorflow")
|
| 15 |
+
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "3") # silence C++ TF logs
|
| 16 |
+
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "") # CPU-only: skip GPU scan
|
| 17 |
+
os.environ.setdefault("TF_ENABLE_ONEDNN_OPTS", "0") # disable oneDNN init check
|
| 18 |
+
os.environ.setdefault("OMP_NUM_THREADS", "4") # cap CPU thread pool
|
| 19 |
+
|
| 20 |
+
# ---------------------------------------------------------------------------
|
| 21 |
+
# Paths
|
| 22 |
+
# ---------------------------------------------------------------------------
|
| 23 |
+
BASE_DIR = Path(__file__).resolve().parent.parent.parent # repo root
|
| 24 |
+
WEIGHTS_DIR = os.getenv("WEIGHTS_DIR", str(BASE_DIR))
|
| 25 |
+
|
| 26 |
+
# Individual model paths (relative to repo root)
|
| 27 |
+
PIPELINE_A_MODEL = os.path.join(WEIGHTS_DIR, "Mediapipe_XGBoost", "model.pkl")
|
| 28 |
+
PIPELINE_B_AE = os.path.join(WEIGHTS_DIR, "CNN_Autoencoder_LightGBM", "autoencoder_model.pkl")
|
| 29 |
+
PIPELINE_B_LGBM = os.path.join(WEIGHTS_DIR, "CNN_Autoencoder_LightGBM", "lgbm_model.pkl")
|
| 30 |
+
PIPELINE_C_CNN = os.path.join(WEIGHTS_DIR, "CNN_PreTrained", "cnn_model.pkl")
|
| 31 |
+
PIPELINE_C_SVM = os.path.join(WEIGHTS_DIR, "CNN_PreTrained", "svm_model.pkl")
|
| 32 |
+
|
| 33 |
+
# ---------------------------------------------------------------------------
|
| 34 |
+
# Inference thresholds
|
| 35 |
+
# ---------------------------------------------------------------------------
|
| 36 |
+
# If Pipeline A confidence falls below this, Pipeline B is also called.
|
| 37 |
+
CONFIDENCE_THRESHOLD: float = float(os.getenv("CONFIDENCE_THRESHOLD", "0.70"))
|
| 38 |
+
|
| 39 |
+
# If ensemble after B still below this, Pipeline C is attempted (if image provided).
|
| 40 |
+
SECONDARY_THRESHOLD: float = float(os.getenv("SECONDARY_THRESHOLD", "0.60"))
|
| 41 |
+
|
| 42 |
+
# ---------------------------------------------------------------------------
|
| 43 |
+
# Pipeline mode
|
| 44 |
+
# ---------------------------------------------------------------------------
|
| 45 |
+
# "A" → only XGBoost (fastest)
|
| 46 |
+
# "B" → only Autoencoder + LGBM
|
| 47 |
+
# "C" → only CNN + SVM (image required)
|
| 48 |
+
# "ensemble" → A first, fallback to B, then C
|
| 49 |
+
PIPELINE_MODE: str = os.getenv("PIPELINE_MODE", "ensemble")
|
| 50 |
+
|
| 51 |
+
# ---------------------------------------------------------------------------
|
| 52 |
+
# WebSocket / server
|
| 53 |
+
# ---------------------------------------------------------------------------
|
| 54 |
+
MAX_WS_CONNECTIONS: int = int(os.getenv("MAX_WS_CONNECTIONS", "100"))
|
| 55 |
+
WS_SEND_RATE_LIMIT: int = int(os.getenv("WS_SEND_RATE_LIMIT", "15")) # max frames/sec per client
|
| 56 |
+
|
| 57 |
+
# Allowed CORS origins (comma-separated list in env var)
|
| 58 |
+
_cors_env = os.getenv("CORS_ORIGINS", "http://localhost:5173,http://localhost:3000")
|
| 59 |
+
CORS_ORIGINS: list[str] = [o.strip() for o in _cors_env.split(",") if o.strip()]
|
| 60 |
+
|
| 61 |
+
# ---------------------------------------------------------------------------
|
| 62 |
+
# Logging
|
| 63 |
+
# ---------------------------------------------------------------------------
|
| 64 |
+
LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
|
backend/app/inference/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# backend/app/inference/__init__.py
|
backend/app/inference/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (172 Bytes). View file
|
|
|
backend/app/inference/__pycache__/ensemble.cpython-312.pyc
ADDED
|
Binary file (5.66 kB). View file
|
|
|
backend/app/inference/__pycache__/pipeline_a.cpython-312.pyc
ADDED
|
Binary file (2.16 kB). View file
|
|
|
backend/app/inference/__pycache__/pipeline_b.cpython-312.pyc
ADDED
|
Binary file (2.4 kB). View file
|
|
|
backend/app/inference/__pycache__/pipeline_c.cpython-312.pyc
ADDED
|
Binary file (3.52 kB). View file
|
|
|
backend/app/inference/ensemble.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Ensemble logic — orchestrates Pipelines A → B → C with confidence-based fallback.
|
| 3 |
+
|
| 4 |
+
Strategy
|
| 5 |
+
--------
|
| 6 |
+
1. Always run Pipeline A (XGBoost, sub-ms).
|
| 7 |
+
2. If confidence < CONFIDENCE_THRESHOLD, also run Pipeline B (Autoencoder+LGBM).
|
| 8 |
+
3. Average the probability vectors from the pipelines that were run.
|
| 9 |
+
4. If ensemble confidence still < SECONDARY_THRESHOLD AND image data is supplied,
|
| 10 |
+
also run Pipeline C (CNN+SVM) and include it in the average.
|
| 11 |
+
5. Return the class with the highest averaged probability.
|
| 12 |
+
|
| 13 |
+
The caller can also force a specific pipeline via the PIPELINE_MODE config.
|
| 14 |
+
"""
|
| 15 |
+
from __future__ import annotations
|
| 16 |
+
|
| 17 |
+
import logging
|
| 18 |
+
import time
|
| 19 |
+
from typing import List, Optional, Any
|
| 20 |
+
|
| 21 |
+
import numpy as np
|
| 22 |
+
|
| 23 |
+
from app import config
|
| 24 |
+
from app.models.label_map import get_sign
|
| 25 |
+
from app.inference.pipeline_a import PredictionResult
|
| 26 |
+
import app.inference.pipeline_a as _pa
|
| 27 |
+
import app.inference.pipeline_b as _pb
|
| 28 |
+
import app.inference.pipeline_c as _pc
|
| 29 |
+
|
| 30 |
+
logger = logging.getLogger(__name__)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def run(
    landmarks: List[float],
    *,
    image_input: Optional[str] = None,
    xgb_model: Any,
    encoder_model: Any,
    lgbm_model: Any,
    cnn_model: Any,
    svm_model: Any,
    pipeline_mode: str = "ensemble",
    confidence_threshold: float = 0.70,
    secondary_threshold: float = 0.60,
) -> PredictionResult:
    """
    Execute one or more inference pipelines and return a consolidated result.

    In "ensemble" mode, Pipeline A runs first; B is added when the blended
    confidence is below ``confidence_threshold``; C is added when it is still
    below ``secondary_threshold`` and image data was supplied.  The final
    prediction is the argmax of the averaged probability vectors.

    Parameters
    ----------
    landmarks : flat 63-element MediaPipe landmark vector
    image_input : optional base-64 JPEG for Pipeline C
    xgb_model : Pipeline A model
    encoder_model : Pipeline B encoder (Keras sub-model)
    lgbm_model : Pipeline B classifier
    cnn_model : Pipeline C feature extractor
    svm_model : Pipeline C classifier
    pipeline_mode : "A" | "B" | "C" | "ensemble"
    confidence_threshold : fallback to B when A confidence < this value
    secondary_threshold : fallback to C when ensemble(A+B) confidence < this value
    """
    start = time.perf_counter()

    # -----------------------------------------------------------
    # Forced single-pipeline modes — delegate directly.
    # -----------------------------------------------------------
    if pipeline_mode == "A":
        if xgb_model is None:
            raise RuntimeError("Pipeline A model not loaded.")
        return _pa.predict(landmarks, xgb_model)

    if pipeline_mode == "B":
        if encoder_model is None or lgbm_model is None:
            raise RuntimeError("Pipeline B models not loaded.")
        return _pb.predict(landmarks, encoder_model, lgbm_model)

    if pipeline_mode == "C":
        if cnn_model is None or svm_model is None:
            raise RuntimeError("Pipeline C models not loaded.")
        if image_input is None:
            raise ValueError("Pipeline C requires image_input.")
        return _pc.predict(image_input, cnn_model, svm_model)

    # -----------------------------------------------------------
    # Ensemble mode (default): A → B → C with confidence fallback.
    # -----------------------------------------------------------
    stages: list[PredictionResult] = []

    def _blended_confidence() -> float:
        # Max of the element-wise mean of all probability vectors so far;
        # 0.0 forces the next fallback stage when nothing has run yet.
        if not stages:
            return 0.0
        blended = np.mean([s.probabilities for s in stages], axis=0)
        return float(np.max(blended))

    # Stage 1 — Pipeline A (always attempted when available).
    if xgb_model is not None:
        stages.append(_pa.predict(landmarks, xgb_model))
    else:
        logger.warning("Pipeline A not available in ensemble mode.")

    # Stage 2 — Pipeline B when confidence is still low.
    if (
        _blended_confidence() < confidence_threshold
        and encoder_model is not None
        and lgbm_model is not None
    ):
        stages.append(_pb.predict(landmarks, encoder_model, lgbm_model))

    # Stage 3 — Pipeline C when still low and image data is available.
    if (
        _blended_confidence() < secondary_threshold
        and image_input is not None
        and cnn_model is not None
        and svm_model is not None
    ):
        stages.append(_pc.predict(image_input, cnn_model, svm_model))

    # -----------------------------------------------------------
    # Aggregate: average probabilities across all executed stages.
    # -----------------------------------------------------------
    if not stages:
        raise RuntimeError("No inference pipeline could be executed.")

    mean_proba = np.mean([s.probabilities for s in stages], axis=0)
    best = int(np.argmax(mean_proba))
    elapsed_ms = (time.perf_counter() - start) * 1000

    return PredictionResult(
        sign=get_sign(best),
        confidence=float(mean_proba[best]),
        label_index=best,
        probabilities=mean_proba.tolist(),
        pipeline="+".join(s.pipeline for s in stages) if len(stages) > 1 else stages[0].pipeline,
        latency_ms=elapsed_ms,
    )
|
backend/app/inference/pipeline_a.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Pipeline A — XGBoost classifier on raw MediaPipe landmarks.
|
| 3 |
+
|
| 4 |
+
Input : 63 floats [x0,y0,z0 … x20,y20,z20] (already [0,1] normalised by MediaPipe)
|
| 5 |
+
Output : PredictionResult
|
| 6 |
+
"""
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
import time
|
| 10 |
+
from dataclasses import dataclass
|
| 11 |
+
from typing import List
|
| 12 |
+
|
| 13 |
+
import numpy as np
|
| 14 |
+
|
| 15 |
+
from app.models.label_map import get_sign
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@dataclass
class PredictionResult:
    """Outcome of a single inference call; shared by all pipelines and the ensemble."""

    sign: str                   # human-readable sign label resolved via label_map.get_sign()
    confidence: float           # probability of the predicted class (value at the argmax index)
    label_index: int            # argmax class index into `probabilities`
    probabilities: List[float]  # full per-class probability vector
    pipeline: str               # producing pipeline: "A", "B", "C", or a join like "A+B" (ensemble)
    latency_ms: float           # wall-clock inference time in milliseconds
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def predict(landmarks: List[float], xgb_model) -> PredictionResult:
    """
    Classify a flat 63-element landmark vector with the XGBoost model.

    Parameters
    ----------
    landmarks : list of 63 floats ``[x0,y0,z0 … x20,y20,z20]``
    xgb_model : loaded XGBClassifier instance

    Returns
    -------
    PredictionResult
        Top class, its probability, the full probability vector and latency.
    """
    start = time.perf_counter()

    # Single-row feature matrix: (1, 63).
    features = np.asarray(landmarks, dtype=np.float32).reshape(1, -1)
    probs = xgb_model.predict_proba(features)[0]
    best = int(np.argmax(probs))

    return PredictionResult(
        sign=get_sign(best),
        confidence=float(probs[best]),
        label_index=best,
        probabilities=probs.tolist(),
        pipeline="A",
        latency_ms=(time.perf_counter() - start) * 1000,
    )
|
backend/app/inference/pipeline_b.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Pipeline B — Autoencoder encoder + LightGBM classifier.
|
| 3 |
+
|
| 4 |
+
Input : 63 floats [x0,y0,z0 … x20,y20,z20]
|
| 5 |
+
Process: Keras encoder compresses to 16-D bottleneck → LGBMClassifier
|
| 6 |
+
Output : PredictionResult
|
| 7 |
+
"""
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import time
|
| 11 |
+
import warnings
|
| 12 |
+
from dataclasses import dataclass
|
| 13 |
+
from typing import List, Any
|
| 14 |
+
|
| 15 |
+
import numpy as np
|
| 16 |
+
|
| 17 |
+
from app.models.label_map import get_sign
|
| 18 |
+
from app.inference.pipeline_a import PredictionResult
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def predict(landmarks: List[float], encoder_model: Any, lgbm_model: Any) -> PredictionResult:
    """
    Classify a landmark vector via the autoencoder bottleneck + LightGBM.

    Parameters
    ----------
    landmarks : list of 63 floats
    encoder_model : Keras Model (input 63 → output 16, bottleneck sub-model)
    lgbm_model : loaded LGBMClassifier instance

    Returns
    -------
    PredictionResult
    """
    start = time.perf_counter()

    vec = np.asarray(landmarks, dtype=np.float32).reshape(1, -1)  # (1, 63)

    # Direct model call (not .predict()) avoids the verbose Keras progress bar.
    bottleneck = encoder_model(vec, training=False).numpy()  # (1, 16)

    # The LGBM model was fitted on a named DataFrame; feeding a plain numpy
    # array is valid but triggers a sklearn feature-name UserWarning — mute it.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        probs = lgbm_model.predict_proba(bottleneck)[0]

    best = int(np.argmax(probs))

    return PredictionResult(
        sign=get_sign(best),
        confidence=float(probs[best]),
        label_index=best,
        probabilities=probs.tolist(),
        pipeline="B",
        latency_ms=(time.perf_counter() - start) * 1000,
    )
|
backend/app/inference/pipeline_c.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Pipeline C — Pre-trained CNN (ResNet50) feature extractor + SVM classifier.
|
| 3 |
+
|
| 4 |
+
Input : base-64 encoded JPEG string OR raw bytes of a 128×128 RGB hand-crop.
|
| 5 |
+
Process: Decode → normalise → CNN (256-D features) → SVC.predict_proba
|
| 6 |
+
Output : PredictionResult
|
| 7 |
+
|
| 8 |
+
Note: This pipeline is significantly slower (~100–300 ms on CPU) and is only
|
| 9 |
+
invoked as a fallback when landmark-based pipelines have low confidence.
|
| 10 |
+
"""
|
| 11 |
+
from __future__ import annotations
|
| 12 |
+
|
| 13 |
+
import base64
|
| 14 |
+
import io
|
| 15 |
+
import time
|
| 16 |
+
from dataclasses import dataclass
|
| 17 |
+
from typing import Any, List, Union
|
| 18 |
+
|
| 19 |
+
import numpy as np
|
| 20 |
+
from PIL import Image
|
| 21 |
+
|
| 22 |
+
from app.models.label_map import get_sign
|
| 23 |
+
from app.inference.pipeline_a import PredictionResult
|
| 24 |
+
|
| 25 |
+
# Target input size expected by the CNN (ResNet50 Functional model)
|
| 26 |
+
# Target input size expected by the CNN (ResNet50 Functional model)
CNN_IMG_SIZE: int = 128


def _decode_image(image_input: Union[str, bytes]) -> np.ndarray:
    """
    Decode a hand-crop image into a normalised float array.

    Accepts either a base-64 encoded JPEG string (from a WebSocket JSON
    payload) or raw bytes (from HTTP multipart), and returns a
    (128, 128, 3) float32 array with values scaled to [0, 1].
    """
    # Base-64 strings come from the WebSocket path; bytes come from multipart.
    payload = base64.b64decode(image_input) if isinstance(image_input, str) else image_input

    with Image.open(io.BytesIO(payload)) as decoded:
        rgb = decoded.convert("RGB").resize((CNN_IMG_SIZE, CNN_IMG_SIZE), Image.LANCZOS)
    return np.asarray(rgb, dtype=np.float32) / 255.0  # (128, 128, 3)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def predict(
    image_input: Union[str, bytes],
    cnn_model: Any,
    svm_model: Any,
) -> PredictionResult:
    """
    Run the CNN + SVM inference pipeline (Pipeline C).

    Parameters
    ----------
    image_input : base-64 JPEG string or raw bytes of the hand crop (any size; will be resized)
    cnn_model   : Keras Functional model (ResNet50-based, output 256-D feature vector)
    svm_model   : loaded SVC(C=10, probability=True) instance

    Returns
    -------
    PredictionResult
    """
    started = time.perf_counter()

    batch = _decode_image(image_input)[np.newaxis]  # (1, 128, 128, 3)

    # Calling the model directly (rather than .predict) avoids Keras'
    # verbose per-batch logging.
    features = cnn_model(batch, training=False).numpy()  # (1, 256)

    class_probs = svm_model.predict_proba(features)[0]  # (34,)
    best = int(np.argmax(class_probs))

    return PredictionResult(
        sign=get_sign(best),
        confidence=float(class_probs[best]),
        label_index=best,
        probabilities=class_probs.tolist(),
        pipeline="C",
        latency_ms=(time.perf_counter() - started) * 1000,
    )
|
backend/app/main.py
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
SanketSetu FastAPI backend — entry point.
|
| 3 |
+
|
| 4 |
+
Endpoints
|
| 5 |
+
---------
|
| 6 |
+
GET /health → HealthResponse
|
| 7 |
+
WS /ws/landmarks → real-time sign recognition (landmark stream)
|
| 8 |
+
WS /ws/image → image-based sign recognition (Pipeline C)
|
| 9 |
+
POST /api/predict → REST fallback for landmark inference
|
| 10 |
+
POST /api/predict/image → REST fallback for image inference
|
| 11 |
+
"""
|
| 12 |
+
from __future__ import annotations
|
| 13 |
+
|
| 14 |
+
import json
|
| 15 |
+
import logging
|
| 16 |
+
import os
|
| 17 |
+
import time
|
| 18 |
+
from contextlib import asynccontextmanager
|
| 19 |
+
from pathlib import Path
|
| 20 |
+
from typing import Any
|
| 21 |
+
|
| 22 |
+
# Load .env if present (before config is imported so env vars are available)
|
| 23 |
+
try:
|
| 24 |
+
from dotenv import load_dotenv
|
| 25 |
+
_env_file = Path(__file__).resolve().parent.parent / ".env"
|
| 26 |
+
if _env_file.exists():
|
| 27 |
+
load_dotenv(_env_file)
|
| 28 |
+
except ImportError:
|
| 29 |
+
pass # python-dotenv not installed; rely on shell env
|
| 30 |
+
|
| 31 |
+
import numpy as np
|
| 32 |
+
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, HTTPException, Request
|
| 33 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 34 |
+
from fastapi.responses import JSONResponse
|
| 35 |
+
|
| 36 |
+
from app import config
|
| 37 |
+
from app.models.loader import load_models, get_model_store
|
| 38 |
+
from app.schemas import (
|
| 39 |
+
LandmarkMessage,
|
| 40 |
+
ImageMessage,
|
| 41 |
+
EnsembleMessage,
|
| 42 |
+
PredictionResponse,
|
| 43 |
+
HealthResponse,
|
| 44 |
+
ErrorResponse,
|
| 45 |
+
)
|
| 46 |
+
import app.inference.ensemble as ensemble
|
| 47 |
+
|
| 48 |
+
# ---------------------------------------------------------------------------
|
| 49 |
+
# Logging
|
| 50 |
+
# ---------------------------------------------------------------------------
|
| 51 |
+
logging.basicConfig(
    level=getattr(logging, config.LOG_LEVEL, logging.INFO),  # unknown level names fall back to INFO
    format="%(asctime)s %(levelname)-8s %(name)s — %(message)s",
)
logger = logging.getLogger("sanketsetu")

# Silence noisy TF / Keras output
# NOTE: these env vars only take effect if set before TensorFlow is first
# imported; the model loader imports keras lazily, so setting them at module
# import time is early enough.
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "3")
os.environ.setdefault("KERAS_BACKEND", "tensorflow")
logging.getLogger("tensorflow").setLevel(logging.ERROR)
logging.getLogger("keras").setLevel(logging.ERROR)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
# ---------------------------------------------------------------------------
|
| 65 |
+
# Lifespan — load models on startup
|
| 66 |
+
# ---------------------------------------------------------------------------
|
| 67 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: load every model artifact once before the
    server accepts traffic, and log on shutdown."""
    logger.info("Starting SanketSetu backend …")
    # Blocking call — the server does not accept connections until all
    # available artifacts are in memory (see app.models.loader.load_models).
    load_models()
    logger.info("Models ready. Server accepting connections.")
    yield
    logger.info("Shutting down.")
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# ---------------------------------------------------------------------------
|
| 77 |
+
# App
|
| 78 |
+
# ---------------------------------------------------------------------------
|
| 79 |
+
app = FastAPI(
    title="SanketSetu API",
    description="Real-time Gujarati Sign Language recognition backend",
    version="1.0.0",
    lifespan=lifespan,  # loads model artifacts on startup
)

# CORS
# NOTE: allow_credentials=True means config.CORS_ORIGINS must list explicit
# origins — browsers reject a wildcard origin combined with credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=config.CORS_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
# ---------------------------------------------------------------------------
|
| 97 |
+
# Global exception handler
|
| 98 |
+
# ---------------------------------------------------------------------------
|
| 99 |
+
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Catch-all handler: log the full traceback, return a generic 500 body."""
    logger.exception("Unhandled error: %s", exc)
    payload = ErrorResponse(error="Internal server error", detail=str(exc))
    return JSONResponse(status_code=500, content=payload.model_dump())
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
# ---------------------------------------------------------------------------
|
| 109 |
+
# Helpers
|
| 110 |
+
# ---------------------------------------------------------------------------
|
| 111 |
+
|
| 112 |
+
def _run_ensemble(
    landmarks: list[float],
    image_b64: str | None = None,
) -> PredictionResponse:
    """Run the full ensemble over one landmark frame (plus optional image
    fallback) and wrap the internal result in the API response schema."""
    store = get_model_store()
    outcome = ensemble.run(
        landmarks,
        image_input=image_b64,
        xgb_model=store.xgb_model,
        encoder_model=store.encoder_model,
        lgbm_model=store.lgbm_model,
        cnn_model=store.cnn_model,
        svm_model=store.svm_model,
        pipeline_mode=config.PIPELINE_MODE,
        confidence_threshold=config.CONFIDENCE_THRESHOLD,
        secondary_threshold=config.SECONDARY_THRESHOLD,
    )
    return PredictionResponse(
        sign=outcome.sign,
        confidence=outcome.confidence,
        pipeline=outcome.pipeline,
        label_index=outcome.label_index,
        probabilities=outcome.probabilities,
        latency_ms=outcome.latency_ms,
    )
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def _available_pipelines() -> list[str]:
    """Return the letters of the pipelines whose artifacts are loaded."""
    try:
        store = get_model_store()
    except RuntimeError:
        # Models have not finished loading yet.
        return []

    readiness = [
        ("A", store.xgb_model is not None),
        ("B", store.encoder_model is not None and store.lgbm_model is not None),
        ("C", store.cnn_model is not None and store.svm_model is not None),
    ]
    return [name for name, ready in readiness if ready]
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
# ---------------------------------------------------------------------------
|
| 155 |
+
# REST endpoints
|
| 156 |
+
# ---------------------------------------------------------------------------
|
| 157 |
+
|
| 158 |
+
@app.get("/health", response_model=HealthResponse)
|
| 159 |
+
async def health():
|
| 160 |
+
try:
|
| 161 |
+
store = get_model_store()
|
| 162 |
+
loaded = store.loaded
|
| 163 |
+
except RuntimeError:
|
| 164 |
+
loaded = False
|
| 165 |
+
return HealthResponse(
|
| 166 |
+
status="ok" if loaded else "loading",
|
| 167 |
+
models_loaded=loaded,
|
| 168 |
+
pipelines_available=_available_pipelines(),
|
| 169 |
+
)
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
@app.post("/api/predict", response_model=PredictionResponse)
|
| 173 |
+
async def predict_landmarks(body: LandmarkMessage):
|
| 174 |
+
"""REST fallback: send 63 landmark floats, receive prediction."""
|
| 175 |
+
return _run_ensemble(body.landmarks)
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
@app.post("/api/predict/image", response_model=PredictionResponse)
|
| 179 |
+
async def predict_image(body: ImageMessage):
|
| 180 |
+
"""REST fallback: send a base-64 hand crop, receive prediction via Pipeline C."""
|
| 181 |
+
store = get_model_store()
|
| 182 |
+
if store.cnn_model is None or store.svm_model is None:
|
| 183 |
+
raise HTTPException(status_code=503, detail="Pipeline C (CNN+SVM) is not available.")
|
| 184 |
+
import app.inference.pipeline_c as _pc
|
| 185 |
+
result = _pc.predict(body.image_b64, store.cnn_model, store.svm_model)
|
| 186 |
+
return PredictionResponse(
|
| 187 |
+
sign=result.sign,
|
| 188 |
+
confidence=result.confidence,
|
| 189 |
+
pipeline=result.pipeline,
|
| 190 |
+
label_index=result.label_index,
|
| 191 |
+
probabilities=result.probabilities,
|
| 192 |
+
latency_ms=result.latency_ms,
|
| 193 |
+
)
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
# ---------------------------------------------------------------------------
|
| 197 |
+
# WebSocket — landmark stream /ws/landmarks
|
| 198 |
+
# ---------------------------------------------------------------------------
|
| 199 |
+
|
| 200 |
+
@app.websocket("/ws/landmarks")
|
| 201 |
+
async def ws_landmarks(ws: WebSocket):
|
| 202 |
+
"""
|
| 203 |
+
Primary real-time endpoint.
|
| 204 |
+
Client sends: {"landmarks": [...63 floats...], "session_id": "..."}
|
| 205 |
+
Server replies: PredictionResponse JSON
|
| 206 |
+
"""
|
| 207 |
+
await ws.accept()
|
| 208 |
+
session_id = "unknown"
|
| 209 |
+
try:
|
| 210 |
+
while True:
|
| 211 |
+
raw = await ws.receive_text()
|
| 212 |
+
try:
|
| 213 |
+
data = json.loads(raw)
|
| 214 |
+
msg = LandmarkMessage(**data)
|
| 215 |
+
session_id = msg.session_id
|
| 216 |
+
|
| 217 |
+
response = _run_ensemble(msg.landmarks)
|
| 218 |
+
await ws.send_text(response.model_dump_json())
|
| 219 |
+
|
| 220 |
+
except ValueError as ve:
|
| 221 |
+
await ws.send_text(
|
| 222 |
+
ErrorResponse(error="Validation error", detail=str(ve)).model_dump_json()
|
| 223 |
+
)
|
| 224 |
+
except Exception as e:
|
| 225 |
+
logger.error("[%s] Inference error: %s", session_id, e, exc_info=True)
|
| 226 |
+
await ws.send_text(
|
| 227 |
+
ErrorResponse(error="Inference failed", detail=str(e)).model_dump_json()
|
| 228 |
+
)
|
| 229 |
+
|
| 230 |
+
except WebSocketDisconnect:
|
| 231 |
+
logger.info("Client disconnected: %s", session_id)
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
# ---------------------------------------------------------------------------
|
| 235 |
+
# WebSocket — image stream /ws/image (Pipeline C)
|
| 236 |
+
# ---------------------------------------------------------------------------
|
| 237 |
+
|
| 238 |
+
@app.websocket("/ws/image")
|
| 239 |
+
async def ws_image(ws: WebSocket):
|
| 240 |
+
"""
|
| 241 |
+
Image-based endpoint for Pipeline C (CNN+SVM).
|
| 242 |
+
Client sends: {"image_b64": "<base64 JPEG>", "session_id": "..."}
|
| 243 |
+
"""
|
| 244 |
+
await ws.accept()
|
| 245 |
+
session_id = "unknown"
|
| 246 |
+
try:
|
| 247 |
+
while True:
|
| 248 |
+
raw = await ws.receive_text()
|
| 249 |
+
try:
|
| 250 |
+
data = json.loads(raw)
|
| 251 |
+
msg = ImageMessage(**data)
|
| 252 |
+
session_id = msg.session_id
|
| 253 |
+
|
| 254 |
+
store = get_model_store()
|
| 255 |
+
if store.cnn_model is None or store.svm_model is None:
|
| 256 |
+
await ws.send_text(
|
| 257 |
+
ErrorResponse(error="Pipeline C not available").model_dump_json()
|
| 258 |
+
)
|
| 259 |
+
continue
|
| 260 |
+
|
| 261 |
+
import app.inference.pipeline_c as _pc
|
| 262 |
+
result = _pc.predict(msg.image_b64, store.cnn_model, store.svm_model)
|
| 263 |
+
response = PredictionResponse(
|
| 264 |
+
sign=result.sign,
|
| 265 |
+
confidence=result.confidence,
|
| 266 |
+
pipeline=result.pipeline,
|
| 267 |
+
label_index=result.label_index,
|
| 268 |
+
probabilities=result.probabilities,
|
| 269 |
+
latency_ms=result.latency_ms,
|
| 270 |
+
)
|
| 271 |
+
await ws.send_text(response.model_dump_json())
|
| 272 |
+
|
| 273 |
+
except Exception as e:
|
| 274 |
+
logger.error("[%s] Image inference error: %s", session_id, e, exc_info=True)
|
| 275 |
+
await ws.send_text(
|
| 276 |
+
ErrorResponse(error="Inference failed", detail=str(e)).model_dump_json()
|
| 277 |
+
)
|
| 278 |
+
|
| 279 |
+
except WebSocketDisconnect:
|
| 280 |
+
logger.info("Image client disconnected: %s", session_id)
|
backend/app/models/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# backend/app/models/__init__.py
|
backend/app/models/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (169 Bytes). View file
|
|
|
backend/app/models/__pycache__/label_map.cpython-312.pyc
ADDED
|
Binary file (2.3 kB). View file
|
|
|
backend/app/models/__pycache__/loader.cpython-312.pyc
ADDED
|
Binary file (7.72 kB). View file
|
|
|
backend/app/models/label_map.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Label map: index 0–33 → Gujarati sign name.
|
| 3 |
+
|
| 4 |
+
The 34 classes cover the Gujarati consonant alphabet (ક–ળ / ક-ળ) as used in the
|
| 5 |
+
training dataset. Verify this order against your original data-collection script /
|
| 6 |
+
notebook — if your dataset folder names differ, update the list below.
|
| 7 |
+
|
| 8 |
+
Current assumption: classes are sorted by the Gujarati alphabet order (Unicode order
|
| 9 |
+
of the Unicode Gujarati block, U+0A80–U+0AFF).
|
| 10 |
+
"""
|
| 11 |
+
from __future__ import annotations
|
| 12 |
+
|
| 13 |
+
# ---- Primary label map (index → Gujarati character / word) -----------------
# 34 classes in dataset order: 32 consonants followed by two conjuncts.
_SIGNS: tuple[str, ...] = (
    "ક",   # ka
    "ખ",   # kha
    "ગ",   # ga
    "ઘ",   # gha
    "ચ",   # cha
    "છ",   # chha
    "જ",   # ja
    "ઝ",   # jha
    "ટ",   # ṭa
    "ઠ",   # ṭha
    "ડ",   # ḍa
    "ઢ",   # ḍha
    "ણ",   # ṇa
    "ત",   # ta
    "થ",   # tha
    "દ",   # da
    "ધ",   # dha
    "ન",   # na
    "પ",   # pa
    "ફ",   # pha
    "બ",   # ba
    "ભ",   # bha
    "મ",   # ma
    "ય",   # ya
    "ર",   # ra
    "લ",   # la
    "વ",   # va
    "શ",   # sha
    "ષ",   # ṣha
    "સ",   # sa
    "હ",   # ha
    "ળ",   # ḷa
    "ક્ષ",  # ksha (conjunct)
    "જ્ઞ",  # gna (conjunct)
)

# index → sign (the canonical mapping used by all pipelines)
LABEL_MAP: dict[int, str] = dict(enumerate(_SIGNS))

# Reverse map: sign name → index (useful for testing)
REVERSE_MAP: dict[str, int] = {sign: idx for idx, sign in enumerate(_SIGNS)}


def get_sign(label_index: int) -> str:
    """Return the Gujarati sign for the given class index.

    Unknown indices fall back to a bracketed placeholder like "[99]".
    """
    return LABEL_MAP.get(label_index, f"[{label_index}]")
|
backend/app/models/loader.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Singleton model store — loads all model artifacts once at startup and holds them
|
| 3 |
+
in memory for the lifetime of the process.
|
| 4 |
+
|
| 5 |
+
Usage inside FastAPI:
|
| 6 |
+
from app.models.loader import get_model_store
|
| 7 |
+
store = get_model_store() # dependency injection or direct call
|
| 8 |
+
"""
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
import logging
|
| 12 |
+
import os
|
| 13 |
+
import pickle
|
| 14 |
+
import time
|
| 15 |
+
from dataclasses import dataclass, field
|
| 16 |
+
from typing import Any
|
| 17 |
+
|
| 18 |
+
import numpy as np
|
| 19 |
+
|
| 20 |
+
logger = logging.getLogger(__name__)
|
| 21 |
+
|
| 22 |
+
# ---------------------------------------------------------------------------
|
| 23 |
+
# Data class that holds every loaded artifact
|
| 24 |
+
# ---------------------------------------------------------------------------
|
| 25 |
+
|
| 26 |
+
@dataclass
class ModelStore:
    """Container for every loaded model artifact, grouped by pipeline.

    Any attribute may remain None when the corresponding artifact files are
    missing; `loaded` flips to True once loading has been attempted.
    """

    # Pipeline A — landmark vector → XGBoost classifier
    xgb_model: Any = None

    # Pipeline B — autoencoder bottleneck → LightGBM
    encoder_model: Any = None  # Keras sub-model (encoder half)
    lgbm_model: Any = None

    # Pipeline C — CNN features → SVM
    cnn_model: Any = None  # Keras ResNet50 feature extractor
    svm_model: Any = None

    loaded: bool = False
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# Module-level singleton
|
| 43 |
+
_store: ModelStore | None = None
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# ---------------------------------------------------------------------------
|
| 47 |
+
# Internal helpers
|
| 48 |
+
# ---------------------------------------------------------------------------
|
| 49 |
+
|
| 50 |
+
def _load_pickle(path: str, label: str) -> Any:
    """Unpickle the artifact at *path*, logging the load time under *label*.

    NOTE(review): pickle.load on an untrusted file can execute arbitrary
    code — these artifacts are assumed to ship with the application.
    """
    started = time.perf_counter()
    with open(path, "rb") as fh:
        artifact = pickle.load(fh)
    logger.info("Loaded %-35s (%.1f ms)", label, (time.perf_counter() - started) * 1000)
    return artifact
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def _build_encoder(autoencoder_pkl_path: str) -> Any:
    """
    Load the full autoencoder from pickle and extract the encoder sub-model.

    The autoencoder is a Keras Sequential:
        InputLayer (63)
        Dense 32 relu   ← layer index 0
        Dense 16 relu   ← layer index 1 ← bottleneck output
        Dense 32 relu
        Dense 63 linear
    We build a Keras Model that maps input → output of the bottleneck Dense.

    Parameters
    ----------
    autoencoder_pkl_path : path to the pickled Sequential autoencoder

    Returns
    -------
    Keras Model mapping a (batch, 63) landmark vector to its 16-D bottleneck.
    """
    import os
    # Must be set before keras/TF is imported below to take effect.
    os.environ.setdefault("KERAS_BACKEND", "tensorflow")
    os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "3")

    full_ae = _load_pickle(autoencoder_pkl_path, "autoencoder_model.pkl")

    # Dynamically import keras after env vars are set
    import keras

    # The Sequential model's built layers: 0=Dense(32), 1=Dense(16), 2=Dense(32), 3=Dense(63)
    # layer index 1 output is the 16-D bottleneck.
    # We can't use full_ae.input directly on a Sequential that was pickled without
    # a traced input tensor, so we wire the layers manually.
    # (Fixed: removed an unused `import numpy as _np` that previously sat here.)
    inp = keras.Input(shape=(63,), name="encoder_input")
    x = full_ae.layers[0](inp)  # Dense(32, relu)
    x = full_ae.layers[1](x)    # Dense(16, relu) — bottleneck
    encoder = keras.Model(inputs=inp, outputs=x, name="encoder_only")
    logger.info("Built encoder sub-model: input(%s) → output(%s)", encoder.input_shape, encoder.output_shape)
    return encoder
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def _build_cnn_feature_extractor(cnn_pkl_path: str) -> Any:
    """
    Load the full CNN (ResNet50 Functional model) from pickle and return a
    sub-model that outputs the 256-D penultimate Dense layer.

    Architecture (tail of the model):
        … ResNet50 backbone …
        GlobalAveragePooling2D
        Dropout(0.5)
        Dense(256, relu)   ← feature vector we want
        Dropout(0.5)
        Dense(34, softmax) ← final classification head (skip this)

    The SVC was trained on the 256-D features, so we must stop before the
    final Dense(34) layer.
    """
    import os
    os.environ.setdefault("KERAS_BACKEND", "tensorflow")
    os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "3")

    import keras

    full_cnn = _load_pickle(cnn_pkl_path, "cnn_model.pkl")

    # Scan from the output end for the first Dense layer with 256 units.
    feature_layer = next(
        (
            layer
            for layer in reversed(full_cnn.layers)
            if layer.__class__.__name__ == "Dense"
            and layer.get_config().get("units") == 256
        ),
        None,
    )

    if feature_layer is None:
        logger.warning(
            "Could not find Dense(256) layer; using full CNN output as features."
        )
        return full_cnn

    extractor = keras.Model(
        inputs=full_cnn.input,
        outputs=feature_layer.output,
        name="cnn_feature_extractor",
    )
    logger.info(
        "CNN feature extractor: input %s → output %s",
        extractor.input_shape,
        extractor.output_shape,
    )
    return extractor
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
# ---------------------------------------------------------------------------
|
| 144 |
+
# Public API
|
| 145 |
+
# ---------------------------------------------------------------------------
|
| 146 |
+
|
| 147 |
+
def load_models() -> ModelStore:
    """
    Load all models and return a populated ModelStore.
    Call this once from the FastAPI lifespan event.

    Missing artifacts are tolerated: each pipeline is loaded only when all of
    its files exist, with a warning otherwise. ``store.loaded`` is set even if
    some (or all) pipelines are unavailable — it means "loading was attempted",
    not "every pipeline is usable".
    """
    global _store

    from app import config  # local import to avoid circular at module level

    store = ModelStore()

    # ---- Pipeline A --------------------------------------------------------
    if os.path.exists(config.PIPELINE_A_MODEL):
        store.xgb_model = _load_pickle(config.PIPELINE_A_MODEL, "xgb model.pkl")
    else:
        logger.warning("Pipeline A model not found: %s", config.PIPELINE_A_MODEL)

    # ---- Pipeline B --------------------------------------------------------
    if os.path.exists(config.PIPELINE_B_AE) and os.path.exists(config.PIPELINE_B_LGBM):
        store.encoder_model = _build_encoder(config.PIPELINE_B_AE)
        store.lgbm_model = _load_pickle(config.PIPELINE_B_LGBM, "lgbm_model.pkl")
    else:
        logger.warning("Pipeline B models not found — B will be skipped.")

    # ---- Pipeline C --------------------------------------------------------
    if os.path.exists(config.PIPELINE_C_CNN) and os.path.exists(config.PIPELINE_C_SVM):
        store.cnn_model = _build_cnn_feature_extractor(config.PIPELINE_C_CNN)
        store.svm_model = _load_pickle(config.PIPELINE_C_SVM, "svm_model.pkl")
    else:
        logger.warning("Pipeline C models not found — C will be skipped.")

    store.loaded = True

    # Fix: previously "All models loaded successfully." was logged
    # unconditionally, even when every artifact was missing. Log an
    # accurate summary instead.
    available = [
        name
        for name, ok in (
            ("A", store.xgb_model is not None),
            ("B", store.encoder_model is not None and store.lgbm_model is not None),
            ("C", store.cnn_model is not None and store.svm_model is not None),
        )
        if ok
    ]
    if available:
        logger.info("Model loading complete — available pipelines: %s", ", ".join(available))
    else:
        logger.warning("Model loading complete, but no pipelines are available.")

    _store = store
    return store
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
def get_model_store() -> ModelStore:
    """Return the singleton ModelStore (must have been loaded via load_models() first)."""
    store = _store
    # Treat both "never loaded" and a half-initialised store as fatal.
    if store is None or not store.loaded:
        raise RuntimeError("ModelStore has not been initialised — call load_models() first.")
    return store
|
backend/app/schemas.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Pydantic request / response schemas for SanketSetu backend.
|
| 3 |
+
"""
|
| 4 |
+
from __future__ import annotations
|
| 5 |
+
|
| 6 |
+
from typing import List, Optional
|
| 7 |
+
|
| 8 |
+
from pydantic import BaseModel, Field, field_validator
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# ---------------------------------------------------------------------------
|
| 12 |
+
# Requests
|
| 13 |
+
# ---------------------------------------------------------------------------
|
| 14 |
+
|
| 15 |
+
class LandmarkMessage(BaseModel):
    """
    Payload sent by the client over /ws/landmarks or POST /api/predict.
    'landmarks' is a flat list of [x0,y0,z0, x1,y1,z1, ..., x20,y20,z20]
    extracted by MediaPipe Hands on the browser side.
    """
    landmarks: List[float] = Field(..., min_length=63, max_length=63)
    session_id: str = Field(default="default")  # used only for log correlation server-side

    # NOTE(review): this validator duplicates the min_length/max_length
    # constraints on the Field above; it adds an explicit error message only.
    @field_validator("landmarks")
    @classmethod
    def must_be_63_floats(cls, v: List[float]) -> List[float]:
        if len(v) != 63:
            raise ValueError(f"landmarks must contain exactly 63 values, got {len(v)}")
        return v
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class ImageMessage(BaseModel):
    """
    Payload sent when Pipeline C (CNN+SVM) is invoked via /ws/image.
    'image_b64' is a base-64 encoded JPEG of the cropped hand region (128×128).
    """
    # The server resizes to 128×128 before inference (pipeline_c._decode_image),
    # so crops of any size are accepted here.
    image_b64: str = Field(..., description="Base-64 encoded JPEG of the hand crop (128×128 px)")
    session_id: str = Field(default="default")  # used only for log correlation server-side
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class EnsembleMessage(BaseModel):
    """
    Combined payload: landmarks + optional image for the full ensemble pipeline.
    """
    # Same 63-float layout as LandmarkMessage (21 hand landmarks × x/y/z).
    landmarks: List[float] = Field(..., min_length=63, max_length=63)
    # Optional base-64 hand crop; only consulted when the image-based
    # Pipeline C may be used as a fallback.
    image_b64: Optional[str] = Field(default=None)
    session_id: str = Field(default="default")
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
# ---------------------------------------------------------------------------
|
| 51 |
+
# Responses
|
| 52 |
+
# ---------------------------------------------------------------------------
|
| 53 |
+
|
| 54 |
+
class PredictionResponse(BaseModel):
    """Prediction returned by every REST and WebSocket inference endpoint."""
    sign: str = Field(..., description="Gujarati sign character(s)")
    confidence: float = Field(..., ge=0.0, le=1.0)
    pipeline: str = Field(..., description="Which pipeline(s) produced this result: A, B, C, or ensemble")
    label_index: int = Field(..., ge=0, le=33)  # index into the 34-class label map
    probabilities: Optional[List[float]] = Field(
        default=None,
        description="Full 34-class probability vector (optional, increases payload size)"
    )
    latency_ms: Optional[float] = Field(default=None, description="Server-side inference latency in ms")
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
class HealthResponse(BaseModel):
    """Response for GET /health."""
    status: str                     # "ok" when models are loaded, "loading" otherwise
    models_loaded: bool             # True once load_models() has completed
    pipelines_available: List[str]  # subset of ["A", "B", "C"]
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
class ErrorResponse(BaseModel):
    """Error payload sent over the WebSockets and by the global exception handler."""
    error: str                    # short error category shown to the client
    detail: Optional[str] = None  # optional exception text
|
backend/requirements-dev.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Development + testing dependencies (not required in production)
|
| 2 |
+
pytest>=9.0
|
| 3 |
+
httpx>=0.28
|
| 4 |
+
pytest-anyio
|
backend/requirements.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi>=0.115.0
|
| 2 |
+
uvicorn[standard]>=0.30.0
|
| 3 |
+
websockets>=12.0
|
| 4 |
+
xgboost>=2.0.0
|
| 5 |
+
lightgbm>=4.3.0
|
| 6 |
+
scikit-learn>=1.4.0
|
| 7 |
+
keras==3.13.2
|
| 8 |
+
tensorflow-cpu>=2.20.0
|
| 9 |
+
numpy>=1.26.0
|
| 10 |
+
opencv-python-headless>=4.9.0
|
| 11 |
+
pillow>=10.3.0
|
| 12 |
+
python-dotenv>=1.0.0
|
backend/tests/__init__.py
ADDED
|
File without changes
|
backend/tests/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (164 Bytes). View file
|
|
|
backend/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc
ADDED
|
Binary file (4.07 kB). View file
|
|
|
backend/tests/__pycache__/test_pipeline_a.cpython-312-pytest-9.0.2.pyc
ADDED
|
Binary file (14.4 kB). View file
|
|
|
backend/tests/__pycache__/test_pipeline_b.cpython-312-pytest-9.0.2.pyc
ADDED
|
Binary file (11.6 kB). View file
|
|
|