devrajsinh2012 commited on
Commit
cf93910
·
0 Parent(s):

Initial commit: SanketSetu - Sign Language Recognition System

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +29 -0
  2. .gitattributes +2 -0
  3. .github/workflows/deploy-backend.yml +61 -0
  4. .github/workflows/deploy-frontend.yml +45 -0
  5. CNN_Autoencoder_LightGBM/autoencoder_model.pkl +3 -0
  6. CNN_Autoencoder_LightGBM/autoencoder_model.pth +3 -0
  7. CNN_Autoencoder_LightGBM/lgbm_model.pkl +3 -0
  8. CNN_Autoencoder_LightGBM/lgbm_model.pth +3 -0
  9. CNN_PreTrained/cnn_model.pkl +3 -0
  10. CNN_PreTrained/cnn_model.pth +3 -0
  11. CNN_PreTrained/svm_model.pkl +3 -0
  12. CNN_PreTrained/svm_model.pth +3 -0
  13. Dockerfile +48 -0
  14. Mediapipe_XGBoost/model.pkl +3 -0
  15. Mediapipe_XGBoost/model.pth +3 -0
  16. README.md +108 -0
  17. SanketSetu_ Production-Grade Implementation Plan.md +99 -0
  18. TASKS.md +284 -0
  19. backend/.env.example +32 -0
  20. backend/app/__init__.py +1 -0
  21. backend/app/__pycache__/__init__.cpython-312.pyc +0 -0
  22. backend/app/__pycache__/config.cpython-312.pyc +0 -0
  23. backend/app/__pycache__/main.cpython-312.pyc +0 -0
  24. backend/app/__pycache__/schemas.cpython-312.pyc +0 -0
  25. backend/app/config.py +64 -0
  26. backend/app/inference/__init__.py +1 -0
  27. backend/app/inference/__pycache__/__init__.cpython-312.pyc +0 -0
  28. backend/app/inference/__pycache__/ensemble.cpython-312.pyc +0 -0
  29. backend/app/inference/__pycache__/pipeline_a.cpython-312.pyc +0 -0
  30. backend/app/inference/__pycache__/pipeline_b.cpython-312.pyc +0 -0
  31. backend/app/inference/__pycache__/pipeline_c.cpython-312.pyc +0 -0
  32. backend/app/inference/ensemble.py +138 -0
  33. backend/app/inference/pipeline_a.py +57 -0
  34. backend/app/inference/pipeline_b.py +59 -0
  35. backend/app/inference/pipeline_c.py +86 -0
  36. backend/app/main.py +280 -0
  37. backend/app/models/__init__.py +1 -0
  38. backend/app/models/__pycache__/__init__.cpython-312.pyc +0 -0
  39. backend/app/models/__pycache__/label_map.cpython-312.pyc +0 -0
  40. backend/app/models/__pycache__/loader.cpython-312.pyc +0 -0
  41. backend/app/models/label_map.py +58 -0
  42. backend/app/models/loader.py +188 -0
  43. backend/app/schemas.py +74 -0
  44. backend/requirements-dev.txt +4 -0
  45. backend/requirements.txt +12 -0
  46. backend/tests/__init__.py +0 -0
  47. backend/tests/__pycache__/__init__.cpython-312.pyc +0 -0
  48. backend/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc +0 -0
  49. backend/tests/__pycache__/test_pipeline_a.cpython-312-pytest-9.0.2.pyc +0 -0
  50. backend/tests/__pycache__/test_pipeline_b.cpython-312-pytest-9.0.2.pyc +0 -0
.dockerignore ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ── Python ────────────────────────────────────────────────────────────────────
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.pyo
5
+ .venv/
6
+ *.egg-info/
7
+ .pytest_cache/
8
+ .mypy_cache/
9
+ dist/
10
+
11
+ # ── Node / Frontend ───────────────────────────────────────────────────────────
12
+ frontend/node_modules/
13
+ frontend/dist/
14
+ frontend/.env.local
15
+
16
+ # ── Git / Editor ──────────────────────────────────────────────────────────────
17
+ .git/
18
+ .gitignore
19
+ .vscode/
20
+ *.md
21
+ TASKS.md
22
+
23
+ # ── OS ────────────────────────────────────────────────────────────────────────
24
+ .DS_Store
25
+ Thumbs.db
26
+
27
+ # ── Model binary variants (keep only .pkl, not duplicate .pth) ───────────────
28
+ # Both extensions are identical — Docker only needs .pkl
29
+ **/*.pth
.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.pth filter=lfs diff=lfs merge=lfs -text
2
+ *.pkl filter=lfs diff=lfs merge=lfs -text
.github/workflows/deploy-backend.yml ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Deploy Backend → Fly.io
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ paths:
7
+ - 'backend/**'
8
+ - 'Dockerfile'
9
+ - '.dockerignore'
10
+ - 'fly.toml'
11
+ - 'Mediapipe_XGBoost/**'
12
+ - 'CNN_Autoencoder_LightGBM/**'
13
+ - 'CNN_PreTrained/**'
14
+
15
+ jobs:
16
+ test:
17
+ name: Run backend tests
18
+ runs-on: ubuntu-latest
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+
22
+ - uses: actions/setup-python@v5
23
+ with:
24
+ python-version: '3.12'
25
+ cache: pip
26
+ cache-dependency-path: backend/requirements.txt
27
+
28
+ - name: Install deps
29
+ run: |
30
+ pip install -r backend/requirements.txt
31
+ pip install -r backend/requirements-dev.txt
32
+
33
+ - name: Run tests
34
+ working-directory: backend
35
+ env:
36
+ KERAS_BACKEND: tensorflow
37
+ TF_CPP_MIN_LOG_LEVEL: "3"
38
+ CUDA_VISIBLE_DEVICES: ""
39
+ TF_ENABLE_ONEDNN_OPTS: "0"
40
+ run: pytest tests/ -v --tb=short -q
41
+ # Note: tests will be skipped automatically if model .pkl files are absent
42
+ # (model artefacts are gitignored). Add them as GitHub Actions artifacts
43
+ # or use DVC/GCS to restore them in CI if you want full test coverage.
44
+
45
+ deploy:
46
+ name: Deploy to Fly.io
47
+ needs: test
48
+ runs-on: ubuntu-latest
49
+ environment: production
50
+ concurrency:
51
+ group: fly-deploy
52
+ cancel-in-progress: true
53
+ steps:
54
+ - uses: actions/checkout@v4
55
+
56
+ - uses: superfly/flyctl-actions/setup-flyctl@master
57
+
58
+ - name: Deploy
59
+ run: flyctl deploy --remote-only
60
+ env:
61
+ FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
.github/workflows/deploy-frontend.yml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Deploy Frontend → Vercel
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ paths:
7
+ - 'frontend/**'
8
+
9
+ jobs:
10
+ build-and-deploy:
11
+ name: Build & Deploy
12
+ runs-on: ubuntu-latest
13
+ environment: production
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+
17
+ - uses: actions/setup-node@v4
18
+ with:
19
+ node-version: '22'
20
+ cache: npm
21
+ cache-dependency-path: frontend/package-lock.json
22
+
23
+ - name: Install dependencies
24
+ working-directory: frontend
25
+ run: npm ci
26
+
27
+ - name: Type-check
28
+ working-directory: frontend
29
+ run: npx tsc --project tsconfig.app.json --noEmit
30
+
31
+ - name: Build
32
+ working-directory: frontend
33
+ env:
34
+ VITE_WS_URL: ${{ vars.VITE_WS_URL }}
35
+ VITE_API_URL: ${{ vars.VITE_API_URL }}
36
+ run: npm run build
37
+
38
+ - name: Deploy to Vercel
39
+ uses: amondnet/vercel-action@v25
40
+ with:
41
+ vercel-token: ${{ secrets.VERCEL_TOKEN }}
42
+ vercel-org-id: ${{ secrets.VERCEL_ORG_ID }}
43
+ vercel-project-id: ${{ secrets.VERCEL_PROJECT_ID }}
44
+ working-directory: frontend
45
+ vercel-args: '--prod'
CNN_Autoencoder_LightGBM/autoencoder_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5843688f059d26851774e553c4afddbc7c0f2f7fc048401b8447f290a63d2cbe
3
+ size 92934
CNN_Autoencoder_LightGBM/autoencoder_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3593536edda0328121d5f92fd186a8e40c341799bd9bb703e0e2ad155b6e7aeb
3
+ size 121321
CNN_Autoencoder_LightGBM/lgbm_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e83d2bb3a18da0b3ccdd7afc5d044fa52c6e70c4e6090b312a622a866ee0008
3
+ size 3623126
CNN_Autoencoder_LightGBM/lgbm_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a09f7b712da5f0e6b63e222e4ea938029567bd8cf496da7ad93752d54219b57
3
+ size 3626367
CNN_PreTrained/cnn_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:464df17407edea99db1b69c20e7ff718f6ceafb05f1bbeaacc889499e4cd920a
3
+ size 97136794
CNN_PreTrained/cnn_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb8e3419763c47b5ba2480ccaf9907e8d748602b26fe59c009b6112fa840ae5
3
+ size 146278905
CNN_PreTrained/svm_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf21a17c1340b84359c3431fc4ae8eb05239e4e1ef58dd34ab775f53b9bc7f53
3
+ size 929927
CNN_PreTrained/svm_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e706ebf4588c580d0e6ac6f1554f9fd2eaef5564ee02f8022e3ca5f13bb8985b
3
+ size 1079865
Dockerfile ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ─────────────────────────────────────────────────────────────────────────────
2
+ # SanketSetu Backend — Dockerfile
3
+ # Build context: repo root (SanketSetu/)
4
+ #
5
+ # docker build -t sanketsetu-backend .
6
+ # docker run -p 8000:8000 sanketsetu-backend
7
+ # ─────────────────────────────────────────────────────────────────────────────
8
+
9
+ FROM python:3.12-slim AS base
10
+
11
+ # System libraries needed by OpenCV headless + Pillow
12
+ RUN apt-get update && apt-get install -y --no-install-recommends \
13
+ libgl1 libglib2.0-0 libgomp1 \
14
+ && rm -rf /var/lib/apt/lists/*
15
+
16
+ # ── Python dependencies (cached layer) ───────────────────────────────────────
17
+ WORKDIR /app
18
+ COPY backend/requirements.txt ./
19
+ RUN pip install --no-cache-dir -r requirements.txt
20
+
21
+ # ── Application source ────────────────────────────────────────────────────────
22
+ COPY backend/app/ ./app/
23
+
24
+ # ── Model artefacts ───────────────────────────────────────────────────────────
25
+ # Copied to /models so the container is fully self-contained.
26
+ # Override at runtime with -e WEIGHTS_DIR=/mnt/models + bind-mount if preferred.
27
+ COPY Mediapipe_XGBoost/ /models/Mediapipe_XGBoost/
28
+ COPY CNN_Autoencoder_LightGBM/ /models/CNN_Autoencoder_LightGBM/
29
+ COPY CNN_PreTrained/ /models/CNN_PreTrained/
30
+
31
+ # ── Runtime environment ───────────────────────────────────────────────────────
32
+ ENV WEIGHTS_DIR=/models \
33
+ KERAS_BACKEND=tensorflow \
34
+ TF_CPP_MIN_LOG_LEVEL=3 \
35
+ CUDA_VISIBLE_DEVICES="" \
36
+ TF_ENABLE_ONEDNN_OPTS=0 \
37
+ OMP_NUM_THREADS=4 \
38
+ PYTHONDONTWRITEBYTECODE=1 \
39
+ PYTHONUNBUFFERED=1
40
+
41
+ EXPOSE 8000
42
+
43
+ # ── Health-check ──────────────────────────────────────────────────────────────
44
+ # Wait up to 3 minutes for models to load before marking the container healthy.
45
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=180s --retries=3 \
46
+ CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health', timeout=5)"
47
+
48
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
Mediapipe_XGBoost/model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a736b83df3e56b69b0f1c11f018257760746969d6598d90ea2a60c78f8305883
3
+ size 1711525
Mediapipe_XGBoost/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ff5f1cbc121be57f2a7fe04b38925ea740fe79602a6205ca09a748cb0f20b81
3
+ size 1895969
README.md ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SanketSetu
2
+
3
+ A real-time sign language recognition system using machine learning and computer vision.
4
+
5
+ ## Overview
6
+
7
+ SanketSetu is an intelligent sign language interpretation system that provides real-time recognition and translation of sign language gestures using advanced machine learning models and MediaPipe hand tracking.
8
+
9
+ ## Project Structure
10
+
11
+ ```
12
+ ├── backend/ # FastAPI backend server
13
+ │ ├── app/ # Main application code
14
+ │ │ ├── inference/ # ML inference pipelines
15
+ │ │ └── models/ # Model loading and management
16
+ │ └── tests/ # Backend tests
17
+ ├── frontend/ # React + TypeScript frontend
18
+ │ └── src/
19
+ │ ├── components/ # React components
20
+ │ ├── hooks/ # Custom React hooks
21
+ │ └── lib/ # Utility libraries
22
+ ├── CNN_Autoencoder_LightGBM/ # CNN Autoencoder + LightGBM model
23
+ ├── CNN_PreTrained/ # CNN + SVM model
24
+ └── Mediapipe_XGBoost/ # MediaPipe + XGBoost model
25
+ ```
26
+
27
+ ## Features
28
+
29
+ - Real-time sign language gesture recognition
30
+ - Multiple ML model ensemble approach
31
+ - WebSocket-based real-time communication
32
+ - MediaPipe hand landmark tracking
33
+ - Interactive webcam feed with visual feedback
34
+ - Prediction confidence display
35
+
36
+ ## Tech Stack
37
+
38
+ ### Backend
39
+ - FastAPI
40
+ - Python 3.x
41
+ - TensorFlow / Keras (CPU)
42
+ - LightGBM
43
+ - XGBoost
44
+ - MediaPipe
45
+
46
+ ### Frontend
47
+ - React
48
+ - TypeScript
49
+ - Vite
50
+ - TailwindCSS
51
+
52
+ ## Getting Started
53
+
54
+ ### Prerequisites
55
+ - Python 3.12+
56
+ - Node.js 22+ (matches CI)
57
+ - npm or yarn
58
+
59
+ ### Backend Setup
60
+
61
+ ```bash
62
+ cd backend
63
+ pip install -r requirements.txt
64
+ python -m app.main
65
+ ```
66
+
67
+ ### Frontend Setup
68
+
69
+ ```bash
70
+ cd frontend
71
+ npm install
72
+ npm run dev
73
+ ```
74
+
75
+ ## Development
76
+
77
+ Run the development servers:
78
+
79
+ ```bash
80
+ # Start both frontend and backend
81
+ .\start.ps1
82
+ ```
83
+
84
+ ## Docker
85
+
86
+ Build and run using Docker:
87
+
88
+ ```bash
89
+ docker build -t sanketsetu .
90
+ docker run -p 8000:8000 sanketsetu
91
+ ```
92
+
93
+ ## Testing
94
+
95
+ Run backend tests:
96
+
97
+ ```bash
98
+ cd backend
99
+ pytest
100
+ ```
101
+
102
+ ## License
103
+
104
+ All rights reserved.
105
+
106
+ ## Author
107
+
108
+ Devrajsinh Gohil (devrajsinh2012)
SanketSetu_ Production-Grade Implementation Plan.md ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SanketSetu: Production-Grade Implementation Plan
2
+
3
+ ## 1. Executive Summary
4
+ **SanketSetu** (Bridge of Signs) is a high-performance, real-time Gujarati Sign Language (GSL) recognition system. This document outlines a production-ready architecture designed to run entirely on **free-tier cloud services**. The system leverages a decoupled architecture with a React-based interactive frontend and a FastAPI backend, ensuring low-latency inference and a seamless user experience.
5
+
6
+ ---
7
+
8
+ ## 2. High-Level System Architecture
9
+ The system follows a modern microservices-inspired pattern to ensure scalability and ease of updates.
10
+
11
+ | Component | Technology | Role | Hosting (Free Tier) |
12
+ | :--- | :--- | :--- | :--- |
13
+ | **Frontend** | React + Vite + TS | User interface, webcam capture, real-time feedback | **Vercel** |
14
+ | **Backend API** | FastAPI (Python) | WebSocket management, API gateway, logic | **Fly.io** |
15
+ | **Inference Engine** | ONNX Runtime / XGBoost | High-speed model execution | **Fly.io** (Internal) |
16
+ | **Storage** | Cloudflare R2 | S3-compatible storage for model weights | **Cloudflare** |
17
+ | **Real-time** | WebSockets (WSS) | Low-latency frame-by-frame data transfer | N/A |
18
+
19
+ ---
20
+
21
+ ## 3. Backend Implementation Details
22
+
23
+ ### 3.1 API Design (FastAPI)
24
+ The backend is built for speed. It handles binary data from WebSockets to minimize overhead.
25
+
26
+ * **WebSocket Protocol**: The client sends a stream of normalized hand landmark coordinates (63 values per frame — 21 landmarks × x, y, z) extracted locally via MediaPipe. This reduces bandwidth significantly compared to sending raw video frames.
27
+ * **Concurrency**: Uses `asyncio` to handle multiple simultaneous user connections without blocking the event loop.
28
+ * **Model Loading**: Models are loaded into memory at startup using a Singleton pattern to ensure zero-latency on the first request.
29
+
30
+ ### 3.2 Model Serving Strategy
31
+ 1. **Primary Model**: The **XGBoost** model is used as the default due to its sub-millisecond inference time.
32
+ 2. **Backup/Ensemble**: The system can optionally query the **CNN+SVM** or **LGBM** models for high-confidence verification if the XGBoost score is below a certain threshold.
33
+ 3. **Optimization**: Models are converted to **ONNX** format to leverage the ONNX Runtime's hardware-specific optimizations, even on free-tier CPU instances.
34
+
35
+ ---
36
+
37
+ ## 4. Frontend & Interactive UI/UX
38
+
39
+ The frontend is designed to be "cool," responsive, and highly interactive, providing users with a "futuristic" feel.
40
+
41
+ ### 4.1 Tech Stack
42
+ * **Styling**: Tailwind CSS for rapid, modern UI development.
43
+ * **Animations**: Framer Motion for smooth transitions, layout changes, and interactive elements.
44
+ * **Icons**: Lucide React for a clean, consistent icon set.
45
+
46
+ ### 4.2 Key UI Features
47
+ * **Glassmorphism Design**: Use of semi-transparent backgrounds with blur effects for a modern look.
48
+ * **Interactive Landmark Overlay**: A canvas overlay on the webcam feed that draws the 21 hand landmarks in real-time. Landmarks will "glow" when a sign is successfully recognized.
49
+ * **Dynamic Prediction HUD**: A Head-Up Display (HUD) style interface that shows the current prediction, confidence level, and a history of recently detected signs.
50
+ * **Responsive Layout**: Fully functional on mobile and desktop, with optimized camera controls for both.
51
+
52
+ ### 4.3 User Experience Flow
53
+ 1. **Onboarding**: A quick, animated guide on how to position the hand for best results.
54
+ 2. **Calibration**: A brief "Ready?" state that ensures the lighting and hand distance are optimal.
55
+ 3. **Real-time Translation**: Instant feedback as the user signs, with the translated Gujarati text appearing in a stylized "speech bubble" or text box.
56
+
57
+ ---
58
+
59
+ ## 5. Deployment & DevOps
60
+
61
+ ### 5.1 Continuous Integration/Deployment (CI/CD)
62
+ Using **GitHub Actions**, the project will follow a strict deployment pipeline:
63
+ 1. **Lint & Test**: Ensure code quality and run unit tests for ML logic.
64
+ 2. **Build**: Create optimized production builds for the React app and Dockerize the FastAPI backend.
65
+ 3. **Deploy**:
66
+ * Frontend automatically pushes to **Vercel**.
67
+ * Backend pushes to **Fly.io** using `flyctl`.
68
+
69
+ ### 5.2 Scalability & Cost Management
70
+ * **Scale-to-Zero**: The backend on Fly.io can be configured to sleep when not in use to preserve free-tier resources.
71
+ * **CDN Caching**: Vercel's Edge Network will cache all static assets, ensuring fast load times globally.
72
+
73
+ ---
74
+
75
+ ## 6. Implementation Roadmap
76
+
77
+ ### Phase 1: Core Backend & ML Integration
78
+ - [ ] Set up FastAPI project structure.
79
+ - [ ] Implement WebSocket handler for landmark data.
80
+ - [ ] Integrate the trained XGBoost model for real-time inference.
81
+
82
+ ### Phase 2: Advanced Frontend Development
83
+ - [ ] Initialize Vite + React project with Tailwind.
84
+ - [ ] Implement webcam capture and MediaPipe landmark extraction (client-side).
85
+ - [ ] Create the interactive HUD and glassmorphism UI.
86
+
87
+ ### Phase 3: Production Hardening
88
+ - [ ] Set up GitHub Actions for automated deployment.
89
+ - [ ] Implement error handling for low-bandwidth scenarios.
90
+ - [ ] Finalize documentation and user guide.
91
+
92
+ ---
93
+
94
+ ## 7. References
95
+ [1] [FastAPI Documentation](https://fastapi.tiangolo.com/) - High-performance web framework for building APIs.
96
+ [2] [MediaPipe Hands](https://developers.google.com/mediapipe/solutions/vision/hand_landmarker) - Real-time hand landmark detection.
97
+ [3] [Framer Motion](https://www.framer.com/motion/) - A production-ready motion library for React.
98
+ [4] [Fly.io Free Tier](https://fly.io/docs/about/pricing/) - Details on free-tier resource allocation.
99
+ [5] [Vercel Deployment](https://vercel.com/docs/deployments/overview) - Global CDN and hosting for frontend applications.
TASKS.md ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SanketSetu — Execution TODO & Implementation Tracker
2
+
3
+ ## Model Analysis (Reviewed 2026-03-02)
4
+
5
+ All 5 model files inspected. Three distinct inference pipelines exist:
6
+
7
+ | Pipeline | Files | Input | Process | Output |
8
+ |---|---|---|---|---|
9
+ | **A — Primary (Fastest)** | `Mediapipe_XGBoost/model.pkl` | 63 MediaPipe coords (21 landmarks × x,y,z) | XGBClassifier (50 trees) | 34-class probability |
10
+ | **B — Autoencoder + LGBM** | `CNN_Autoencoder_LightGBM/autoencoder_model.pkl` + `lgbm_model.pkl` | 63 MediaPipe coords | Encoder (63→32→**16** bottleneck) + LGBMClassifier | 34-class probability |
11
+ | **C — Vision CNN + SVM** | `CNN_PreTrained/cnn_model.pkl` + `svm_model.pkl` | 128×128×3 RGB image | ResNet50-based CNN (179 layers) → 256 features + SVC(C=10) | 34-class probability w/ probability=True |
12
+
13
+ ### Key Architecture Facts
14
+ - **34 classes** (Gujarati Sign Language alphabet + digits, labels 0–33)
15
+ - **Pipeline A** input: 63 floats — directly from MediaPipe `hand_landmarks` (x, y, z per landmark, flattened)
16
+ - **Pipeline B** input: same 63 floats → takes only the encoder half (first 3 Dense layers, output of `dense_1` layer = 16 features)
17
+ - **Pipeline C** input: 128×128 BGR/RGB cropped hand image, normalized to [0,1]
18
+ - All `.pth` files are identical copies of the `.pkl` files (same objects, different extension)
19
+ - Model quality strategy: A is primary (sub-ms); if confidence < threshold, query B or C for ensemble
20
+
21
+ ---
22
+
23
+ ## Project Folder Structure to Create
24
+
25
+ ```
26
+ SanketSetu/
27
+ ├── backend/ ← FastAPI server
28
+ │ ├── app/
29
+ │ │ ├── main.py ← FastAPI entry, WebSocket + REST
30
+ │ │ ├── models/
31
+ │ │ │ ├── loader.py ← Singleton model loader
32
+ │ │ │ └── label_map.py ← 0–33 → Gujarati sign name mapping
33
+ │ │ ├── inference/
34
+ │ │ │ ├── pipeline_a.py ← XGBoost inference (63 landmarks)
35
+ │ │ │ ├── pipeline_b.py ← Autoencoder encoder + LightGBM
36
+ │ │ │ ├── pipeline_c.py ← ResNet CNN + SVM (image-based)
37
+ │ │ │ └── ensemble.py ← Confidence-weighted ensemble logic
38
+ │ │ ├── schemas.py ← Pydantic request/response models
39
+ │ │ └── config.py ← Settings (confidence threshold, etc.)
40
+ │ ├── weights/ ← Symlink or copy of model pkl files
41
+ │ ├── requirements.txt
42
+ │ ├── Dockerfile
43
+ │ └── fly.toml
44
+
45
+ ├── frontend/ ← Vite + React + TS
46
+ │ ├── src/
47
+ │ │ ├── components/
48
+ │ │ │ ├── WebcamFeed.tsx ← Webcam + canvas landmark overlay
49
+ │ │ │ ├── LandmarkCanvas.tsx ← Draws 21 hand points + connections
50
+ │ │ │ ├── PredictionHUD.tsx ← Live sign, confidence bar, history
51
+ │ │ │ ├── OnboardingGuide.tsx ← Animated intro wizard
52
+ │ │ │ └── Calibration.tsx ← Lighting/distance check UI
53
+ │ │ ├── hooks/
54
+ │ │ │ ├── useWebSocket.ts ← WS connection, send/receive
55
+ │ │ │ ├── useMediaPipe.ts ← MediaPipe Hands JS integration
56
+ │ │ │ └── useWebcam.ts ← Camera permissions + stream
57
+ │ │ ├── lib/
58
+ │ │ │ └── landmarkUtils.ts ← Landmark normalization (mirror XGBoost preprocessing)
59
+ │ │ ├── App.tsx
60
+ │ │ └── main.tsx
61
+ │ ├── public/
62
+ │ ├── index.html
63
+ │ ├── tailwind.config.ts
64
+ │ ├── vite.config.ts
65
+ │ └── package.json
66
+
67
+ ├── CNN_Autoencoder_LightGBM/ ← (existing)
68
+ ├── CNN_PreTrained/ ← (existing)
69
+ ├── Mediapipe_XGBoost/ ← (existing)
70
+ └── .github/
71
+ └── workflows/
72
+ ├── deploy-backend.yml
73
+ └── deploy-frontend.yml
74
+ ```
75
+
76
+ ---
77
+
78
+ ## Phase 1 — Backend Core (FastAPI + Model Integration)
79
+
80
+ ### 1.1 Project Bootstrap
81
+ - [x] Create `backend/` folder and `app/` package structure
82
+ - [x] Create `backend/requirements.txt` with: `fastapi`, `uvicorn[standard]`, `websockets`, `xgboost`, `lightgbm`, `scikit-learn`, `keras==3.13.2`, `tensorflow-cpu`, `numpy`, `opencv-python-headless`, `pillow`, `python-dotenv`
83
+ - [x] Create `backend/app/config.py` — confidence threshold (default 0.7), WebSocket max connections, pipeline mode (A/B/C/ensemble)
84
+ - [x] Create `backend/app/models/label_map.py` — map class indices 0–33 to Gujarati sign names
85
+
86
+ ### 1.2 Model Loader (Singleton)
87
+ - [x] Create `backend/app/models/loader.py`
88
+ - Load `model.pkl` (XGBoost) at startup
89
+ - Load `autoencoder_model.pkl` (extract encoder layers only: input → dense → dense_1) and `lgbm_model.pkl`
90
+ - Load `cnn_model.pkl` (full ResNet50 feature extractor, strip any classification head) and `svm_model.pkl`
91
+ - Expose `ModelStore` singleton accessed via `get_model_store()` dependency
92
+ - Log load times for each model
93
+
94
+ ### 1.3 Pipeline A — XGBoost (Primary, Landmarks)
95
+ - [x] Create `backend/app/inference/pipeline_a.py`
96
+ - Input: `List[float]` of length 63 (x,y,z per landmark, already normalized by MediaPipe)
97
+ - Output: `{"sign": str, "confidence": float, "probabilities": List[float]}`
98
+ - Use `model.predict_proba(np.array(landmarks).reshape(1,-1))[0]`
99
+ - Return `classes_[argmax]` and `max(probabilities)` as confidence
100
+
101
+ ### 1.4 Pipeline B — Autoencoder Encoder + LightGBM
102
+ - [x] Create `backend/app/inference/pipeline_b.py`
103
+ - Build encoder-only submodel: `encoder = keras.Model(inputs=model.input, outputs=model.layers[2].output)` (output of `dense_1`, the 16-D bottleneck)
104
+ - Input: 63 MediaPipe coords
105
+ - Encode: `features = encoder.predict(np.array(landmarks).reshape(1,-1))[0]` → shape (16,)
106
+ - Classify: `lgbm.predict_proba(features.reshape(1,-1))[0]`
107
+
108
+ ### 1.5 Pipeline C — CNN + SVM (Image-based)
109
+ - [x] Create `backend/app/inference/pipeline_c.py`
110
+ - Input: base64-encoded JPEG or raw bytes of the cropped hand region (128×128 px)
111
+ - Decode → numpy array (128,128,3) uint8 → normalize to float32 [0,1]
112
+ - `features = cnn_model.predict(img[np.newaxis])[0]` → shape (256,)
113
+ - `proba = svm.predict_proba(features.reshape(1,-1))[0]`
114
+ - Note: CNN inference is slower (~50–200ms on CPU); only call when Pipeline A confidence < threshold
115
+
116
+ ### 1.6 Ensemble Logic
117
+ - [x] Create `backend/app/inference/ensemble.py`
118
+ - Call Pipeline A first
119
+ - If `confidence < config.THRESHOLD` (default 0.7), call Pipeline B
120
+ - If still below threshold and image data available, call Pipeline C
121
+ - Final result: weighted average of probabilities from each pipeline that was called
122
+ - Return the top predicted class and ensemble confidence score
123
+
124
+ ### 1.7 WebSocket Handler
125
+ - [x] Create `backend/app/main.py` with FastAPI app
126
+ - [x] Implement `GET /health` — returns `{"status": "ok", "models_loaded": true}`
127
+ - [x] Implement `WS /ws/landmarks` — primary endpoint
128
+ - Client sends JSON: `{"landmarks": [63 floats], "session_id": "..."}`
129
+ - Server responds: `{"sign": "...", "confidence": 0.95, "pipeline": "A", "label_index": 12}`
130
+ - Handle disconnect gracefully
131
+ - [x] Implement `WS /ws/image` — optional image-based endpoint for Pipeline C
132
+ - Client sends JSON: `{"image_b64": "...", "session_id": "..."}`
133
+ - [x] Implement `POST /api/predict` — REST fallback for non-WS clients
134
+ - Body: `{"landmarks": [63 floats]}`
135
+ - Returns same response schema as WS
136
+
137
+ ### 1.8 Schemas & Validation
138
+ - [x] Create `backend/app/schemas.py`
139
+ - `LandmarkMessage(BaseModel)`: `landmarks: List[float]` (must be length 63), `session_id: str`
140
+ - `ImageMessage(BaseModel)`: `image_b64: str`, `session_id: str`
141
+ - `PredictionResponse(BaseModel)`: `sign: str`, `confidence: float`, `pipeline: str`, `label_index: int`, `probabilities: Optional[List[float]]`
142
+
143
+ ### 1.9 CORS & Middleware
144
+ - [x] Configure CORS for Vercel frontend domain + localhost:5173
145
+ - [x] Add request logging middleware (log session_id, pipeline used, latency ms)
146
+ - [x] Add global exception handler returning proper JSON errors
147
+
148
+ ---
149
+
150
+ ## Phase 2 — Frontend (React + Vite + Tailwind + Framer Motion)
151
+
152
+ ### 2.1 Project Bootstrap
153
+ - [x] Run `npm create vite@latest frontend -- --template react-ts` inside `SanketSetu/`
154
+ - [x] Install deps: `tailwindcss`, `framer-motion`, `lucide-react`, `@mediapipe/tasks-vision`
155
+ - [x] Configure Tailwind with custom palette (dark neon-cyan glassmorphism theme)
156
+ - [x] Set up `vite.config.ts` proxy: `/api` → backend URL, `/ws` → backend WS URL
157
+
158
+ ### 2.2 Webcam Hook (`useWebcam.ts`)
159
+ - [x] Request `getUserMedia({ video: { width: 1280, height: 720 } })`
160
+ - [x] Expose `videoRef`, `isReady`, `error`, `switchCamera()` (for mobile front/back toggle)
161
+ - [x] Handle permission denied state with instructional UI
162
+
163
+ ### 2.3 MediaPipe Hook (`useMediaPipe.ts`)
164
+ - [x] Initialize `HandLandmarker` from `@mediapipe/tasks-vision` (WASM backend)
165
+ - [x] Process video frames at target 30fps using `requestAnimationFrame`
166
+ - [x] Extract `landmarks[0]` (first hand) → flatten to 63 floats `[x0,y0,z0, x1,y1,z1, ...]`
167
+ - [x] Normalize: subtract wrist (landmark 0) position to make translation-invariant — **must match training preprocessing**
168
+ - [x] Expose `landmarks: number[] | null`, `handedness: string`, `isDetecting: boolean`
169
+
170
+ ### 2.4 WebSocket Hook (`useWebSocket.ts`)
171
+ - [x] Connect to `wss://backend-url/ws/landmarks` on mount
172
+ - [x] Auto-reconnect with exponential backoff on disconnect
173
+ - [x] `sendLandmarks(landmarks: number[])` — throttled to max 15 sends/sec
174
+ - [x] Expose `lastPrediction: PredictionResponse | null`, `isConnected: boolean`, `latency: number`
175
+
176
+ ### 2.5 Landmark Canvas (`LandmarkCanvas.tsx`)
177
+ - [x] Overlay `<canvas>` on top of `<video>` with `position: absolute`
178
+ - [x] Draw 21 hand landmark dots (cyan glow: `shadowBlur`, `shadowColor`)
179
+ - [x] Draw 21 bone connections following MediaPipe hand topology (finger segments)
180
+ - [x] On successful prediction: animate landmarks to pulse/glow with Framer Motion spring
181
+ - [x] Use `requestAnimationFrame` for smooth 60fps rendering
182
+
183
+ ### 2.6 Prediction HUD (`PredictionHUD.tsx`)
184
+ - [x] Glassmorphism card: `backdrop-blur`, `bg-white/10`, `border-white/20`
185
+ - [x] Large Gujarati sign name (mapped from label index)
186
+ - [x] Confidence bar: animated width transition via Framer Motion `animate={{ width: confidence% }}`
187
+ - [x] Color coding: green (>85%), yellow (60–85%), red (<60%)
188
+ - [x] Rolling history list: last 10 recognized signs (Framer Motion `AnimatePresence` for enter/exit)
189
+ - [x] Pipeline badge: shows which pipeline (A/B/C) produced the result
190
+ - [x] Latency display: shows WS round-trip time in ms
191
+
192
+ ### 2.7 Onboarding Guide (`OnboardingGuide.tsx`)
193
+ - [x] 3-step animated wizard using Framer Motion page transitions
194
+ 1. "Position your hand 30–60cm from camera"
195
+ 2. "Ensure good lighting, avoid dark backgrounds"
196
+ 3. "Show signs clearly — palm facing camera"
197
+ - [x] Skip button + "Don't show again" (localStorage)
198
+
199
+ ### 2.8 Calibration Screen (`Calibration.tsx`)
200
+ - [x] Brief 2-second "Ready?" screen after onboarding
201
+ - [x] Check: hand detected by MediaPipe → show green checkmark animation
202
+ - [x] Auto-transitions to main translation view when hand is stable for 1 second
203
+
204
+ ### 2.9 Main App Layout (`App.tsx`)
205
+ - [x] Full-screen dark background with subtle animated gradient
206
+ - [x] Three-panel layout (desktop): webcam | HUD | history
207
+ - [x] Mobile: stacked layout with webcam top, HUD bottom
208
+ - [x] Header: "SanketSetu | સંકેત-સેતુ" with glowing text effect
209
+ - [x] Settings gear icon → modal for pipeline selection (A / B / C / Ensemble), confidence threshold slider
210
+
211
+ ---
212
+
213
+ ## Phase 3 — Dockerization & Deployment
214
+
215
+ ### 3.1 Backend Dockerfile
216
+ - [x] Create `Dockerfile` (repo root, build context includes models)
217
+ - [x] Add `.dockerignore` (excludes `.venv`, `node_modules`, `*.pth`, tests)
218
+ - [ ] Test locally: `docker build -t sanketsetu-backend . && docker run -p 8000:8000 sanketsetu-backend`
219
+
220
+ ### 3.2 Fly.io Configuration
221
+ - [x] Create `fly.toml` (repo root, region=maa, port 8000, shared-cpu-2x)
222
+ - [x] Note: Keras/TF will increase Docker image size — use `tensorflow-cpu` to keep slim
223
+ - [ ] Set secrets via `flyctl secrets set` for any API keys
224
+ - [ ] Run: `flyctl deploy --dockerfile Dockerfile`
225
+
226
+ ### 3.3 Vercel Frontend Deployment
227
+ - [x] Create `frontend/vercel.json` with SPA rewrite + WASM Content-Type header
228
+ - [x] Add `VITE_WS_URL` and `VITE_API_URL` to Vercel environment variables (via CI vars)
229
+ - [ ] Ensure `@mediapipe/tasks-vision` WASM files are served correctly (add to `public/`)
230
+
231
+ ### 3.4 GitHub Actions CI/CD
232
+ - [x] Create `.github/workflows/deploy-backend.yml`
233
+ - Triggers on push to `main` when `backend/**` changes
234
+ - Steps: checkout → setup Python → run tests → `flyctl deploy`
235
+ - [x] Create `.github/workflows/deploy-frontend.yml`
236
+ - Triggers on push to `main` when `frontend/**` changes
237
+ - Steps: checkout → `npm ci` → tsc → `npm run build` → Vercel CLI deploy
238
+
239
+ ---
240
+
241
+ ## Phase 4 — Testing & Hardening
242
+
243
+ ### 4.1 Backend Tests
244
+ - [x] `tests/test_pipeline_a.py` — 8 unit tests, XGBoost inference (4s)
245
+ - [x] `tests/test_pipeline_b.py` — 6 unit tests, encoder + LightGBM (49s)
246
+ - [x] `tests/test_pipeline_c.py` — 7 unit tests, CNN + SVM with real 128×128 images (14s)
247
+ - [x] `tests/test_websocket.py` — 7 integration tests, health + REST + WS round-trip
248
+
249
+ ### 4.2 Frontend Error Handling
250
+ - [ ] No-camera fallback UI (file upload for image mode)
251
+ - [x] WS reconnecting banner (red banner when `!isConnected && stage === 'running'`)
252
+ - [x] Low-bandwidth mode: reduce send rate to 5fps if latency > 500ms + yellow "LB" badge in HUD
253
+ - [x] MediaPipe WASM load failure fallback message (shown in header via `mpError`)
254
+
255
+ ### 4.3 Label Map (Critical)
256
+ - [ ] Create `backend/app/models/label_map.py` mapping classes 0–33 to actual Gujarati signs
257
+ - You need to confirm the exact mapping used during training (check your original dataset/notebook)
258
+ - Placeholder: `LABEL_MAP = { 0: "ક", 1: "ખ", ... , 33: "?" }`
259
+ - This file must exactly mirror what was used in training
260
+
261
+ ---
262
+
263
+ ## Execution Order (Start Here)
264
+
265
+ ```
266
+ Week 1: Phase 1.1 → 1.3 → 1.7 (get WS working with Pipeline A alone, test in browser)
267
+ Week 2: Phase 1.4 → 1.5 → 1.6 (add other pipelines + ensemble)
268
+ Week 3: Phase 2.1 → 2.2 → 2.3 → 2.4 (React skeleton + WS connected)
269
+ Week 4: Phase 2.5 → 2.6 → 2.7 → 2.8 → 2.9 (full UI)
270
+ Week 5: Phase 3 + 4 (deploy + tests)
271
+ ```
272
+
273
+ ---
274
+
275
+ ## Critical Decision Points
276
+
277
+ | Decision | Default | Notes |
278
+ |---|---|---|
279
+ | Primary pipeline | **A (XGBoost)** | Sub-ms inference, uses MediaPipe landmarks already extracted client-side |
280
+ | Confidence threshold for fallback | **0.70** | Tune after testing — if XGBoost confidence < 70%, fall back to Pipeline B |
281
+ | Enable Pipeline C (CNN) | **Optional / off by default** | Adds ~150ms latency and requires image upload, not just landmarks |
282
+ | MediaPipe model variant | **lite** | Use `hand_landmarker_lite.task` for mobile performance |
283
+ | WebSocket frame rate | **15fps** | Sufficient for sign recognition, avoids server overload |
284
+ | Gujarati label map | **CONFIRM WITH DATASET** | Classes 0–33 must match training data exactly |
backend/.env.example ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SanketSetu Backend — environment variables
2
+ # Copy this file to .env and edit as needed.
3
+ # All values below are the defaults; remove a line to keep the default.
4
+
5
+ # ── Model paths (default: resolved from repo root) ─────────────────────────
6
+ # WEIGHTS_DIR=/absolute/path/to/model/dir
7
+
8
+ # ── Inference ───────────────────────────────────────────────────────────────
9
+ # Pipeline A confidence below this → also run Pipeline B
10
+ CONFIDENCE_THRESHOLD=0.70
11
+
12
+ # Pipeline A+B ensemble confidence below this → also run Pipeline C (if image)
13
+ SECONDARY_THRESHOLD=0.60
14
+
15
+ # Which pipeline to run: A | B | C | ensemble
16
+ PIPELINE_MODE=ensemble
17
+
18
+ # ── Server ──────────────────────────────────────────────────────────────────
19
+ MAX_WS_CONNECTIONS=100
20
+
21
+ # Comma-separated list of allowed CORS origins
22
+ CORS_ORIGINS=http://localhost:5173,http://localhost:3000
23
+
24
+ # ── TensorFlow / Keras ──────────────────────────────────────────────────────
25
+ KERAS_BACKEND=tensorflow
26
+ TF_CPP_MIN_LOG_LEVEL=3
27
+ # Empty value = CPU-only, skip GPU scan (faster startup).
+ # (Kept on its own line: some .env parsers treat inline "#" as part of the value.)
+ CUDA_VISIBLE_DEVICES=
28
+ TF_ENABLE_ONEDNN_OPTS=0
29
+ OMP_NUM_THREADS=4
30
+
31
+ # ── Logging ─────────────────────────────────────────────────────────────────
32
+ LOG_LEVEL=INFO
backend/app/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # backend/app/__init__.py
backend/app/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (162 Bytes). View file
 
backend/app/__pycache__/config.cpython-312.pyc ADDED
Binary file (2.97 kB). View file
 
backend/app/__pycache__/main.cpython-312.pyc ADDED
Binary file (12.1 kB). View file
 
backend/app/__pycache__/schemas.cpython-312.pyc ADDED
Binary file (3.93 kB). View file
 
backend/app/config.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Application-wide settings for SanketSetu backend.
3
+ Override any value by setting the corresponding environment variable.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import os
8
+ from pathlib import Path
9
+
10
+ # ---------------------------------------------------------------------------
11
+ # TensorFlow / Keras startup optimisations
12
+ # Set these BEFORE any import that might pull in tensorflow.
13
+ # ---------------------------------------------------------------------------
14
+ os.environ.setdefault("KERAS_BACKEND", "tensorflow")
15
+ os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "3") # silence C++ TF logs
16
+ os.environ.setdefault("CUDA_VISIBLE_DEVICES", "") # CPU-only: skip GPU scan
17
+ os.environ.setdefault("TF_ENABLE_ONEDNN_OPTS", "0") # disable oneDNN init check
18
+ os.environ.setdefault("OMP_NUM_THREADS", "4") # cap CPU thread pool
19
+
20
+ # ---------------------------------------------------------------------------
21
+ # Paths
22
+ # ---------------------------------------------------------------------------
23
+ BASE_DIR = Path(__file__).resolve().parent.parent.parent # repo root
24
+ WEIGHTS_DIR = os.getenv("WEIGHTS_DIR", str(BASE_DIR))
25
+
26
+ # Individual model paths (relative to repo root)
27
+ PIPELINE_A_MODEL = os.path.join(WEIGHTS_DIR, "Mediapipe_XGBoost", "model.pkl")
28
+ PIPELINE_B_AE = os.path.join(WEIGHTS_DIR, "CNN_Autoencoder_LightGBM", "autoencoder_model.pkl")
29
+ PIPELINE_B_LGBM = os.path.join(WEIGHTS_DIR, "CNN_Autoencoder_LightGBM", "lgbm_model.pkl")
30
+ PIPELINE_C_CNN = os.path.join(WEIGHTS_DIR, "CNN_PreTrained", "cnn_model.pkl")
31
+ PIPELINE_C_SVM = os.path.join(WEIGHTS_DIR, "CNN_PreTrained", "svm_model.pkl")
32
+
33
+ # ---------------------------------------------------------------------------
34
+ # Inference thresholds
35
+ # ---------------------------------------------------------------------------
36
+ # If Pipeline A confidence falls below this, Pipeline B is also called.
37
+ CONFIDENCE_THRESHOLD: float = float(os.getenv("CONFIDENCE_THRESHOLD", "0.70"))
38
+
39
+ # If ensemble after B still below this, Pipeline C is attempted (if image provided).
40
+ SECONDARY_THRESHOLD: float = float(os.getenv("SECONDARY_THRESHOLD", "0.60"))
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # Pipeline mode
44
+ # ---------------------------------------------------------------------------
45
+ # "A" → only XGBoost (fastest)
46
+ # "B" → only Autoencoder + LGBM
47
+ # "C" → only CNN + SVM (image required)
48
+ # "ensemble" → A first, fallback to B, then C
49
+ PIPELINE_MODE: str = os.getenv("PIPELINE_MODE", "ensemble")
50
+
51
+ # ---------------------------------------------------------------------------
52
+ # WebSocket / server
53
+ # ---------------------------------------------------------------------------
54
+ MAX_WS_CONNECTIONS: int = int(os.getenv("MAX_WS_CONNECTIONS", "100"))
55
+ WS_SEND_RATE_LIMIT: int = int(os.getenv("WS_SEND_RATE_LIMIT", "15")) # max frames/sec per client
56
+
57
+ # Allowed CORS origins (comma-separated list in env var)
58
+ _cors_env = os.getenv("CORS_ORIGINS", "http://localhost:5173,http://localhost:3000")
59
+ CORS_ORIGINS: list[str] = [o.strip() for o in _cors_env.split(",") if o.strip()]
60
+
61
+ # ---------------------------------------------------------------------------
62
+ # Logging
63
+ # ---------------------------------------------------------------------------
64
+ LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
backend/app/inference/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # backend/app/inference/__init__.py
backend/app/inference/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (172 Bytes). View file
 
backend/app/inference/__pycache__/ensemble.cpython-312.pyc ADDED
Binary file (5.66 kB). View file
 
backend/app/inference/__pycache__/pipeline_a.cpython-312.pyc ADDED
Binary file (2.16 kB). View file
 
backend/app/inference/__pycache__/pipeline_b.cpython-312.pyc ADDED
Binary file (2.4 kB). View file
 
backend/app/inference/__pycache__/pipeline_c.cpython-312.pyc ADDED
Binary file (3.52 kB). View file
 
backend/app/inference/ensemble.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Ensemble logic — orchestrates Pipelines A → B → C with confidence-based fallback.
3
+
4
+ Strategy
5
+ --------
6
+ 1. Always run Pipeline A (XGBoost, sub-ms).
7
+ 2. If confidence < CONFIDENCE_THRESHOLD, also run Pipeline B (Autoencoder+LGBM).
8
+ 3. Average the probability vectors from the pipelines that were run.
9
+ 4. If ensemble confidence still < SECONDARY_THRESHOLD AND image data is supplied,
10
+ also run Pipeline C (CNN+SVM) and include it in the average.
11
+ 5. Return the class with the highest averaged probability.
12
+
13
+ The caller can also force a specific pipeline via the PIPELINE_MODE config.
14
+ """
15
+ from __future__ import annotations
16
+
17
+ import logging
18
+ import time
19
+ from typing import List, Optional, Any
20
+
21
+ import numpy as np
22
+
23
+ from app import config
24
+ from app.models.label_map import get_sign
25
+ from app.inference.pipeline_a import PredictionResult
26
+ import app.inference.pipeline_a as _pa
27
+ import app.inference.pipeline_b as _pb
28
+ import app.inference.pipeline_c as _pc
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
+ def run(
34
+ landmarks: List[float],
35
+ *,
36
+ image_input: Optional[str] = None,
37
+ xgb_model: Any,
38
+ encoder_model: Any,
39
+ lgbm_model: Any,
40
+ cnn_model: Any,
41
+ svm_model: Any,
42
+ pipeline_mode: str = "ensemble",
43
+ confidence_threshold: float = 0.70,
44
+ secondary_threshold: float = 0.60,
45
+ ) -> PredictionResult:
46
+ """
47
+ Run one or more inference pipelines and return a consolidated PredictionResult.
48
+
49
+ Parameters
50
+ ----------
51
+ landmarks : flat 63-element MediaPipe landmark vector
52
+ image_input : optional base-64 JPEG for Pipeline C
53
+ xgb_model : Pipeline A model
54
+ encoder_model : Pipeline B encoder (Keras sub-model)
55
+ lgbm_model : Pipeline B classifier
56
+ cnn_model : Pipeline C feature extractor
57
+ svm_model : Pipeline C classifier
58
+ pipeline_mode : "A" | "B" | "C" | "ensemble"
59
+ confidence_threshold : fallback to B when A confidence < this value
60
+ secondary_threshold : fallback to C when ensemble(A+B) confidence < this value
61
+ """
62
+ t0 = time.perf_counter()
63
+
64
+ # -----------------------------------------------------------
65
+ # Forced single-pipeline modes
66
+ # -----------------------------------------------------------
67
+ if pipeline_mode == "A":
68
+ if xgb_model is None:
69
+ raise RuntimeError("Pipeline A model not loaded.")
70
+ return _pa.predict(landmarks, xgb_model)
71
+
72
+ if pipeline_mode == "B":
73
+ if encoder_model is None or lgbm_model is None:
74
+ raise RuntimeError("Pipeline B models not loaded.")
75
+ return _pb.predict(landmarks, encoder_model, lgbm_model)
76
+
77
+ if pipeline_mode == "C":
78
+ if cnn_model is None or svm_model is None:
79
+ raise RuntimeError("Pipeline C models not loaded.")
80
+ if image_input is None:
81
+ raise ValueError("Pipeline C requires image_input.")
82
+ return _pc.predict(image_input, cnn_model, svm_model)
83
+
84
+ # -----------------------------------------------------------
85
+ # Ensemble mode (default)
86
+ # -----------------------------------------------------------
87
+ results: list[PredictionResult] = []
88
+ proba_stack: list[list[float]] = []
89
+
90
+ # Step 1 — Pipeline A (always)
91
+ if xgb_model is not None:
92
+ res_a = _pa.predict(landmarks, xgb_model)
93
+ results.append(res_a)
94
+ proba_stack.append(res_a.probabilities)
95
+ else:
96
+ logger.warning("Pipeline A not available in ensemble mode.")
97
+ res_a = None
98
+
99
+ # Step 2 — Pipeline B if A confidence is low
100
+ current_conf = float(np.max(np.mean(proba_stack, axis=0))) if proba_stack else 0.0
101
+ if current_conf < confidence_threshold and encoder_model is not None and lgbm_model is not None:
102
+ res_b = _pb.predict(landmarks, encoder_model, lgbm_model)
103
+ results.append(res_b)
104
+ proba_stack.append(res_b.probabilities)
105
+
106
+ # Step 3 — Pipeline C if still low and image provided
107
+ current_conf = float(np.max(np.mean(proba_stack, axis=0))) if proba_stack else 0.0
108
+ if (
109
+ current_conf < secondary_threshold
110
+ and image_input is not None
111
+ and cnn_model is not None
112
+ and svm_model is not None
113
+ ):
114
+ res_c = _pc.predict(image_input, cnn_model, svm_model)
115
+ results.append(res_c)
116
+ proba_stack.append(res_c.probabilities)
117
+
118
+ # -----------------------------------------------------------
119
+ # Aggregate
120
+ # -----------------------------------------------------------
121
+ if not proba_stack:
122
+ raise RuntimeError("No inference pipeline could be executed.")
123
+
124
+ avg_proba = np.mean(proba_stack, axis=0) # shape (34,)
125
+ idx = int(np.argmax(avg_proba))
126
+ conf = float(avg_proba[idx])
127
+
128
+ pipeline_labels = "+".join(r.pipeline for r in results)
129
+ total_latency = (time.perf_counter() - t0) * 1000
130
+
131
+ return PredictionResult(
132
+ sign=get_sign(idx),
133
+ confidence=conf,
134
+ label_index=idx,
135
+ probabilities=avg_proba.tolist(),
136
+ pipeline=pipeline_labels if len(results) > 1 else results[0].pipeline,
137
+ latency_ms=total_latency,
138
+ )
backend/app/inference/pipeline_a.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Pipeline A — XGBoost classifier on raw MediaPipe landmarks.
3
+
4
+ Input : 63 floats [x0,y0,z0 … x20,y20,z20] (already [0,1] normalised by MediaPipe)
5
+ Output : PredictionResult
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import time
10
+ from dataclasses import dataclass
11
+ from typing import List
12
+
13
+ import numpy as np
14
+
15
+ from app.models.label_map import get_sign
16
+
17
+
18
@dataclass
class PredictionResult:
    """Outcome of a single inference call, shared by all three pipelines."""

    sign: str                   # Gujarati sign name resolved via the label map
    confidence: float           # probability of the predicted class, in [0, 1]
    label_index: int            # argmax index into the probability vector
    probabilities: List[float]  # full per-class probability vector
    pipeline: str               # pipeline tag, e.g. "A", "B", "C" or "A+B"
    latency_ms: float           # wall-clock inference time in milliseconds
26
+
27
+
28
def predict(landmarks: List[float], xgb_model) -> PredictionResult:
    """
    Run XGBoost inference on a flat 63-element landmark vector.

    Parameters
    ----------
    landmarks : list of 63 floats ([x0, y0, z0, …, x20, y20, z20])
    xgb_model : loaded XGBClassifier instance

    Returns
    -------
    PredictionResult

    Raises
    ------
    ValueError
        If *landmarks* is empty or not a flat 63-element vector — fail fast
        with a clear message instead of an opaque XGBoost shape error.
    """
    t0 = time.perf_counter()

    X = np.asarray(landmarks, dtype=np.float32).reshape(1, -1)  # shape (1, n)
    if X.shape[1] != 63:
        raise ValueError(
            f"Expected 63 landmark values (21 points x x/y/z), got {X.shape[1]}."
        )

    proba = xgb_model.predict_proba(X)[0]  # per-class probabilities, shape (34,)
    idx = int(np.argmax(proba))
    conf = float(proba[idx])

    latency = (time.perf_counter() - t0) * 1000

    return PredictionResult(
        sign=get_sign(idx),
        confidence=conf,
        label_index=idx,
        probabilities=proba.tolist(),
        pipeline="A",
        latency_ms=latency,
    )
backend/app/inference/pipeline_b.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Pipeline B — Autoencoder encoder + LightGBM classifier.
3
+
4
+ Input : 63 floats [x0,y0,z0 … x20,y20,z20]
5
+ Process: Keras encoder compresses to 16-D bottleneck → LGBMClassifier
6
+ Output : PredictionResult
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import time
11
+ import warnings
12
+ from dataclasses import dataclass
13
+ from typing import List, Any
14
+
15
+ import numpy as np
16
+
17
+ from app.models.label_map import get_sign
18
+ from app.inference.pipeline_a import PredictionResult
19
+
20
+
21
+ def predict(landmarks: List[float], encoder_model: Any, lgbm_model: Any) -> PredictionResult:
22
+ """
23
+ Run the autoencoder-encoder → LightGBM inference chain.
24
+
25
+ Parameters
26
+ ----------
27
+ landmarks : list of 63 floats
28
+ encoder_model : Keras Model (input 63→output 16, bottleneck sub-model)
29
+ lgbm_model : loaded LGBMClassifier instance
30
+
31
+ Returns
32
+ -------
33
+ PredictionResult
34
+ """
35
+ t0 = time.perf_counter()
36
+
37
+ X = np.array(landmarks, dtype=np.float32).reshape(1, -1) # (1, 63)
38
+
39
+ # Encode to 16-D bottleneck (suppress verbose Keras progress bar)
40
+ features = encoder_model(X, training=False).numpy() # (1, 16)
41
+
42
+ # LightGBM classify — suppress sklearn feature-name warning (model was
43
+ # fitted with a named DataFrame; numpy array input is perfectly valid)
44
+ with warnings.catch_warnings():
45
+ warnings.simplefilter("ignore", UserWarning)
46
+ proba = lgbm_model.predict_proba(features)[0] # (34,)
47
+ idx = int(np.argmax(proba))
48
+ conf = float(proba[idx])
49
+
50
+ latency = (time.perf_counter() - t0) * 1000
51
+
52
+ return PredictionResult(
53
+ sign=get_sign(idx),
54
+ confidence=conf,
55
+ label_index=idx,
56
+ probabilities=proba.tolist(),
57
+ pipeline="B",
58
+ latency_ms=latency,
59
+ )
backend/app/inference/pipeline_c.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Pipeline C — Pre-trained CNN (ResNet50) feature extractor + SVM classifier.
3
+
4
+ Input : base-64 encoded JPEG string OR raw bytes of a 128×128 RGB hand-crop.
5
+ Process: Decode → normalise → CNN (256-D features) → SVC.predict_proba
6
+ Output : PredictionResult
7
+
8
+ Note: This pipeline is significantly slower (~100–300 ms on CPU) and is only
9
+ invoked as a fallback when landmark-based pipelines have low confidence.
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import base64
14
+ import io
15
+ import time
16
+ from dataclasses import dataclass
17
+ from typing import Any, List, Union
18
+
19
+ import numpy as np
20
+ from PIL import Image
21
+
22
+ from app.models.label_map import get_sign
23
+ from app.inference.pipeline_a import PredictionResult
24
+
25
+ # Target input size expected by the CNN (ResNet50 Functional model)
26
+ CNN_IMG_SIZE: int = 128
27
+
28
+
29
+ def _decode_image(image_input: Union[str, bytes]) -> np.ndarray:
30
+ """
31
+ Accept either:
32
+ - A base-64 encoded JPEG string (from WebSocket JSON payload)
33
+ - Raw bytes (from HTTP multipart)
34
+ Returns a (128, 128, 3) float32 array normalised to [0, 1].
35
+ """
36
+ if isinstance(image_input, str):
37
+ raw = base64.b64decode(image_input)
38
+ else:
39
+ raw = image_input
40
+
41
+ img = Image.open(io.BytesIO(raw)).convert("RGB")
42
+ img = img.resize((CNN_IMG_SIZE, CNN_IMG_SIZE), Image.LANCZOS)
43
+ arr = np.array(img, dtype=np.float32) / 255.0
44
+ return arr # (128, 128, 3)
45
+
46
+
47
+ def predict(
48
+ image_input: Union[str, bytes],
49
+ cnn_model: Any,
50
+ svm_model: Any,
51
+ ) -> PredictionResult:
52
+ """
53
+ Run the CNN + SVM inference pipeline.
54
+
55
+ Parameters
56
+ ----------
57
+ image_input : base-64 JPEG string or raw bytes of the hand crop (any size; will be resized)
58
+ cnn_model : Keras Functional model (ResNet50-based, output 256-D feature vector)
59
+ svm_model : loaded SVC(C=10, probability=True) instance
60
+
61
+ Returns
62
+ -------
63
+ PredictionResult
64
+ """
65
+ t0 = time.perf_counter()
66
+
67
+ img = _decode_image(image_input) # (128, 128, 3)
68
+ batch = img[np.newaxis] # (1, 128, 128, 3)
69
+
70
+ # CNN forward pass — directly call model (avoids Keras verbose logging)
71
+ features = cnn_model(batch, training=False).numpy() # (1, 256)
72
+
73
+ proba = svm_model.predict_proba(features)[0] # (34,)
74
+ idx = int(np.argmax(proba))
75
+ conf = float(proba[idx])
76
+
77
+ latency = (time.perf_counter() - t0) * 1000
78
+
79
+ return PredictionResult(
80
+ sign=get_sign(idx),
81
+ confidence=conf,
82
+ label_index=idx,
83
+ probabilities=proba.tolist(),
84
+ pipeline="C",
85
+ latency_ms=latency,
86
+ )
backend/app/main.py ADDED
@@ -0,0 +1,280 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SanketSetu FastAPI backend — entry point.
3
+
4
+ Endpoints
5
+ ---------
6
+ GET /health → HealthResponse
7
+ WS /ws/landmarks → real-time sign recognition (landmark stream)
8
+ WS /ws/image → image-based sign recognition (Pipeline C)
9
+ POST /api/predict → REST fallback for landmark inference
10
+ POST /api/predict/image → REST fallback for image inference
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ import logging
16
+ import os
17
+ import time
18
+ from contextlib import asynccontextmanager
19
+ from pathlib import Path
20
+ from typing import Any
21
+
22
+ # Load .env if present (before config is imported so env vars are available)
23
+ try:
24
+ from dotenv import load_dotenv
25
+ _env_file = Path(__file__).resolve().parent.parent / ".env"
26
+ if _env_file.exists():
27
+ load_dotenv(_env_file)
28
+ except ImportError:
29
+ pass # python-dotenv not installed; rely on shell env
30
+
31
+ import numpy as np
32
+ from fastapi import FastAPI, WebSocket, WebSocketDisconnect, HTTPException, Request
33
+ from fastapi.middleware.cors import CORSMiddleware
34
+ from fastapi.responses import JSONResponse
35
+
36
+ from app import config
37
+ from app.models.loader import load_models, get_model_store
38
+ from app.schemas import (
39
+ LandmarkMessage,
40
+ ImageMessage,
41
+ EnsembleMessage,
42
+ PredictionResponse,
43
+ HealthResponse,
44
+ ErrorResponse,
45
+ )
46
+ import app.inference.ensemble as ensemble
47
+
48
+ # ---------------------------------------------------------------------------
49
+ # Logging
50
+ # ---------------------------------------------------------------------------
51
+ logging.basicConfig(
52
+ level=getattr(logging, config.LOG_LEVEL, logging.INFO),
53
+ format="%(asctime)s %(levelname)-8s %(name)s — %(message)s",
54
+ )
55
+ logger = logging.getLogger("sanketsetu")
56
+
57
+ # Silence noisy TF / Keras output
58
+ os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "3")
59
+ os.environ.setdefault("KERAS_BACKEND", "tensorflow")
60
+ logging.getLogger("tensorflow").setLevel(logging.ERROR)
61
+ logging.getLogger("keras").setLevel(logging.ERROR)
62
+
63
+
64
+ # ---------------------------------------------------------------------------
65
+ # Lifespan — load models on startup
66
+ # ---------------------------------------------------------------------------
67
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: load all model artefacts once before serving requests."""
    logger.info("Starting SanketSetu backend …")
    load_models()
    logger.info("Models ready. Server accepting connections.")
    yield
    # Runs on shutdown, after the server stops accepting connections.
    logger.info("Shutting down.")
74
+
75
+
76
+ # ---------------------------------------------------------------------------
77
+ # App
78
+ # ---------------------------------------------------------------------------
79
# ---------------------------------------------------------------------------
# App
# ---------------------------------------------------------------------------
app = FastAPI(
    title="SanketSetu API",
    description="Real-time Gujarati Sign Language recognition backend",
    version="1.0.0",
    lifespan=lifespan,
)

# CORS — allow the configured frontend origins to call this API from a browser.
app.add_middleware(
    CORSMiddleware,
    allow_origins=config.CORS_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
94
+
95
+
96
+ # ---------------------------------------------------------------------------
97
+ # Global exception handler
98
+ # ---------------------------------------------------------------------------
99
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Last-resort handler: log the full traceback and return a generic 500 body."""
    logger.exception("Unhandled error: %s", exc)
    payload = ErrorResponse(error="Internal server error", detail=str(exc))
    return JSONResponse(status_code=500, content=payload.model_dump())
106
+
107
+
108
+ # ---------------------------------------------------------------------------
109
+ # Helpers
110
+ # ---------------------------------------------------------------------------
111
+
112
def _run_ensemble(
    landmarks: list[float],
    image_b64: str | None = None,
) -> PredictionResponse:
    """Invoke the configured inference pipeline(s) and wrap the result for the wire."""
    store = get_model_store()
    result = ensemble.run(
        landmarks,
        image_input=image_b64,
        xgb_model=store.xgb_model,
        encoder_model=store.encoder_model,
        lgbm_model=store.lgbm_model,
        cnn_model=store.cnn_model,
        svm_model=store.svm_model,
        pipeline_mode=config.PIPELINE_MODE,
        confidence_threshold=config.CONFIDENCE_THRESHOLD,
        secondary_threshold=config.SECONDARY_THRESHOLD,
    )
    # Re-shape the internal dataclass into the API response model.
    return PredictionResponse(
        sign=result.sign,
        confidence=result.confidence,
        label_index=result.label_index,
        probabilities=result.probabilities,
        pipeline=result.pipeline,
        latency_ms=result.latency_ms,
    )
137
+
138
+
139
def _available_pipelines() -> list[str]:
    """Names of the pipelines whose model artefacts are currently loaded."""
    try:
        store = get_model_store()
    except RuntimeError:
        # Model store not initialised yet (startup still in progress).
        return []
    available: list[str] = []
    if store.xgb_model is not None:
        available.append("A")
    if store.encoder_model is not None and store.lgbm_model is not None:
        available.append("B")
    if store.cnn_model is not None and store.svm_model is not None:
        available.append("C")
    return available
152
+
153
+
154
+ # ---------------------------------------------------------------------------
155
+ # REST endpoints
156
+ # ---------------------------------------------------------------------------
157
+
158
@app.get("/health", response_model=HealthResponse)
async def health():
    """Liveness/readiness probe: reports model-load state and usable pipelines."""
    try:
        loaded = get_model_store().loaded
    except RuntimeError:
        # Store not initialised yet — still starting up.
        loaded = False
    return HealthResponse(
        status="ok" if loaded else "loading",
        models_loaded=loaded,
        pipelines_available=_available_pipelines(),
    )
170
+
171
+
172
@app.post("/api/predict", response_model=PredictionResponse)
async def predict_landmarks(body: LandmarkMessage):
    """REST fallback for non-WebSocket clients: 63 landmark floats in, prediction out."""
    return _run_ensemble(body.landmarks)
176
+
177
+
178
@app.post("/api/predict/image", response_model=PredictionResponse)
async def predict_image(body: ImageMessage):
    """REST fallback: base-64 hand crop in, Pipeline C (CNN+SVM) prediction out."""
    store = get_model_store()
    if store.cnn_model is None or store.svm_model is None:
        raise HTTPException(status_code=503, detail="Pipeline C (CNN+SVM) is not available.")

    import app.inference.pipeline_c as _pc  # deferred: Pipeline C is optional/heavy
    result = _pc.predict(body.image_b64, store.cnn_model, store.svm_model)
    return PredictionResponse(
        sign=result.sign,
        confidence=result.confidence,
        label_index=result.label_index,
        probabilities=result.probabilities,
        pipeline=result.pipeline,
        latency_ms=result.latency_ms,
    )
194
+
195
+
196
+ # ---------------------------------------------------------------------------
197
+ # WebSocket — landmark stream /ws/landmarks
198
+ # ---------------------------------------------------------------------------
199
+
200
@app.websocket("/ws/landmarks")
async def ws_landmarks(ws: WebSocket):
    """
    Primary real-time endpoint.
    Client sends: {"landmarks": [...63 floats...], "session_id": "..."}
    Server replies: PredictionResponse JSON

    Enforces config.MAX_WS_CONNECTIONS (previously configured but never
    checked): surplus clients are refused with close code 1013
    ("Try Again Later", RFC 6455) before the handshake is accepted.
    """
    state = ws.app.state
    active = getattr(state, "ws_active", 0)
    if active >= config.MAX_WS_CONNECTIONS:
        await ws.close(code=1013)  # reject before accept: try again later
        return
    # Single event loop → plain counter increments are race-free here.
    state.ws_active = active + 1

    await ws.accept()
    session_id = "unknown"
    try:
        while True:
            raw = await ws.receive_text()
            try:
                data = json.loads(raw)
                msg = LandmarkMessage(**data)
                session_id = msg.session_id

                response = _run_ensemble(msg.landmarks)
                await ws.send_text(response.model_dump_json())

            except ValueError as ve:
                # Covers json.JSONDecodeError and pydantic ValidationError
                # (both subclass ValueError).
                await ws.send_text(
                    ErrorResponse(error="Validation error", detail=str(ve)).model_dump_json()
                )
            except Exception as e:
                logger.error("[%s] Inference error: %s", session_id, e, exc_info=True)
                await ws.send_text(
                    ErrorResponse(error="Inference failed", detail=str(e)).model_dump_json()
                )

    except WebSocketDisconnect:
        logger.info("Client disconnected: %s", session_id)
    finally:
        # Always release the connection slot, however the handler exits.
        state.ws_active = getattr(state, "ws_active", 1) - 1
232
+
233
+
234
+ # ---------------------------------------------------------------------------
235
+ # WebSocket — image stream /ws/image (Pipeline C)
236
+ # ---------------------------------------------------------------------------
237
+
238
@app.websocket("/ws/image")
async def ws_image(ws: WebSocket):
    """
    Image-based endpoint for Pipeline C (CNN+SVM).
    Client sends: {"image_b64": "<base64 JPEG>", "session_id": "..."}
    Server replies: PredictionResponse JSON (or ErrorResponse on failure).
    """
    # Hoisted out of the receive loop (was re-imported on every message).
    import app.inference.pipeline_c as _pc

    await ws.accept()
    session_id = "unknown"
    try:
        while True:
            raw = await ws.receive_text()
            try:
                data = json.loads(raw)
                msg = ImageMessage(**data)
                session_id = msg.session_id

                store = get_model_store()
                if store.cnn_model is None or store.svm_model is None:
                    await ws.send_text(
                        ErrorResponse(error="Pipeline C not available").model_dump_json()
                    )
                    continue

                result = _pc.predict(msg.image_b64, store.cnn_model, store.svm_model)
                response = PredictionResponse(
                    sign=result.sign,
                    confidence=result.confidence,
                    pipeline=result.pipeline,
                    label_index=result.label_index,
                    probabilities=result.probabilities,
                    latency_ms=result.latency_ms,
                )
                await ws.send_text(response.model_dump_json())

            except ValueError as ve:
                # Malformed JSON / schema violations — mirrors /ws/landmarks,
                # which previously made this endpoint misreport bad payloads
                # as inference failures.
                await ws.send_text(
                    ErrorResponse(error="Validation error", detail=str(ve)).model_dump_json()
                )
            except Exception as e:
                logger.error("[%s] Image inference error: %s", session_id, e, exc_info=True)
                await ws.send_text(
                    ErrorResponse(error="Inference failed", detail=str(e)).model_dump_json()
                )

    except WebSocketDisconnect:
        logger.info("Image client disconnected: %s", session_id)
backend/app/models/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # backend/app/models/__init__.py
backend/app/models/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (169 Bytes). View file
 
backend/app/models/__pycache__/label_map.cpython-312.pyc ADDED
Binary file (2.3 kB). View file
 
backend/app/models/__pycache__/loader.cpython-312.pyc ADDED
Binary file (7.72 kB). View file
 
backend/app/models/label_map.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Label map: index 0–33 → Gujarati sign name.

The 34 classes cover the Gujarati consonant alphabet (ક–ળ) plus the two
conjuncts ક્ષ and જ્ઞ, as used in the training dataset. Verify this order
against the original data-collection script / notebook — if the dataset
folder names differ, update the tuple below.

Current assumption: classes are sorted by the Gujarati alphabet order
(Unicode order of the Gujarati block, U+0A80–U+0AFF).
"""
from __future__ import annotations

# Class labels in dataset index order: 32 consonants + 2 conjunct signs.
_SIGNS: tuple[str, ...] = (
    "ક",    # 0  ka
    "ખ",    # 1  kha
    "ગ",    # 2  ga
    "ઘ",    # 3  gha
    "ચ",    # 4  cha
    "છ",    # 5  chha
    "જ",    # 6  ja
    "ઝ",    # 7  jha
    "ટ",    # 8  ṭa
    "ઠ",    # 9  ṭha
    "ડ",    # 10 ḍa
    "ઢ",    # 11 ḍha
    "ણ",    # 12 ṇa
    "ત",    # 13 ta
    "થ",    # 14 tha
    "દ",    # 15 da
    "ધ",    # 16 dha
    "ન",    # 17 na
    "પ",    # 18 pa
    "ફ",    # 19 pha
    "બ",    # 20 ba
    "ભ",    # 21 bha
    "મ",    # 22 ma
    "ય",    # 23 ya
    "ર",    # 24 ra
    "લ",    # 25 la
    "વ",    # 26 va
    "શ",    # 27 sha
    "ષ",    # 28 ṣha
    "સ",    # 29 sa
    "હ",    # 30 ha
    "ળ",    # 31 ḷa
    "ક્ષ",   # 32 ksha (conjunct)
    "જ્ઞ",   # 33 gna (conjunct)
)

# ---- Primary label map (index → Gujarati character / word) -----------------
LABEL_MAP: dict[int, str] = dict(enumerate(_SIGNS))

# Reverse map: sign name → index (useful for testing)
REVERSE_MAP: dict[str, int] = {sign: idx for idx, sign in enumerate(_SIGNS)}


def get_sign(label_index: int) -> str:
    """Return the Gujarati sign for *label_index*, or "[<index>]" if unknown."""
    try:
        return LABEL_MAP[label_index]
    except KeyError:
        return f"[{label_index}]"
backend/app/models/loader.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Singleton model store — loads all model artifacts once at startup and holds them
3
+ in memory for the lifetime of the process.
4
+
5
+ Usage inside FastAPI:
6
+ from app.models.loader import get_model_store
7
+ store = get_model_store() # dependency injection or direct call
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ import os
13
+ import pickle
14
+ import time
15
+ from dataclasses import dataclass, field
16
+ from typing import Any
17
+
18
+ import numpy as np
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+ # ---------------------------------------------------------------------------
23
+ # Data class that holds every loaded artifact
24
+ # ---------------------------------------------------------------------------
25
+
26
@dataclass
class ModelStore:
    """In-memory container for every loaded model artifact (one per process)."""

    # Pipeline A
    xgb_model: Any = None

    # Pipeline B
    encoder_model: Any = None  # Keras sub-model (encoder half)
    lgbm_model: Any = None

    # Pipeline C
    cnn_model: Any = None  # Keras ResNet50 feature extractor
    svm_model: Any = None

    # Set to True by load_models() once initialisation has finished.
    loaded: bool = False
40
+
41
+
42
+ # Module-level singleton
43
+ _store: ModelStore | None = None
44
+
45
+
46
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------

def _load_pickle(path: str, label: str) -> Any:
    """Unpickle the artifact at *path*, logging *label* and the load time in ms."""
    start = time.perf_counter()
    # NOTE: pickle is only safe here because artifacts are trusted files
    # bundled with the application, never user-supplied data.
    with open(path, "rb") as fh:
        artifact = pickle.load(fh)
    logger.info("Loaded %-35s (%.1f ms)", label, (time.perf_counter() - start) * 1000)
    return artifact
57
+
58
+
59
def _build_encoder(autoencoder_pkl_path: str) -> Any:
    """
    Load the full autoencoder from pickle and extract the encoder sub-model.

    The autoencoder is a Keras Sequential:
        InputLayer (63)
        Dense 32 relu   ← layer index 0
        Dense 16 relu   ← layer index 1 ← bottleneck output
        Dense 32 relu
        Dense 63 linear

    Returns a Keras Model that maps the 63-D input to the 16-D bottleneck.
    """
    # Configure the backend/log level before keras gets imported below.
    # (os is imported at module level; the previous function-local
    # ``import os`` and unused ``import numpy as _np`` were removed.)
    os.environ.setdefault("KERAS_BACKEND", "tensorflow")
    os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "3")

    full_ae = _load_pickle(autoencoder_pkl_path, "autoencoder_model.pkl")

    # Dynamically import keras after env vars are set
    import keras

    # The Sequential model's built layers: 0=Dense(32), 1=Dense(16), 2=Dense(32), 3=Dense(63)
    # layer index 1 output is the 16-D bottleneck.
    # We can't use full_ae.input directly on a Sequential that was pickled without
    # a traced input tensor, so we wire the layers manually.
    inp = keras.Input(shape=(63,), name="encoder_input")
    x = full_ae.layers[0](inp)  # Dense(32, relu)
    x = full_ae.layers[1](x)    # Dense(16, relu) — bottleneck
    encoder = keras.Model(inputs=inp, outputs=x, name="encoder_only")
    logger.info("Built encoder sub-model: input(%s) → output(%s)", encoder.input_shape, encoder.output_shape)
    return encoder
90
+
91
+
92
def _build_cnn_feature_extractor(cnn_pkl_path: str) -> Any:
    """
    Load the full CNN (ResNet50 Functional model) from pickle and return a
    sub-model that outputs the 256-D penultimate Dense layer.

    Architecture (tail of the model):
        … ResNet50 backbone …
        GlobalAveragePooling2D
        Dropout(0.5)
        Dense(256, relu)   ← feature vector we want
        Dropout(0.5)
        Dense(34, softmax) ← final classification head (skip this)

    The SVC was trained on the 256-D features, so we must stop before the
    final Dense(34) layer.
    """
    # Configure the backend/log level before keras gets imported.
    # (os is imported at module level; the redundant function-local
    # ``import os`` was removed.)
    os.environ.setdefault("KERAS_BACKEND", "tensorflow")
    os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "3")

    import keras

    full_cnn = _load_pickle(cnn_pkl_path, "cnn_model.pkl")

    # Find the Dense(256) layer by scanning from the end
    feature_layer = None
    for layer in reversed(full_cnn.layers):
        cfg = layer.get_config()
        if layer.__class__.__name__ == 'Dense' and cfg.get('units') == 256:
            feature_layer = layer
            break

    if feature_layer is None:
        # Degrade gracefully: fall back to the model's final (softmax) output.
        logger.warning(
            "Could not find Dense(256) layer; using full CNN output as features."
        )
        return full_cnn

    extractor = keras.Model(
        inputs=full_cnn.input,
        outputs=feature_layer.output,
        name="cnn_feature_extractor",
    )
    logger.info(
        "CNN feature extractor: input %s → output %s",
        extractor.input_shape,
        extractor.output_shape,
    )
    return extractor
141
+
142
+
143
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------

def load_models() -> ModelStore:
    """
    Load all models and return a populated ModelStore.
    Call this once from the FastAPI lifespan event.

    Missing artifacts only disable the corresponding pipeline (with a
    warning); they never raise.
    """
    global _store

    from app import config  # local import to avoid circular at module level

    store = ModelStore()

    # ---- Pipeline A --------------------------------------------------------
    if os.path.exists(config.PIPELINE_A_MODEL):
        store.xgb_model = _load_pickle(config.PIPELINE_A_MODEL, "xgb model.pkl")
    else:
        logger.warning("Pipeline A model not found: %s", config.PIPELINE_A_MODEL)

    # ---- Pipeline B --------------------------------------------------------
    if os.path.exists(config.PIPELINE_B_AE) and os.path.exists(config.PIPELINE_B_LGBM):
        store.encoder_model = _build_encoder(config.PIPELINE_B_AE)
        store.lgbm_model = _load_pickle(config.PIPELINE_B_LGBM, "lgbm_model.pkl")
    else:
        logger.warning("Pipeline B models not found — B will be skipped.")

    # ---- Pipeline C --------------------------------------------------------
    if os.path.exists(config.PIPELINE_C_CNN) and os.path.exists(config.PIPELINE_C_SVM):
        store.cnn_model = _build_cnn_feature_extractor(config.PIPELINE_C_CNN)
        store.svm_model = _load_pickle(config.PIPELINE_C_SVM, "svm_model.pkl")
    else:
        logger.warning("Pipeline C models not found — C will be skipped.")

    store.loaded = True
    # Report exactly which pipelines are usable instead of an unconditional
    # "all loaded" message (some may have been skipped above).
    available = [
        name
        for name, ok in (
            ("A", store.xgb_model is not None),
            ("B", store.encoder_model is not None and store.lgbm_model is not None),
            ("C", store.cnn_model is not None and store.svm_model is not None),
        )
        if ok
    ]
    logger.info("Model loading complete — available pipelines: %s", available or "none")
    _store = store
    return store
182
+
183
+
184
def get_model_store() -> ModelStore:
    """Return the singleton ModelStore (must have been loaded via load_models() first)."""
    if _store is not None and _store.loaded:
        return _store
    raise RuntimeError("ModelStore has not been initialised — call load_models() first.")
backend/app/schemas.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Pydantic request / response schemas for SanketSetu backend.
3
+ """
4
+ from __future__ import annotations
5
+
6
+ from typing import List, Optional
7
+
8
+ from pydantic import BaseModel, Field, field_validator
9
+
10
+
11
+ # ---------------------------------------------------------------------------
12
+ # Requests
13
+ # ---------------------------------------------------------------------------
14
+
15
class LandmarkMessage(BaseModel):
    """
    Payload sent by the client over /ws/landmarks or POST /api/predict.

    'landmarks' is a flat list of [x0,y0,z0, x1,y1,z1, ..., x20,y20,z20]
    extracted by MediaPipe Hands on the browser side (21 points × 3 coords).
    """

    # min_length/max_length already enforce exactly 63 values in pydantic v2,
    # so the previous must_be_63_floats field_validator was unreachable dead
    # code and has been removed.
    landmarks: List[float] = Field(..., min_length=63, max_length=63)
    # Opaque client-chosen identifier, echoed back in server logs.
    session_id: str = Field(default="default")
30
+
31
+
32
class ImageMessage(BaseModel):
    """
    Payload sent when Pipeline C (CNN+SVM) is invoked via /ws/image.
    'image_b64' is a base-64 encoded JPEG of the cropped hand region (128×128).
    """
    # Passed as-is to pipeline_c.predict; decoding happens server-side.
    image_b64: str = Field(..., description="Base-64 encoded JPEG of the hand crop (128×128 px)")
    # Opaque client-chosen identifier, echoed back in server logs.
    session_id: str = Field(default="default")
39
+
40
+
41
class EnsembleMessage(BaseModel):
    """
    Combined payload: landmarks + optional image for the full ensemble pipeline.
    """
    # Flat [x,y,z] × 21 MediaPipe hand landmarks — exactly 63 floats.
    landmarks: List[float] = Field(..., min_length=63, max_length=63)
    # Optional base-64 JPEG hand crop; presumably enables Pipeline C within
    # the ensemble when present — confirm against ensemble.py.
    image_b64: Optional[str] = Field(default=None)
    session_id: str = Field(default="default")
48
+
49
+
50
+ # ---------------------------------------------------------------------------
51
+ # Responses
52
+ # ---------------------------------------------------------------------------
53
+
54
class PredictionResponse(BaseModel):
    """Single prediction result sent back to the client (WebSocket or HTTP)."""
    sign: str = Field(..., description="Gujarati sign character(s)")
    # Normalised to [0, 1]; values outside the range fail validation.
    confidence: float = Field(..., ge=0.0, le=1.0)
    pipeline: str = Field(..., description="Which pipeline(s) produced this result: A, B, C, or ensemble")
    # Index into the 34-class label map (see app.models.label_map).
    label_index: int = Field(..., ge=0, le=33)
    probabilities: Optional[List[float]] = Field(
        default=None,
        description="Full 34-class probability vector (optional, increases payload size)"
    )
    latency_ms: Optional[float] = Field(default=None, description="Server-side inference latency in ms")
64
+
65
+
66
class HealthResponse(BaseModel):
    """Service health payload: status string, model-load flag, usable pipelines."""
    status: str
    # True once load_models() has populated the singleton ModelStore.
    models_loaded: bool
    # Names of the pipelines whose artifacts loaded successfully (e.g. ["A", "C"]).
    pipelines_available: List[str]
70
+
71
+
72
class ErrorResponse(BaseModel):
    """Error payload sent to the client when parsing or inference fails."""
    # Short machine-friendly error summary (e.g. "Inference failed").
    error: str
    # Optional human-readable detail, typically str(exception).
    detail: Optional[str] = None
backend/requirements-dev.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Development + testing dependencies (not required in production)
2
+ pytest>=9.0
3
+ httpx>=0.28
4
+ pytest-anyio
backend/requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi>=0.115.0
2
+ uvicorn[standard]>=0.30.0
3
+ websockets>=12.0
4
+ xgboost>=2.0.0
5
+ lightgbm>=4.3.0
6
+ scikit-learn>=1.4.0
7
+ keras==3.13.2
8
+ tensorflow-cpu>=2.20.0
9
+ numpy>=1.26.0
10
+ opencv-python-headless>=4.9.0
11
+ pillow>=10.3.0
12
+ python-dotenv>=1.0.0
backend/tests/__init__.py ADDED
File without changes
backend/tests/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (164 Bytes). View file
 
backend/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc ADDED
Binary file (4.07 kB). View file
 
backend/tests/__pycache__/test_pipeline_a.cpython-312-pytest-9.0.2.pyc ADDED
Binary file (14.4 kB). View file
 
backend/tests/__pycache__/test_pipeline_b.cpython-312-pytest-9.0.2.pyc ADDED
Binary file (11.6 kB). View file