Spaces:
Sleeping
Sleeping
Commit ·
cf93910
0
Parent(s):
Initial commit: SanketSetu - Sign Language Recognition System
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .dockerignore +29 -0
- .gitattributes +2 -0
- .github/workflows/deploy-backend.yml +61 -0
- .github/workflows/deploy-frontend.yml +45 -0
- CNN_Autoencoder_LightGBM/autoencoder_model.pkl +3 -0
- CNN_Autoencoder_LightGBM/autoencoder_model.pth +3 -0
- CNN_Autoencoder_LightGBM/lgbm_model.pkl +3 -0
- CNN_Autoencoder_LightGBM/lgbm_model.pth +3 -0
- CNN_PreTrained/cnn_model.pkl +3 -0
- CNN_PreTrained/cnn_model.pth +3 -0
- CNN_PreTrained/svm_model.pkl +3 -0
- CNN_PreTrained/svm_model.pth +3 -0
- Dockerfile +48 -0
- Mediapipe_XGBoost/model.pkl +3 -0
- Mediapipe_XGBoost/model.pth +3 -0
- README.md +108 -0
- SanketSetu_ Production-Grade Implementation Plan.md +99 -0
- TASKS.md +284 -0
- backend/.env.example +32 -0
- backend/app/__init__.py +1 -0
- backend/app/__pycache__/__init__.cpython-312.pyc +0 -0
- backend/app/__pycache__/config.cpython-312.pyc +0 -0
- backend/app/__pycache__/main.cpython-312.pyc +0 -0
- backend/app/__pycache__/schemas.cpython-312.pyc +0 -0
- backend/app/config.py +64 -0
- backend/app/inference/__init__.py +1 -0
- backend/app/inference/__pycache__/__init__.cpython-312.pyc +0 -0
- backend/app/inference/__pycache__/ensemble.cpython-312.pyc +0 -0
- backend/app/inference/__pycache__/pipeline_a.cpython-312.pyc +0 -0
- backend/app/inference/__pycache__/pipeline_b.cpython-312.pyc +0 -0
- backend/app/inference/__pycache__/pipeline_c.cpython-312.pyc +0 -0
- backend/app/inference/ensemble.py +138 -0
- backend/app/inference/pipeline_a.py +57 -0
- backend/app/inference/pipeline_b.py +59 -0
- backend/app/inference/pipeline_c.py +86 -0
- backend/app/main.py +280 -0
- backend/app/models/__init__.py +1 -0
- backend/app/models/__pycache__/__init__.cpython-312.pyc +0 -0
- backend/app/models/__pycache__/label_map.cpython-312.pyc +0 -0
- backend/app/models/__pycache__/loader.cpython-312.pyc +0 -0
- backend/app/models/label_map.py +58 -0
- backend/app/models/loader.py +188 -0
- backend/app/schemas.py +74 -0
- backend/requirements-dev.txt +4 -0
- backend/requirements.txt +12 -0
- backend/tests/__init__.py +0 -0
- backend/tests/__pycache__/__init__.cpython-312.pyc +0 -0
- backend/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc +0 -0
- backend/tests/__pycache__/test_pipeline_a.cpython-312-pytest-9.0.2.pyc +0 -0
- backend/tests/__pycache__/test_pipeline_b.cpython-312-pytest-9.0.2.pyc +0 -0
.dockerignore
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ── Python ────────────────────────────────────────────────────────────────────
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*.pyo
|
| 5 |
+
.venv/
|
| 6 |
+
*.egg-info/
|
| 7 |
+
.pytest_cache/
|
| 8 |
+
.mypy_cache/
|
| 9 |
+
dist/
|
| 10 |
+
|
| 11 |
+
# ── Node / Frontend ───────────────────────────────────────────────────────────
|
| 12 |
+
frontend/node_modules/
|
| 13 |
+
frontend/dist/
|
| 14 |
+
frontend/.env.local
|
| 15 |
+
|
| 16 |
+
# ── Git / Editor ──────────────────────────────────────────────────────────────
|
| 17 |
+
.git/
|
| 18 |
+
.gitignore
|
| 19 |
+
.vscode/
|
| 20 |
+
*.md
|
| 21 |
+
TASKS.md
|
| 22 |
+
|
| 23 |
+
# ── OS ────────────────────────────────────────────────────────────────────────
|
| 24 |
+
.DS_Store
|
| 25 |
+
Thumbs.db
|
| 26 |
+
|
| 27 |
+
# ── Model binary variants (keep only .pkl, not duplicate .pth) ───────────────
|
| 28 |
+
# Both extensions are identical — Docker only needs .pkl
|
| 29 |
+
**/*.pth
|
.gitattributes
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
.github/workflows/deploy-backend.yml
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Deploy Backend → Fly.io
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches: [main]
|
| 6 |
+
paths:
|
| 7 |
+
- 'backend/**'
|
| 8 |
+
- 'Dockerfile'
|
| 9 |
+
- '.dockerignore'
|
| 10 |
+
- 'fly.toml'
|
| 11 |
+
- 'Mediapipe_XGBoost/**'
|
| 12 |
+
- 'CNN_Autoencoder_LightGBM/**'
|
| 13 |
+
- 'CNN_PreTrained/**'
|
| 14 |
+
|
| 15 |
+
jobs:
|
| 16 |
+
test:
|
| 17 |
+
name: Run backend tests
|
| 18 |
+
runs-on: ubuntu-latest
|
| 19 |
+
steps:
|
| 20 |
+
- uses: actions/checkout@v4
|
| 21 |
+
|
| 22 |
+
- uses: actions/setup-python@v5
|
| 23 |
+
with:
|
| 24 |
+
python-version: '3.12'
|
| 25 |
+
cache: pip
|
| 26 |
+
cache-dependency-path: backend/requirements.txt
|
| 27 |
+
|
| 28 |
+
- name: Install deps
|
| 29 |
+
run: |
|
| 30 |
+
pip install -r backend/requirements.txt
|
| 31 |
+
pip install -r backend/requirements-dev.txt
|
| 32 |
+
|
| 33 |
+
- name: Run tests
|
| 34 |
+
working-directory: backend
|
| 35 |
+
env:
|
| 36 |
+
KERAS_BACKEND: tensorflow
|
| 37 |
+
TF_CPP_MIN_LOG_LEVEL: "3"
|
| 38 |
+
CUDA_VISIBLE_DEVICES: ""
|
| 39 |
+
TF_ENABLE_ONEDNN_OPTS: "0"
|
| 40 |
+
run: pytest tests/ -v --tb=short -q
|
| 41 |
+
# Note: tests will be skipped automatically if model .pkl files are absent
|
| 42 |
+
# (model artefacts are gitignored). Add them as GitHub Actions artifacts
|
| 43 |
+
# or use DVC/GCS to restore them in CI if you want full test coverage.
|
| 44 |
+
|
| 45 |
+
deploy:
|
| 46 |
+
name: Deploy to Fly.io
|
| 47 |
+
needs: test
|
| 48 |
+
runs-on: ubuntu-latest
|
| 49 |
+
environment: production
|
| 50 |
+
concurrency:
|
| 51 |
+
group: fly-deploy
|
| 52 |
+
cancel-in-progress: true
|
| 53 |
+
steps:
|
| 54 |
+
- uses: actions/checkout@v4
|
| 55 |
+
|
| 56 |
+
- uses: superfly/flyctl-actions/setup-flyctl@master
|
| 57 |
+
|
| 58 |
+
- name: Deploy
|
| 59 |
+
run: flyctl deploy --remote-only
|
| 60 |
+
env:
|
| 61 |
+
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
|
.github/workflows/deploy-frontend.yml
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Deploy Frontend → Vercel
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches: [main]
|
| 6 |
+
paths:
|
| 7 |
+
- 'frontend/**'
|
| 8 |
+
|
| 9 |
+
jobs:
|
| 10 |
+
build-and-deploy:
|
| 11 |
+
name: Build & Deploy
|
| 12 |
+
runs-on: ubuntu-latest
|
| 13 |
+
environment: production
|
| 14 |
+
steps:
|
| 15 |
+
- uses: actions/checkout@v4
|
| 16 |
+
|
| 17 |
+
- uses: actions/setup-node@v4
|
| 18 |
+
with:
|
| 19 |
+
node-version: '22'
|
| 20 |
+
cache: npm
|
| 21 |
+
cache-dependency-path: frontend/package-lock.json
|
| 22 |
+
|
| 23 |
+
- name: Install dependencies
|
| 24 |
+
working-directory: frontend
|
| 25 |
+
run: npm ci
|
| 26 |
+
|
| 27 |
+
- name: Type-check
|
| 28 |
+
working-directory: frontend
|
| 29 |
+
run: npx tsc --project tsconfig.app.json --noEmit
|
| 30 |
+
|
| 31 |
+
- name: Build
|
| 32 |
+
working-directory: frontend
|
| 33 |
+
env:
|
| 34 |
+
VITE_WS_URL: ${{ vars.VITE_WS_URL }}
|
| 35 |
+
VITE_API_URL: ${{ vars.VITE_API_URL }}
|
| 36 |
+
run: npm run build
|
| 37 |
+
|
| 38 |
+
- name: Deploy to Vercel
|
| 39 |
+
uses: amondnet/vercel-action@v25
|
| 40 |
+
with:
|
| 41 |
+
vercel-token: ${{ secrets.VERCEL_TOKEN }}
|
| 42 |
+
vercel-org-id: ${{ secrets.VERCEL_ORG_ID }}
|
| 43 |
+
vercel-project-id: ${{ secrets.VERCEL_PROJECT_ID }}
|
| 44 |
+
working-directory: frontend
|
| 45 |
+
vercel-args: '--prod'
|
CNN_Autoencoder_LightGBM/autoencoder_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5843688f059d26851774e553c4afddbc7c0f2f7fc048401b8447f290a63d2cbe
|
| 3 |
+
size 92934
|
CNN_Autoencoder_LightGBM/autoencoder_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3593536edda0328121d5f92fd186a8e40c341799bd9bb703e0e2ad155b6e7aeb
|
| 3 |
+
size 121321
|
CNN_Autoencoder_LightGBM/lgbm_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e83d2bb3a18da0b3ccdd7afc5d044fa52c6e70c4e6090b312a622a866ee0008
|
| 3 |
+
size 3623126
|
CNN_Autoencoder_LightGBM/lgbm_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a09f7b712da5f0e6b63e222e4ea938029567bd8cf496da7ad93752d54219b57
|
| 3 |
+
size 3626367
|
CNN_PreTrained/cnn_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:464df17407edea99db1b69c20e7ff718f6ceafb05f1bbeaacc889499e4cd920a
|
| 3 |
+
size 97136794
|
CNN_PreTrained/cnn_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6eb8e3419763c47b5ba2480ccaf9907e8d748602b26fe59c009b6112fa840ae5
|
| 3 |
+
size 146278905
|
CNN_PreTrained/svm_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf21a17c1340b84359c3431fc4ae8eb05239e4e1ef58dd34ab775f53b9bc7f53
|
| 3 |
+
size 929927
|
CNN_PreTrained/svm_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e706ebf4588c580d0e6ac6f1554f9fd2eaef5564ee02f8022e3ca5f13bb8985b
|
| 3 |
+
size 1079865
|
Dockerfile
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 2 |
+
# SanketSetu Backend — Dockerfile
|
| 3 |
+
# Build context: repo root (SanketSetu/)
|
| 4 |
+
#
|
| 5 |
+
# docker build -t sanketsetu-backend .
|
| 6 |
+
# docker run -p 8000:8000 sanketsetu-backend
|
| 7 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 8 |
+
|
| 9 |
+
FROM python:3.12-slim AS base
|
| 10 |
+
|
| 11 |
+
# System libraries needed by OpenCV headless + Pillow
|
| 12 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 13 |
+
libgl1 libglib2.0-0 libgomp1 \
|
| 14 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 15 |
+
|
| 16 |
+
# ── Python dependencies (cached layer) ───────────────────────────────────────
|
| 17 |
+
WORKDIR /app
|
| 18 |
+
COPY backend/requirements.txt ./
|
| 19 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 20 |
+
|
| 21 |
+
# ── Application source ────────────────────────────────────────────────────────
|
| 22 |
+
COPY backend/app/ ./app/
|
| 23 |
+
|
| 24 |
+
# ── Model artefacts ───────────────────────────────────────────────────────────
|
| 25 |
+
# Copied to /models so the container is fully self-contained.
|
| 26 |
+
# Override at runtime with -e WEIGHTS_DIR=/mnt/models + bind-mount if preferred.
|
| 27 |
+
COPY Mediapipe_XGBoost/ /models/Mediapipe_XGBoost/
|
| 28 |
+
COPY CNN_Autoencoder_LightGBM/ /models/CNN_Autoencoder_LightGBM/
|
| 29 |
+
COPY CNN_PreTrained/ /models/CNN_PreTrained/
|
| 30 |
+
|
| 31 |
+
# ── Runtime environment ───────────────────────────────────────────────────────
|
| 32 |
+
ENV WEIGHTS_DIR=/models \
|
| 33 |
+
KERAS_BACKEND=tensorflow \
|
| 34 |
+
TF_CPP_MIN_LOG_LEVEL=3 \
|
| 35 |
+
CUDA_VISIBLE_DEVICES="" \
|
| 36 |
+
TF_ENABLE_ONEDNN_OPTS=0 \
|
| 37 |
+
OMP_NUM_THREADS=4 \
|
| 38 |
+
PYTHONDONTWRITEBYTECODE=1 \
|
| 39 |
+
PYTHONUNBUFFERED=1
|
| 40 |
+
|
| 41 |
+
EXPOSE 8000
|
| 42 |
+
|
| 43 |
+
# ── Health-check ──────────────────────────────────────────────────────────────
|
| 44 |
+
# Wait up to 3 minutes for models to load before marking the container healthy.
|
| 45 |
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=180s --retries=3 \
|
| 46 |
+
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health', timeout=5)"
|
| 47 |
+
|
| 48 |
+
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
Mediapipe_XGBoost/model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a736b83df3e56b69b0f1c11f018257760746969d6598d90ea2a60c78f8305883
|
| 3 |
+
size 1711525
|
Mediapipe_XGBoost/model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ff5f1cbc121be57f2a7fe04b38925ea740fe79602a6205ca09a748cb0f20b81
|
| 3 |
+
size 1895969
|
README.md
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SanketSetu
|
| 2 |
+
|
| 3 |
+
A real-time sign language recognition system using machine learning and computer vision.
|
| 4 |
+
|
| 5 |
+
## Overview
|
| 6 |
+
|
| 7 |
+
SanketSetu is an intelligent sign language interpretation system that provides real-time recognition and translation of sign language gestures using advanced machine learning models and MediaPipe hand tracking.
|
| 8 |
+
|
| 9 |
+
## Project Structure
|
| 10 |
+
|
| 11 |
+
```
|
| 12 |
+
├── backend/ # FastAPI backend server
|
| 13 |
+
│ ├── app/ # Main application code
|
| 14 |
+
│ │ ├── inference/ # ML inference pipelines
|
| 15 |
+
│ │ └── models/ # Model loading and management
|
| 16 |
+
│ └── tests/ # Backend tests
|
| 17 |
+
├── frontend/ # React + TypeScript frontend
|
| 18 |
+
│ └── src/
|
| 19 |
+
│ ├── components/ # React components
|
| 20 |
+
│ ├── hooks/ # Custom React hooks
|
| 21 |
+
│ └── lib/ # Utility libraries
|
| 22 |
+
├── CNN_Autoencoder_LightGBM/ # CNN Autoencoder + LightGBM model
|
| 23 |
+
├── CNN_PreTrained/ # CNN + SVM model
|
| 24 |
+
└── Mediapipe_XGBoost/ # MediaPipe + XGBoost model
|
| 25 |
+
```
|
| 26 |
+
|
| 27 |
+
## Features
|
| 28 |
+
|
| 29 |
+
- Real-time sign language gesture recognition
|
| 30 |
+
- Multiple ML model ensemble approach
|
| 31 |
+
- WebSocket-based real-time communication
|
| 32 |
+
- MediaPipe hand landmark tracking
|
| 33 |
+
- Interactive webcam feed with visual feedback
|
| 34 |
+
- Prediction confidence display
|
| 35 |
+
|
| 36 |
+
## Tech Stack
|
| 37 |
+
|
| 38 |
+
### Backend
|
| 39 |
+
- FastAPI
|
| 40 |
+
- Python 3.x
|
| 41 |
+
- PyTorch
|
| 42 |
+
- LightGBM
|
| 43 |
+
- XGBoost
|
| 44 |
+
- MediaPipe
|
| 45 |
+
|
| 46 |
+
### Frontend
|
| 47 |
+
- React
|
| 48 |
+
- TypeScript
|
| 49 |
+
- Vite
|
| 50 |
+
- TailwindCSS
|
| 51 |
+
|
| 52 |
+
## Getting Started
|
| 53 |
+
|
| 54 |
+
### Prerequisites
|
| 55 |
+
- Python 3.8+
|
| 56 |
+
- Node.js 16+
|
| 57 |
+
- npm or yarn
|
| 58 |
+
|
| 59 |
+
### Backend Setup
|
| 60 |
+
|
| 61 |
+
```bash
|
| 62 |
+
cd backend
|
| 63 |
+
pip install -r requirements.txt
|
| 64 |
+
python -m app.main
|
| 65 |
+
```
|
| 66 |
+
|
| 67 |
+
### Frontend Setup
|
| 68 |
+
|
| 69 |
+
```bash
|
| 70 |
+
cd frontend
|
| 71 |
+
npm install
|
| 72 |
+
npm run dev
|
| 73 |
+
```
|
| 74 |
+
|
| 75 |
+
## Development
|
| 76 |
+
|
| 77 |
+
Run the development servers:
|
| 78 |
+
|
| 79 |
+
```bash
|
| 80 |
+
# Start both frontend and backend
|
| 81 |
+
.\start.ps1
|
| 82 |
+
```
|
| 83 |
+
|
| 84 |
+
## Docker
|
| 85 |
+
|
| 86 |
+
Build and run using Docker:
|
| 87 |
+
|
| 88 |
+
```bash
|
| 89 |
+
docker build -t sanketsetu .
|
| 90 |
+
docker run -p 8000:8000 sanketsetu
|
| 91 |
+
```
|
| 92 |
+
|
| 93 |
+
## Testing
|
| 94 |
+
|
| 95 |
+
Run backend tests:
|
| 96 |
+
|
| 97 |
+
```bash
|
| 98 |
+
cd backend
|
| 99 |
+
pytest
|
| 100 |
+
```
|
| 101 |
+
|
| 102 |
+
## License
|
| 103 |
+
|
| 104 |
+
All rights reserved.
|
| 105 |
+
|
| 106 |
+
## Author
|
| 107 |
+
|
| 108 |
+
Devrajsinh Gohil (devrajsinh2012)
|
SanketSetu_ Production-Grade Implementation Plan.md
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SanketSetu: Production-Grade Implementation Plan
|
| 2 |
+
|
| 3 |
+
## 1. Executive Summary
|
| 4 |
+
**SanketSetu** (Bridge of Signs) is a high-performance, real-time Gujarati Sign Language (GSL) recognition system. This document outlines a production-ready architecture designed to run entirely on **free-tier cloud services**. The system leverages a decoupled architecture with a React-based interactive frontend and a FastAPI backend, ensuring low-latency inference and a seamless user experience.
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
## 2. High-Level System Architecture
|
| 9 |
+
The system follows a modern microservices-inspired pattern to ensure scalability and ease of updates.
|
| 10 |
+
|
| 11 |
+
| Component | Technology | Role | Hosting (Free Tier) |
|
| 12 |
+
| :--- | :--- | :--- | :--- |
|
| 13 |
+
| **Frontend** | React + Vite + TS | User interface, webcam capture, real-time feedback | **Vercel** |
|
| 14 |
+
| **Backend API** | FastAPI (Python) | WebSocket management, API gateway, logic | **Fly.io** |
|
| 15 |
+
| **Inference Engine** | ONNX Runtime / XGBoost | High-speed model execution | **Fly.io** (Internal) |
|
| 16 |
+
| **Storage** | Cloudflare R2 | S3-compatible storage for model weights | **Cloudflare** |
|
| 17 |
+
| **Real-time** | WebSockets (WSS) | Low-latency frame-by-frame data transfer | N/A |
|
| 18 |
+
|
| 19 |
+
---
|
| 20 |
+
|
| 21 |
+
## 3. Backend Implementation Details
|
| 22 |
+
|
| 23 |
+
### 3.1 API Design (FastAPI)
|
| 24 |
+
The backend is built for speed. It handles binary data from WebSockets to minimize overhead.
|
| 25 |
+
|
| 26 |
+
* **WebSocket Protocol**: The client sends a stream of normalized hand landmark coordinates (63 points per frame) extracted locally via MediaPipe. This reduces bandwidth significantly compared to sending raw video frames.
|
| 27 |
+
* **Concurrency**: Uses `asyncio` to handle multiple simultaneous user connections without blocking the event loop.
|
| 28 |
+
* **Model Loading**: Models are loaded into memory at startup using a Singleton pattern to ensure zero-latency on the first request.
|
| 29 |
+
|
| 30 |
+
### 3.2 Model Serving Strategy
|
| 31 |
+
1. **Primary Model**: The **XGBoost** model is used as the default due to its sub-millisecond inference time.
|
| 32 |
+
2. **Backup/Ensemble**: The system can optionally query the **CNN+SVM** or **LGBM** models for high-confidence verification if the XGBoost score is below a certain threshold.
|
| 33 |
+
3. **Optimization**: Models are converted to **ONNX** format to leverage the ONNX Runtime's hardware-specific optimizations, even on free-tier CPU instances.
|
| 34 |
+
|
| 35 |
+
---
|
| 36 |
+
|
| 37 |
+
## 4. Frontend & Interactive UI/UX
|
| 38 |
+
|
| 39 |
+
The frontend is designed to be "cool," responsive, and highly interactive, providing users with a "futuristic" feel.
|
| 40 |
+
|
| 41 |
+
### 4.1 Tech Stack
|
| 42 |
+
* **Styling**: Tailwind CSS for rapid, modern UI development.
|
| 43 |
+
* **Animations**: Framer Motion for smooth transitions, layout changes, and interactive elements.
|
| 44 |
+
* **Icons**: Lucide React for a clean, consistent icon set.
|
| 45 |
+
|
| 46 |
+
### 4.2 Key UI Features
|
| 47 |
+
* **Glassmorphism Design**: Use of semi-transparent backgrounds with blur effects for a modern look.
|
| 48 |
+
* **Interactive Landmark Overlay**: A canvas overlay on the webcam feed that draws the 21 hand landmarks in real-time. Landmarks will "glow" when a sign is successfully recognized.
|
| 49 |
+
* **Dynamic Prediction HUD**: A Head-Up Display (HUD) style interface that shows the current prediction, confidence level, and a history of recently detected signs.
|
| 50 |
+
* **Responsive Layout**: Fully functional on mobile and desktop, with optimized camera controls for both.
|
| 51 |
+
|
| 52 |
+
### 4.3 User Experience Flow
|
| 53 |
+
1. **Onboarding**: A quick, animated guide on how to position the hand for best results.
|
| 54 |
+
2. **Calibration**: A brief "Ready?" state that ensures the lighting and hand distance are optimal.
|
| 55 |
+
3. **Real-time Translation**: Instant feedback as the user signs, with the translated Gujarati text appearing in a stylized "speech bubble" or text box.
|
| 56 |
+
|
| 57 |
+
---
|
| 58 |
+
|
| 59 |
+
## 4. Deployment & DevOps
|
| 60 |
+
|
| 61 |
+
### 4.1 Continuous Integration/Deployment (CI/CD)
|
| 62 |
+
Using **GitHub Actions**, the project will follow a strict deployment pipeline:
|
| 63 |
+
1. **Lint & Test**: Ensure code quality and run unit tests for ML logic.
|
| 64 |
+
2. **Build**: Create optimized production builds for the React app and Dockerize the FastAPI backend.
|
| 65 |
+
3. **Deploy**:
|
| 66 |
+
* Frontend automatically pushes to **Vercel**.
|
| 67 |
+
* Backend pushes to **Fly.io** using `flyctl`.
|
| 68 |
+
|
| 69 |
+
### 4.2 Scalability & Cost Management
|
| 70 |
+
* **Scale-to-Zero**: The backend on Fly.io can be configured to sleep when not in use to preserve free-tier resources.
|
| 71 |
+
* **CDN Caching**: Vercel's Edge Network will cache all static assets, ensuring fast load times globally.
|
| 72 |
+
|
| 73 |
+
---
|
| 74 |
+
|
| 75 |
+
## 5. Implementation Roadmap
|
| 76 |
+
|
| 77 |
+
### Phase 1: Core Backend & ML Integration
|
| 78 |
+
- [ ] Set up FastAPI project structure.
|
| 79 |
+
- [ ] Implement WebSocket handler for landmark data.
|
| 80 |
+
- [ ] Integrate the trained XGBoost model for real-time inference.
|
| 81 |
+
|
| 82 |
+
### Phase 2: Advanced Frontend Development
|
| 83 |
+
- [ ] Initialize Vite + React project with Tailwind.
|
| 84 |
+
- [ ] Implement webcam capture and MediaPipe landmark extraction (client-side).
|
| 85 |
+
- [ ] Create the interactive HUD and glassmorphism UI.
|
| 86 |
+
|
| 87 |
+
### Phase 3: Production Hardening
|
| 88 |
+
- [ ] Set up GitHub Actions for automated deployment.
|
| 89 |
+
- [ ] Implement error handling for low-bandwidth scenarios.
|
| 90 |
+
- [ ] Finalize documentation and user guide.
|
| 91 |
+
|
| 92 |
+
---
|
| 93 |
+
|
| 94 |
+
## 6. References
|
| 95 |
+
[1] [FastAPI Documentation](https://fastapi.tiangolo.com/) - High-performance web framework for building APIs.
|
| 96 |
+
[2] [MediaPipe Hands](https://developers.google.com/mediapipe/solutions/vision/hand_landmarker) - Real-time hand landmark detection.
|
| 97 |
+
[3] [Framer Motion](https://www.framer.com/motion/) - A production-ready motion library for React.
|
| 98 |
+
[4] [Fly.io Free Tier](https://fly.io/docs/about/pricing/) - Details on free-tier resource allocation.
|
| 99 |
+
[5] [Vercel Deployment](https://vercel.com/docs/deployments/overview) - Global CDN and hosting for frontend applications.
|
TASKS.md
ADDED
|
@@ -0,0 +1,284 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SanketSetu — Execution TODO & Implementation Tracker
|
| 2 |
+
|
| 3 |
+
## Model Analysis (Reviewed 2026-03-02)
|
| 4 |
+
|
| 5 |
+
All 5 model files inspected. Three distinct inference pipelines exist:
|
| 6 |
+
|
| 7 |
+
| Pipeline | Files | Input | Process | Output |
|
| 8 |
+
|---|---|---|---|---|
|
| 9 |
+
| **A — Primary (Fastest)** | `Mediapipe_XGBoost/model.pkl` | 63 MediaPipe coords (21 landmarks × x,y,z) | XGBClassifier (50 trees) | 34-class probability |
|
| 10 |
+
| **B — Autoencoder + LGBM** | `CNN_Autoencoder_LightGBM/autoencoder_model.pkl` + `lgbm_model.pkl` | 63 MediaPipe coords | Encoder (63→32→**16** bottleneck) + LGBMClassifier | 34-class probability |
|
| 11 |
+
| **C — Vision CNN + SVM** | `CNN_PreTrained/cnn_model.pkl` + `svm_model.pkl` | 128×128×3 RGB image | ResNet50-based CNN (179 layers) → 256 features + SVC(C=10) | 34-class probability w/ probability=True |
|
| 12 |
+
|
| 13 |
+
### Key Architecture Facts
|
| 14 |
+
- **34 classes** (Gujarati Sign Language alphabet + digits, labels 0–33)
|
| 15 |
+
- **Pipeline A** input: 63 floats — directly from MediaPipe `hand_landmarks` (x, y, z per landmark, flattened)
|
| 16 |
+
- **Pipeline B** input: same 63 floats → takes only the encoder half (first 3 Dense layers, output of `dense_1` layer = 16 features)
|
| 17 |
+
- **Pipeline C** input: 128×128 BGR/RGB cropped hand image, normalized to [0,1]
|
| 18 |
+
- All `.pth` files are identical copies of the `.pkl` files (same objects, different extension)
|
| 19 |
+
- Model quality strategy: A is primary (sub-ms); if confidence < threshold, query B or C for ensemble
|
| 20 |
+
|
| 21 |
+
---
|
| 22 |
+
|
| 23 |
+
## Project Folder Structure to Create
|
| 24 |
+
|
| 25 |
+
```
|
| 26 |
+
SanketSetu/
|
| 27 |
+
├── backend/ ← FastAPI server
|
| 28 |
+
│ ├── app/
|
| 29 |
+
│ │ ├── main.py ← FastAPI entry, WebSocket + REST
|
| 30 |
+
│ │ ├── models/
|
| 31 |
+
│ │ │ ├── loader.py ← Singleton model loader
|
| 32 |
+
│ │ │ └── label_map.py ← 0–33 → Gujarati sign name mapping
|
| 33 |
+
│ │ ├── inference/
|
| 34 |
+
│ │ │ ├── pipeline_a.py ← XGBoost inference (63 landmarks)
|
| 35 |
+
│ │ │ ├── pipeline_b.py ← Autoencoder encoder + LightGBM
|
| 36 |
+
│ │ │ ├── pipeline_c.py ← ResNet CNN + SVM (image-based)
|
| 37 |
+
│ │ │ └── ensemble.py ← Confidence-weighted ensemble logic
|
| 38 |
+
│ │ ├── schemas.py ← Pydantic request/response models
|
| 39 |
+
│ │ └── config.py ← Settings (confidence threshold, etc.)
|
| 40 |
+
│ ├── weights/ ← Symlink or copy of model pkl files
|
| 41 |
+
│ ├── requirements.txt
|
| 42 |
+
│ ├── Dockerfile
|
| 43 |
+
│ └── fly.toml
|
| 44 |
+
│
|
| 45 |
+
├── frontend/ ← Vite + React + TS
|
| 46 |
+
│ ├── src/
|
| 47 |
+
│ │ ├── components/
|
| 48 |
+
│ │ │ ├── WebcamFeed.tsx ← Webcam + canvas landmark overlay
|
| 49 |
+
│ │ │ ├── LandmarkCanvas.tsx ← Draws 21 hand points + connections
|
| 50 |
+
│ │ │ ├── PredictionHUD.tsx ← Live sign, confidence bar, history
|
| 51 |
+
│ │ │ ├── OnboardingGuide.tsx ← Animated intro wizard
|
| 52 |
+
│ │ │ └── Calibration.tsx ← Lighting/distance check UI
|
| 53 |
+
│ │ ├── hooks/
|
| 54 |
+
│ │ │ ├── useWebSocket.ts ← WS connection, send/receive
|
| 55 |
+
│ │ │ ├── useMediaPipe.ts ← MediaPipe Hands JS integration
|
| 56 |
+
│ │ │ └── useWebcam.ts ← Camera permissions + stream
|
| 57 |
+
│ │ ├── lib/
|
| 58 |
+
│ │ │ └── landmarkUtils.ts ← Landmark normalization (mirror XGBoost preprocessing)
|
| 59 |
+
│ │ ├── App.tsx
|
| 60 |
+
│ │ └── main.tsx
|
| 61 |
+
│ ├── public/
|
| 62 |
+
│ ├── index.html
|
| 63 |
+
│ ├── tailwind.config.ts
|
| 64 |
+
│ ├── vite.config.ts
|
| 65 |
+
│ └── package.json
|
| 66 |
+
│
|
| 67 |
+
├── CNN_Autoencoder_LightGBM/ ← (existing)
|
| 68 |
+
├── CNN_PreTrained/ ← (existing)
|
| 69 |
+
├── Mediapipe_XGBoost/ ← (existing)
|
| 70 |
+
└── .github/
|
| 71 |
+
└── workflows/
|
| 72 |
+
├── deploy-backend.yml
|
| 73 |
+
└── deploy-frontend.yml
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
---
|
| 77 |
+
|
| 78 |
+
## Phase 1 — Backend Core (FastAPI + Model Integration)
|
| 79 |
+
|
| 80 |
+
### 1.1 Project Bootstrap
|
| 81 |
+
- [x] Create `backend/` folder and `app/` package structure
|
| 82 |
+
- [x] Create `backend/requirements.txt` with: `fastapi`, `uvicorn[standard]`, `websockets`, `xgboost`, `lightgbm`, `scikit-learn`, `keras==3.13.2`, `tensorflow-cpu`, `numpy`, `opencv-python-headless`, `pillow`, `python-dotenv`
|
| 83 |
+
- [x] Create `backend/app/config.py` — confidence threshold (default 0.7), WebSocket max connections, pipeline mode (A/B/C/ensemble)
|
| 84 |
+
- [x] Create `backend/app/models/label_map.py` — map class indices 0–33 to Gujarati sign names
|
| 85 |
+
|
| 86 |
+
### 1.2 Model Loader (Singleton)
|
| 87 |
+
- [x] Create `backend/app/models/loader.py`
|
| 88 |
+
- Load `model.pkl` (XGBoost) at startup
|
| 89 |
+
- Load `autoencoder_model.pkl` (extract encoder layers only: input → dense → dense_1) and `lgbm_model.pkl`
|
| 90 |
+
- Load `cnn_model.pkl` (full ResNet50 feature extractor, strip any classification head) and `svm_model.pkl`
|
| 91 |
+
- Expose `ModelStore` singleton accessed via `get_model_store()` dependency
|
| 92 |
+
- Log load times for each model
|
| 93 |
+
|
| 94 |
+
### 1.3 Pipeline A — XGBoost (Primary, Landmarks)
|
| 95 |
+
- [x] Create `backend/app/inference/pipeline_a.py`
|
| 96 |
+
- Input: `List[float]` of length 63 (x,y,z per landmark, already normalized by MediaPipe)
|
| 97 |
+
- Output: `{"sign": str, "confidence": float, "probabilities": List[float]}`
|
| 98 |
+
- Use `model.predict_proba(np.array(landmarks).reshape(1,-1))[0]`
|
| 99 |
+
- Return `classes_[argmax]` and `max(probabilities)` as confidence
|
| 100 |
+
|
| 101 |
+
### 1.4 Pipeline B — Autoencoder Encoder + LightGBM
|
| 102 |
+
- [x] Create `backend/app/inference/pipeline_b.py`
|
| 103 |
+
- Build encoder-only submodel: `encoder = keras.Model(inputs=model.input, outputs=model.layers[2].output)` (output of `dense_1`, the 16-D bottleneck)
|
| 104 |
+
- Input: 63 MediaPipe coords
|
| 105 |
+
- Encode: `features = encoder.predict(np.array(landmarks).reshape(1,-1))[0]` → shape (16,)
|
| 106 |
+
- Classify: `lgbm.predict_proba(features.reshape(1,-1))[0]`
|
| 107 |
+
|
| 108 |
+
### 1.5 Pipeline C — CNN + SVM (Image-based)
|
| 109 |
+
- [x] Create `backend/app/inference/pipeline_c.py`
|
| 110 |
+
- Input: base64-encoded JPEG or raw bytes of the cropped hand region (128×128 px)
|
| 111 |
+
- Decode → numpy array (128,128,3) uint8 → normalize to float32 [0,1]
|
| 112 |
+
- `features = cnn_model.predict(img[np.newaxis])[0]` → shape (256,)
|
| 113 |
+
- `proba = svm.predict_proba(features.reshape(1,-1))[0]`
|
| 114 |
+
- Note: CNN inference is slower (~50–200ms on CPU); only call when Pipeline A confidence < threshold
|
| 115 |
+
|
| 116 |
+
### 1.6 Ensemble Logic
|
| 117 |
+
- [x] Create `backend/app/inference/ensemble.py`
|
| 118 |
+
- Call Pipeline A first
|
| 119 |
+
- If `confidence < config.THRESHOLD` (default 0.7), call Pipeline B
|
| 120 |
+
- If still below threshold and image data available, call Pipeline C
|
| 121 |
+
- Final result: weighted average of probabilities from each pipeline that was called
|
| 122 |
+
- Return the top predicted class and ensemble confidence score
|
| 123 |
+
|
| 124 |
+
### 1.7 WebSocket Handler
|
| 125 |
+
- [x] Create `backend/app/main.py` with FastAPI app
|
| 126 |
+
- [x] Implement `GET /health` — returns `{"status": "ok", "models_loaded": true}`
|
| 127 |
+
- [x] Implement `WS /ws/landmarks` — primary endpoint
|
| 128 |
+
- Client sends JSON: `{"landmarks": [63 floats], "session_id": "..."}`
|
| 129 |
+
- Server responds: `{"sign": "...", "confidence": 0.95, "pipeline": "A", "label_index": 12}`
|
| 130 |
+
- Handle disconnect gracefully
|
| 131 |
+
- [x] Implement `WS /ws/image` — optional image-based endpoint for Pipeline C
|
| 132 |
+
- Client sends JSON: `{"image_b64": "...", "session_id": "..."}`
|
| 133 |
+
- [x] Implement `POST /api/predict` — REST fallback for non-WS clients
|
| 134 |
+
- Body: `{"landmarks": [63 floats]}`
|
| 135 |
+
- Returns same response schema as WS
|
| 136 |
+
|
| 137 |
+
### 1.8 Schemas & Validation
|
| 138 |
+
- [x] Create `backend/app/schemas.py`
|
| 139 |
+
- `LandmarkMessage(BaseModel)`: `landmarks: List[float]` (must be length 63), `session_id: str`
|
| 140 |
+
- `ImageMessage(BaseModel)`: `image_b64: str`, `session_id: str`
|
| 141 |
+
- `PredictionResponse(BaseModel)`: `sign: str`, `confidence: float`, `pipeline: str`, `label_index: int`, `probabilities: Optional[List[float]]`
|
| 142 |
+
|
| 143 |
+
### 1.9 CORS & Middleware
|
| 144 |
+
- [x] Configure CORS for Vercel frontend domain + localhost:5173
|
| 145 |
+
- [x] Add request logging middleware (log session_id, pipeline used, latency ms)
|
| 146 |
+
- [x] Add global exception handler returning proper JSON errors
|
| 147 |
+
|
| 148 |
+
---
|
| 149 |
+
|
| 150 |
+
## Phase 2 — Frontend (React + Vite + Tailwind + Framer Motion)
|
| 151 |
+
|
| 152 |
+
### 2.1 Project Bootstrap
|
| 153 |
+
- [x] Run `npm create vite@latest frontend -- --template react-ts` inside `SanketSetu/`
|
| 154 |
+
- [x] Install deps: `tailwindcss`, `framer-motion`, `lucide-react`, `@mediapipe/tasks-vision`
|
| 155 |
+
- [x] Configure Tailwind with custom palette (dark neon-cyan glassmorphism theme)
|
| 156 |
+
- [x] Set up `vite.config.ts` proxy: `/api` → backend URL, `/ws` → backend WS URL
|
| 157 |
+
|
| 158 |
+
### 2.2 Webcam Hook (`useWebcam.ts`)
|
| 159 |
+
- [x] Request `getUserMedia({ video: { width: 1280, height: 720 } })`
|
| 160 |
+
- [x] Expose `videoRef`, `isReady`, `error`, `switchCamera()` (for mobile front/back toggle)
|
| 161 |
+
- [x] Handle permission denied state with instructional UI
|
| 162 |
+
|
| 163 |
+
### 2.3 MediaPipe Hook (`useMediaPipe.ts`)
|
| 164 |
+
- [x] Initialize `HandLandmarker` from `@mediapipe/tasks-vision` (WASM backend)
|
| 165 |
+
- [x] Process video frames at target 30fps using `requestAnimationFrame`
|
| 166 |
+
- [x] Extract `landmarks[0]` (first hand) → flatten to 63 floats `[x0,y0,z0, x1,y1,z1, ...]`
|
| 167 |
+
- [x] Normalize: subtract wrist (landmark 0) position to make translation-invariant — **must match training preprocessing**
|
| 168 |
+
- [x] Expose `landmarks: number[] | null`, `handedness: string`, `isDetecting: boolean`
|
| 169 |
+
|
| 170 |
+
### 2.4 WebSocket Hook (`useWebSocket.ts`)
|
| 171 |
+
- [x] Connect to `wss://backend-url/ws/landmarks` on mount
|
| 172 |
+
- [x] Auto-reconnect with exponential backoff on disconnect
|
| 173 |
+
- [x] `sendLandmarks(landmarks: number[])` — throttled to max 15 sends/sec
|
| 174 |
+
- [x] Expose `lastPrediction: PredictionResponse | null`, `isConnected: boolean`, `latency: number`
|
| 175 |
+
|
| 176 |
+
### 2.5 Landmark Canvas (`LandmarkCanvas.tsx`)
|
| 177 |
+
- [x] Overlay `<canvas>` on top of `<video>` with `position: absolute`
|
| 178 |
+
- [x] Draw 21 hand landmark dots (cyan glow: `shadowBlur`, `shadowColor`)
|
| 179 |
+
- [x] Draw 21 bone connections following MediaPipe hand topology (finger segments)
|
| 180 |
+
- [x] On successful prediction: animate landmarks to pulse/glow with Framer Motion spring
|
| 181 |
+
- [x] Use `requestAnimationFrame` for smooth 60fps rendering
|
| 182 |
+
|
| 183 |
+
### 2.6 Prediction HUD (`PredictionHUD.tsx`)
|
| 184 |
+
- [x] Glassmorphism card: `backdrop-blur`, `bg-white/10`, `border-white/20`
|
| 185 |
+
- [x] Large Gujarati sign name (mapped from label index)
|
| 186 |
+
- [x] Confidence bar: animated width transition via Framer Motion `animate={{ width: confidence% }}`
|
| 187 |
+
- [x] Color coding: green (>85%), yellow (60–85%), red (<60%)
|
| 188 |
+
- [x] Rolling history list: last 10 recognized signs (Framer Motion `AnimatePresence` for enter/exit)
|
| 189 |
+
- [x] Pipeline badge: shows which pipeline (A/B/C) produced the result
|
| 190 |
+
- [x] Latency display: shows WS round-trip time in ms
|
| 191 |
+
|
| 192 |
+
### 2.7 Onboarding Guide (`OnboardingGuide.tsx`)
|
| 193 |
+
- [x] 3-step animated wizard using Framer Motion page transitions
|
| 194 |
+
1. "Position your hand 30–60cm from camera"
|
| 195 |
+
2. "Ensure good lighting, avoid dark backgrounds"
|
| 196 |
+
3. "Show signs clearly — palm facing camera"
|
| 197 |
+
- [x] Skip button + "Don't show again" (localStorage)
|
| 198 |
+
|
| 199 |
+
### 2.8 Calibration Screen (`Calibration.tsx`)
|
| 200 |
+
- [x] Brief 2-second "Ready?" screen after onboarding
|
| 201 |
+
- [x] Check: hand detected by MediaPipe → show green checkmark animation
|
| 202 |
+
- [x] Auto-transitions to main translation view when hand is stable for 1 second
|
| 203 |
+
|
| 204 |
+
### 2.9 Main App Layout (`App.tsx`)
|
| 205 |
+
- [x] Full-screen dark background with subtle animated gradient
|
| 206 |
+
- [x] Three-panel layout (desktop): webcam | HUD | history
|
| 207 |
+
- [x] Mobile: stacked layout with webcam top, HUD bottom
|
| 208 |
+
- [x] Header: "SanketSetu | સંકેત-સેતુ" with glowing text effect
|
| 209 |
+
- [x] Settings gear icon → modal for pipeline selection (A / B / C / Ensemble), confidence threshold slider
|
| 210 |
+
|
| 211 |
+
---
|
| 212 |
+
|
| 213 |
+
## Phase 3 — Dockerization & Deployment
|
| 214 |
+
|
| 215 |
+
### 3.1 Backend Dockerfile
|
| 216 |
+
- [x] Create `Dockerfile` (repo root, build context includes models)
|
| 217 |
+
- [x] Add `.dockerignore` (excludes `.venv`, `node_modules`, `*.pth`, tests)
|
| 218 |
+
- [ ] Test locally: `docker build -t sanketsetu-backend . && docker run -p 8000:8000 sanketsetu-backend`
|
| 219 |
+
|
| 220 |
+
### 3.2 Fly.io Configuration
|
| 221 |
+
- [x] Create `fly.toml` (repo root, region=maa, port 8000, shared-cpu-2x)
|
| 222 |
+
- [x] Note: Keras/TF will increase Docker image size — use `tensorflow-cpu` to keep slim
|
| 223 |
+
- [ ] Set secrets via `flyctl secrets set` for any API keys
|
| 224 |
+
- [ ] Run: `flyctl deploy --dockerfile Dockerfile`
|
| 225 |
+
|
| 226 |
+
### 3.3 Vercel Frontend Deployment
|
| 227 |
+
- [x] Create `frontend/vercel.json` with SPA rewrite + WASM Content-Type header
|
| 228 |
+
- [x] Add `VITE_WS_URL` and `VITE_API_URL` to Vercel environment variables (via CI vars)
|
| 229 |
+
- [ ] Ensure `@mediapipe/tasks-vision` WASM files are served correctly (add to `public/`)
|
| 230 |
+
|
| 231 |
+
### 3.4 GitHub Actions CI/CD
|
| 232 |
+
- [x] Create `.github/workflows/deploy-backend.yml`
|
| 233 |
+
- Triggers on push to `main` when `backend/**` changes
|
| 234 |
+
- Steps: checkout → setup Python → run tests → `flyctl deploy`
|
| 235 |
+
- [x] Create `.github/workflows/deploy-frontend.yml`
|
| 236 |
+
- Triggers on push to `main` when `frontend/**` changes
|
| 237 |
+
- Steps: checkout → `npm ci` → tsc → `npm run build` → Vercel CLI deploy
|
| 238 |
+
|
| 239 |
+
---
|
| 240 |
+
|
| 241 |
+
## Phase 4 — Testing & Hardening
|
| 242 |
+
|
| 243 |
+
### 4.1 Backend Tests
|
| 244 |
+
- [x] `tests/test_pipeline_a.py` — 8 unit tests, XGBoost inference (4s)
|
| 245 |
+
- [x] `tests/test_pipeline_b.py` — 6 unit tests, encoder + LightGBM (49s)
|
| 246 |
+
- [x] `tests/test_pipeline_c.py` — 7 unit tests, CNN + SVM with real 128×128 images (14s)
|
| 247 |
+
- [x] `tests/test_websocket.py` — 7 integration tests, health + REST + WS round-trip
|
| 248 |
+
|
| 249 |
+
### 4.2 Frontend Error Handling
|
| 250 |
+
- [ ] No-camera fallback UI (file upload for image mode)
|
| 251 |
+
- [x] WS reconnecting banner (red banner when `!isConnected && stage === 'running'`)
|
| 252 |
+
- [x] Low-bandwidth mode: reduce send rate to 5fps if latency > 500ms + yellow "LB" badge in HUD
|
| 253 |
+
- [x] MediaPipe WASM load failure fallback message (shown in header via `mpError`)
|
| 254 |
+
|
| 255 |
+
### 4.3 Label Map (Critical)
|
| 256 |
+
- [ ] Create `backend/app/models/label_map.py` mapping classes 0–33 to actual Gujarati signs
|
| 257 |
+
- You need to confirm the exact mapping used during training (check your original dataset/notebook)
|
| 258 |
+
- Placeholder: `LABEL_MAP = { 0: "ક", 1: "ખ", ... , 33: "?" }`
|
| 259 |
+
- This file must exactly mirror what was used in training
|
| 260 |
+
|
| 261 |
+
---
|
| 262 |
+
|
| 263 |
+
## Execution Order (Start Here)
|
| 264 |
+
|
| 265 |
+
```
|
| 266 |
+
Week 1: Phase 1.1 → 1.3 → 1.7 (get WS working with Pipeline A alone, test in browser)
|
| 267 |
+
Week 2: Phase 1.4 → 1.5 → 1.6 (add other pipelines + ensemble)
|
| 268 |
+
Week 3: Phase 2.1 → 2.2 → 2.3 → 2.4 (React skeleton + WS connected)
|
| 269 |
+
Week 4: Phase 2.5 → 2.6 → 2.7 → 2.8 → 2.9 (full UI)
|
| 270 |
+
Week 5: Phase 3 + 4 (deploy + tests)
|
| 271 |
+
```
|
| 272 |
+
|
| 273 |
+
---
|
| 274 |
+
|
| 275 |
+
## Critical Decision Points
|
| 276 |
+
|
| 277 |
+
| Decision | Default | Notes |
|
| 278 |
+
|---|---|---|
|
| 279 |
+
| Primary pipeline | **A (XGBoost)** | Sub-ms inference, uses MediaPipe landmarks already extracted client-side |
|
| 280 |
+
| Confidence threshold for fallback | **0.70** | Tune after testing — if XGBoost confidence < 70%, call Pipeline B |
|
| 281 |
+
| Enable Pipeline C (CNN) | **Optional / off by default** | Adds ~150ms latency and requires image upload, not just landmarks |
|
| 282 |
+
| MediaPipe model variant | **lite** | Use `hand_landmarker_lite.task` for mobile performance |
|
| 283 |
+
| WebSocket frame rate | **15fps** | Sufficient for sign recognition, avoids server overload |
|
| 284 |
+
| Gujarati label map | **CONFIRM WITH DATASET** | Classes 0–33 must match training data exactly |
|
backend/.env.example
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SanketSetu Backend — environment variables
|
| 2 |
+
# Copy this file to .env and edit as needed.
|
| 3 |
+
# All values below are the defaults; remove a line to keep the default.
|
| 4 |
+
|
| 5 |
+
# ── Model paths (default: resolved from repo root) ─────────────────────────
|
| 6 |
+
# WEIGHTS_DIR=/absolute/path/to/model/dir
|
| 7 |
+
|
| 8 |
+
# ── Inference ───────────────────────────────────────────────────────────────
|
| 9 |
+
# Pipeline A confidence below this → also run Pipeline B
|
| 10 |
+
CONFIDENCE_THRESHOLD=0.70
|
| 11 |
+
|
| 12 |
+
# Pipeline A+B ensemble confidence below this → also run Pipeline C (if image)
|
| 13 |
+
SECONDARY_THRESHOLD=0.60
|
| 14 |
+
|
| 15 |
+
# Which pipeline to run: A | B | C | ensemble
|
| 16 |
+
PIPELINE_MODE=ensemble
|
| 17 |
+
|
| 18 |
+
# ── Server ──────────────────────────────────────────────────────────────────
|
| 19 |
+
MAX_WS_CONNECTIONS=100
|
| 20 |
+
|
| 21 |
+
# Comma-separated list of allowed CORS origins
|
| 22 |
+
CORS_ORIGINS=http://localhost:5173,http://localhost:3000
|
| 23 |
+
|
| 24 |
+
# ── TensorFlow / Keras ──────────────────────────────────────────────────────
|
| 25 |
+
KERAS_BACKEND=tensorflow
|
| 26 |
+
TF_CPP_MIN_LOG_LEVEL=3
|
| 27 |
+
# Empty = CPU-only, skip GPU scan (faster startup). Keep the comment on its own
# line: docker --env-file and shell sourcing do not strip inline comments.
CUDA_VISIBLE_DEVICES=
|
| 28 |
+
TF_ENABLE_ONEDNN_OPTS=0
|
| 29 |
+
OMP_NUM_THREADS=4
|
| 30 |
+
|
| 31 |
+
# ── Logging ─────────────────────────────────────────────────────────────────
|
| 32 |
+
LOG_LEVEL=INFO
|
backend/app/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# backend/app/__init__.py
|
backend/app/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (162 Bytes). View file
|
|
|
backend/app/__pycache__/config.cpython-312.pyc
ADDED
|
Binary file (2.97 kB). View file
|
|
|
backend/app/__pycache__/main.cpython-312.pyc
ADDED
|
Binary file (12.1 kB). View file
|
|
|
backend/app/__pycache__/schemas.cpython-312.pyc
ADDED
|
Binary file (3.93 kB). View file
|
|
|
backend/app/config.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Application-wide settings for SanketSetu backend.
|
| 3 |
+
Override any value by setting the corresponding environment variable.
|
| 4 |
+
"""
|
| 5 |
+
from __future__ import annotations
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
# ---------------------------------------------------------------------------
|
| 11 |
+
# TensorFlow / Keras startup optimisations
|
| 12 |
+
# Set these BEFORE any import that might pull in tensorflow.
|
| 13 |
+
# ---------------------------------------------------------------------------
|
| 14 |
+
os.environ.setdefault("KERAS_BACKEND", "tensorflow")
|
| 15 |
+
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "3") # silence C++ TF logs
|
| 16 |
+
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "") # CPU-only: skip GPU scan
|
| 17 |
+
os.environ.setdefault("TF_ENABLE_ONEDNN_OPTS", "0") # disable oneDNN init check
|
| 18 |
+
os.environ.setdefault("OMP_NUM_THREADS", "4") # cap CPU thread pool
|
| 19 |
+
|
| 20 |
+
# ---------------------------------------------------------------------------
|
| 21 |
+
# Paths
|
| 22 |
+
# ---------------------------------------------------------------------------
|
| 23 |
+
BASE_DIR = Path(__file__).resolve().parent.parent.parent # repo root
|
| 24 |
+
WEIGHTS_DIR = os.getenv("WEIGHTS_DIR", str(BASE_DIR))
|
| 25 |
+
|
| 26 |
+
# Individual model paths (relative to repo root)
|
| 27 |
+
PIPELINE_A_MODEL = os.path.join(WEIGHTS_DIR, "Mediapipe_XGBoost", "model.pkl")
|
| 28 |
+
PIPELINE_B_AE = os.path.join(WEIGHTS_DIR, "CNN_Autoencoder_LightGBM", "autoencoder_model.pkl")
|
| 29 |
+
PIPELINE_B_LGBM = os.path.join(WEIGHTS_DIR, "CNN_Autoencoder_LightGBM", "lgbm_model.pkl")
|
| 30 |
+
PIPELINE_C_CNN = os.path.join(WEIGHTS_DIR, "CNN_PreTrained", "cnn_model.pkl")
|
| 31 |
+
PIPELINE_C_SVM = os.path.join(WEIGHTS_DIR, "CNN_PreTrained", "svm_model.pkl")
|
| 32 |
+
|
| 33 |
+
# ---------------------------------------------------------------------------
|
| 34 |
+
# Inference thresholds
|
| 35 |
+
# ---------------------------------------------------------------------------
|
| 36 |
+
# If Pipeline A confidence falls below this, Pipeline B is also called.
|
| 37 |
+
CONFIDENCE_THRESHOLD: float = float(os.getenv("CONFIDENCE_THRESHOLD", "0.70"))
|
| 38 |
+
|
| 39 |
+
# If ensemble after B still below this, Pipeline C is attempted (if image provided).
|
| 40 |
+
SECONDARY_THRESHOLD: float = float(os.getenv("SECONDARY_THRESHOLD", "0.60"))
|
| 41 |
+
|
| 42 |
+
# ---------------------------------------------------------------------------
|
| 43 |
+
# Pipeline mode
|
| 44 |
+
# ---------------------------------------------------------------------------
|
| 45 |
+
# "A" → only XGBoost (fastest)
|
| 46 |
+
# "B" → only Autoencoder + LGBM
|
| 47 |
+
# "C" → only CNN + SVM (image required)
|
| 48 |
+
# "ensemble" → A first, fallback to B, then C
|
| 49 |
+
PIPELINE_MODE: str = os.getenv("PIPELINE_MODE", "ensemble")
|
| 50 |
+
|
| 51 |
+
# ---------------------------------------------------------------------------
|
| 52 |
+
# WebSocket / server
|
| 53 |
+
# ---------------------------------------------------------------------------
|
| 54 |
+
MAX_WS_CONNECTIONS: int = int(os.getenv("MAX_WS_CONNECTIONS", "100"))
|
| 55 |
+
WS_SEND_RATE_LIMIT: int = int(os.getenv("WS_SEND_RATE_LIMIT", "15")) # max frames/sec per client
|
| 56 |
+
|
| 57 |
+
# Allowed CORS origins (comma-separated list in env var)
|
| 58 |
+
_cors_env = os.getenv("CORS_ORIGINS", "http://localhost:5173,http://localhost:3000")
|
| 59 |
+
CORS_ORIGINS: list[str] = [o.strip() for o in _cors_env.split(",") if o.strip()]
|
| 60 |
+
|
| 61 |
+
# ---------------------------------------------------------------------------
|
| 62 |
+
# Logging
|
| 63 |
+
# ---------------------------------------------------------------------------
|
| 64 |
+
LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
|
backend/app/inference/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# backend/app/inference/__init__.py
|
backend/app/inference/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (172 Bytes). View file
|
|
|
backend/app/inference/__pycache__/ensemble.cpython-312.pyc
ADDED
|
Binary file (5.66 kB). View file
|
|
|
backend/app/inference/__pycache__/pipeline_a.cpython-312.pyc
ADDED
|
Binary file (2.16 kB). View file
|
|
|
backend/app/inference/__pycache__/pipeline_b.cpython-312.pyc
ADDED
|
Binary file (2.4 kB). View file
|
|
|
backend/app/inference/__pycache__/pipeline_c.cpython-312.pyc
ADDED
|
Binary file (3.52 kB). View file
|
|
|
backend/app/inference/ensemble.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Ensemble logic — orchestrates Pipelines A → B → C with confidence-based fallback.
|
| 3 |
+
|
| 4 |
+
Strategy
|
| 5 |
+
--------
|
| 6 |
+
1. Always run Pipeline A (XGBoost, sub-ms).
|
| 7 |
+
2. If confidence < CONFIDENCE_THRESHOLD, also run Pipeline B (Autoencoder+LGBM).
|
| 8 |
+
3. Average the probability vectors from the pipelines that were run.
|
| 9 |
+
4. If ensemble confidence still < SECONDARY_THRESHOLD AND image data is supplied,
|
| 10 |
+
also run Pipeline C (CNN+SVM) and include it in the average.
|
| 11 |
+
5. Return the class with the highest averaged probability.
|
| 12 |
+
|
| 13 |
+
The caller can also force a specific pipeline via the PIPELINE_MODE config.
|
| 14 |
+
"""
|
| 15 |
+
from __future__ import annotations
|
| 16 |
+
|
| 17 |
+
import logging
|
| 18 |
+
import time
|
| 19 |
+
from typing import List, Optional, Any
|
| 20 |
+
|
| 21 |
+
import numpy as np
|
| 22 |
+
|
| 23 |
+
from app import config
|
| 24 |
+
from app.models.label_map import get_sign
|
| 25 |
+
from app.inference.pipeline_a import PredictionResult
|
| 26 |
+
import app.inference.pipeline_a as _pa
|
| 27 |
+
import app.inference.pipeline_b as _pb
|
| 28 |
+
import app.inference.pipeline_c as _pc
|
| 29 |
+
|
| 30 |
+
logger = logging.getLogger(__name__)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def run(
    landmarks: List[float],
    *,
    image_input: Optional[str] = None,
    xgb_model: Any,
    encoder_model: Any,
    lgbm_model: Any,
    cnn_model: Any,
    svm_model: Any,
    pipeline_mode: str = "ensemble",
    confidence_threshold: float = 0.70,
    secondary_threshold: float = 0.60,
) -> PredictionResult:
    """
    Execute one or more inference pipelines and return a consolidated result.

    In "ensemble" mode, Pipeline A runs first; B is added when the blended
    confidence is below ``confidence_threshold``; C is added when it is still
    below ``secondary_threshold`` and image data was supplied.  The final
    prediction is the argmax of the averaged probability vectors.

    Parameters
    ----------
    landmarks : flat 63-element MediaPipe landmark vector
    image_input : optional base-64 JPEG for Pipeline C
    xgb_model : Pipeline A model
    encoder_model : Pipeline B encoder (Keras sub-model)
    lgbm_model : Pipeline B classifier
    cnn_model : Pipeline C feature extractor
    svm_model : Pipeline C classifier
    pipeline_mode : "A" | "B" | "C" | "ensemble"
    confidence_threshold : fallback to B when A confidence < this value
    secondary_threshold : fallback to C when ensemble(A+B) confidence < this value
    """
    start = time.perf_counter()

    # -----------------------------------------------------------
    # Forced single-pipeline modes — delegate directly.
    # -----------------------------------------------------------
    if pipeline_mode == "A":
        if xgb_model is None:
            raise RuntimeError("Pipeline A model not loaded.")
        return _pa.predict(landmarks, xgb_model)

    if pipeline_mode == "B":
        if encoder_model is None or lgbm_model is None:
            raise RuntimeError("Pipeline B models not loaded.")
        return _pb.predict(landmarks, encoder_model, lgbm_model)

    if pipeline_mode == "C":
        if cnn_model is None or svm_model is None:
            raise RuntimeError("Pipeline C models not loaded.")
        if image_input is None:
            raise ValueError("Pipeline C requires image_input.")
        return _pc.predict(image_input, cnn_model, svm_model)

    # -----------------------------------------------------------
    # Ensemble mode (default): A → B → C with confidence fallback.
    # -----------------------------------------------------------
    stages: list[PredictionResult] = []

    def _blended_confidence() -> float:
        # Max of the element-wise mean of all probability vectors so far;
        # 0.0 forces the next fallback stage when nothing has run yet.
        if not stages:
            return 0.0
        blended = np.mean([s.probabilities for s in stages], axis=0)
        return float(np.max(blended))

    # Stage 1 — Pipeline A (always attempted when available).
    if xgb_model is not None:
        stages.append(_pa.predict(landmarks, xgb_model))
    else:
        logger.warning("Pipeline A not available in ensemble mode.")

    # Stage 2 — Pipeline B when confidence is still low.
    if (
        _blended_confidence() < confidence_threshold
        and encoder_model is not None
        and lgbm_model is not None
    ):
        stages.append(_pb.predict(landmarks, encoder_model, lgbm_model))

    # Stage 3 — Pipeline C when still low and image data is available.
    if (
        _blended_confidence() < secondary_threshold
        and image_input is not None
        and cnn_model is not None
        and svm_model is not None
    ):
        stages.append(_pc.predict(image_input, cnn_model, svm_model))

    # -----------------------------------------------------------
    # Aggregate: average probabilities across all executed stages.
    # -----------------------------------------------------------
    if not stages:
        raise RuntimeError("No inference pipeline could be executed.")

    mean_proba = np.mean([s.probabilities for s in stages], axis=0)
    best = int(np.argmax(mean_proba))
    elapsed_ms = (time.perf_counter() - start) * 1000

    return PredictionResult(
        sign=get_sign(best),
        confidence=float(mean_proba[best]),
        label_index=best,
        probabilities=mean_proba.tolist(),
        pipeline="+".join(s.pipeline for s in stages) if len(stages) > 1 else stages[0].pipeline,
        latency_ms=elapsed_ms,
    )
|
backend/app/inference/pipeline_a.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Pipeline A — XGBoost classifier on raw MediaPipe landmarks.
|
| 3 |
+
|
| 4 |
+
Input : 63 floats [x0,y0,z0 … x20,y20,z20] (already [0,1] normalised by MediaPipe)
|
| 5 |
+
Output : PredictionResult
|
| 6 |
+
"""
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
import time
|
| 10 |
+
from dataclasses import dataclass
|
| 11 |
+
from typing import List
|
| 12 |
+
|
| 13 |
+
import numpy as np
|
| 14 |
+
|
| 15 |
+
from app.models.label_map import get_sign
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@dataclass
class PredictionResult:
    """Outcome of a single inference call; shared by all pipelines and the ensemble."""

    sign: str                   # human-readable sign label resolved via label_map.get_sign()
    confidence: float           # probability of the predicted class (value at the argmax index)
    label_index: int            # argmax class index into `probabilities`
    probabilities: List[float]  # full per-class probability vector
    pipeline: str               # producing pipeline: "A", "B", "C", or a join like "A+B" (ensemble)
    latency_ms: float           # wall-clock inference time in milliseconds
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def predict(landmarks: List[float], xgb_model) -> PredictionResult:
    """
    Classify a flat 63-element landmark vector with the XGBoost model.

    Parameters
    ----------
    landmarks : list of 63 floats ``[x0,y0,z0 … x20,y20,z20]``
    xgb_model : loaded XGBClassifier instance

    Returns
    -------
    PredictionResult
        Top class, its probability, the full probability vector and latency.
    """
    start = time.perf_counter()

    # Single-row feature matrix: (1, 63).
    features = np.asarray(landmarks, dtype=np.float32).reshape(1, -1)
    probs = xgb_model.predict_proba(features)[0]
    best = int(np.argmax(probs))

    return PredictionResult(
        sign=get_sign(best),
        confidence=float(probs[best]),
        label_index=best,
        probabilities=probs.tolist(),
        pipeline="A",
        latency_ms=(time.perf_counter() - start) * 1000,
    )
|
backend/app/inference/pipeline_b.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Pipeline B — Autoencoder encoder + LightGBM classifier.
|
| 3 |
+
|
| 4 |
+
Input : 63 floats [x0,y0,z0 … x20,y20,z20]
|
| 5 |
+
Process: Keras encoder compresses to 16-D bottleneck → LGBMClassifier
|
| 6 |
+
Output : PredictionResult
|
| 7 |
+
"""
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import time
|
| 11 |
+
import warnings
|
| 12 |
+
from dataclasses import dataclass
|
| 13 |
+
from typing import List, Any
|
| 14 |
+
|
| 15 |
+
import numpy as np
|
| 16 |
+
|
| 17 |
+
from app.models.label_map import get_sign
|
| 18 |
+
from app.inference.pipeline_a import PredictionResult
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def predict(landmarks: List[float], encoder_model: Any, lgbm_model: Any) -> PredictionResult:
    """
    Classify a landmark vector via the autoencoder bottleneck + LightGBM.

    Parameters
    ----------
    landmarks : list of 63 floats
    encoder_model : Keras Model (input 63 → output 16, bottleneck sub-model)
    lgbm_model : loaded LGBMClassifier instance

    Returns
    -------
    PredictionResult
    """
    start = time.perf_counter()

    vec = np.asarray(landmarks, dtype=np.float32).reshape(1, -1)  # (1, 63)

    # Direct model call (not .predict()) avoids the verbose Keras progress bar.
    bottleneck = encoder_model(vec, training=False).numpy()  # (1, 16)

    # The LGBM model was fitted on a named DataFrame; feeding a plain numpy
    # array is valid but triggers a sklearn feature-name UserWarning — mute it.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        probs = lgbm_model.predict_proba(bottleneck)[0]

    best = int(np.argmax(probs))

    return PredictionResult(
        sign=get_sign(best),
        confidence=float(probs[best]),
        label_index=best,
        probabilities=probs.tolist(),
        pipeline="B",
        latency_ms=(time.perf_counter() - start) * 1000,
    )
|
backend/app/inference/pipeline_c.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Pipeline C — Pre-trained CNN (ResNet50) feature extractor + SVM classifier.
|
| 3 |
+
|
| 4 |
+
Input : base-64 encoded JPEG string OR raw bytes of a 128×128 RGB hand-crop.
|
| 5 |
+
Process: Decode → normalise → CNN (256-D features) → SVC.predict_proba
|
| 6 |
+
Output : PredictionResult
|
| 7 |
+
|
| 8 |
+
Note: This pipeline is significantly slower (~100–300 ms on CPU) and is only
|
| 9 |
+
invoked as a fallback when landmark-based pipelines have low confidence.
|
| 10 |
+
"""
|
| 11 |
+
from __future__ import annotations
|
| 12 |
+
|
| 13 |
+
import base64
|
| 14 |
+
import io
|
| 15 |
+
import time
|
| 16 |
+
from dataclasses import dataclass
|
| 17 |
+
from typing import Any, List, Union
|
| 18 |
+
|
| 19 |
+
import numpy as np
|
| 20 |
+
from PIL import Image
|
| 21 |
+
|
| 22 |
+
from app.models.label_map import get_sign
|
| 23 |
+
from app.inference.pipeline_a import PredictionResult
|
| 24 |
+
|
| 25 |
+
# Target input size expected by the CNN (ResNet50 Functional model)
|
| 26 |
+
# Target input size expected by the CNN (ResNet50 Functional model)
CNN_IMG_SIZE: int = 128


def _decode_image(image_input: Union[str, bytes]) -> np.ndarray:
    """
    Decode a hand-crop image into a normalised float array.

    Accepts either a base-64 encoded JPEG string (from a WebSocket JSON
    payload) or raw bytes (from HTTP multipart), and returns a
    (128, 128, 3) float32 array with values scaled to [0, 1].
    """
    # Base-64 strings come from the WebSocket path; bytes come from multipart.
    payload = base64.b64decode(image_input) if isinstance(image_input, str) else image_input

    with Image.open(io.BytesIO(payload)) as decoded:
        rgb = decoded.convert("RGB").resize((CNN_IMG_SIZE, CNN_IMG_SIZE), Image.LANCZOS)
    return np.asarray(rgb, dtype=np.float32) / 255.0  # (128, 128, 3)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def predict(
    image_input: Union[str, bytes],
    cnn_model: Any,
    svm_model: Any,
) -> PredictionResult:
    """
    Run the CNN + SVM inference pipeline (Pipeline C).

    Parameters
    ----------
    image_input : base-64 JPEG string or raw bytes of the hand crop (any size; will be resized)
    cnn_model   : Keras Functional model (ResNet50-based, output 256-D feature vector)
    svm_model   : loaded SVC(C=10, probability=True) instance

    Returns
    -------
    PredictionResult
    """
    started = time.perf_counter()

    batch = _decode_image(image_input)[np.newaxis]  # (1, 128, 128, 3)

    # Calling the model directly (rather than .predict) avoids Keras'
    # verbose per-batch logging.
    features = cnn_model(batch, training=False).numpy()  # (1, 256)

    class_probs = svm_model.predict_proba(features)[0]  # (34,)
    best = int(np.argmax(class_probs))

    return PredictionResult(
        sign=get_sign(best),
        confidence=float(class_probs[best]),
        label_index=best,
        probabilities=class_probs.tolist(),
        pipeline="C",
        latency_ms=(time.perf_counter() - started) * 1000,
    )
|
backend/app/main.py
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
SanketSetu FastAPI backend — entry point.
|
| 3 |
+
|
| 4 |
+
Endpoints
|
| 5 |
+
---------
|
| 6 |
+
GET /health → HealthResponse
|
| 7 |
+
WS /ws/landmarks → real-time sign recognition (landmark stream)
|
| 8 |
+
WS /ws/image → image-based sign recognition (Pipeline C)
|
| 9 |
+
POST /api/predict → REST fallback for landmark inference
|
| 10 |
+
POST /api/predict/image → REST fallback for image inference
|
| 11 |
+
"""
|
| 12 |
+
from __future__ import annotations
|
| 13 |
+
|
| 14 |
+
import json
|
| 15 |
+
import logging
|
| 16 |
+
import os
|
| 17 |
+
import time
|
| 18 |
+
from contextlib import asynccontextmanager
|
| 19 |
+
from pathlib import Path
|
| 20 |
+
from typing import Any
|
| 21 |
+
|
| 22 |
+
# Load .env if present (before config is imported so env vars are available)
|
| 23 |
+
try:
|
| 24 |
+
from dotenv import load_dotenv
|
| 25 |
+
_env_file = Path(__file__).resolve().parent.parent / ".env"
|
| 26 |
+
if _env_file.exists():
|
| 27 |
+
load_dotenv(_env_file)
|
| 28 |
+
except ImportError:
|
| 29 |
+
pass # python-dotenv not installed; rely on shell env
|
| 30 |
+
|
| 31 |
+
import numpy as np
|
| 32 |
+
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, HTTPException, Request
|
| 33 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 34 |
+
from fastapi.responses import JSONResponse
|
| 35 |
+
|
| 36 |
+
from app import config
|
| 37 |
+
from app.models.loader import load_models, get_model_store
|
| 38 |
+
from app.schemas import (
|
| 39 |
+
LandmarkMessage,
|
| 40 |
+
ImageMessage,
|
| 41 |
+
EnsembleMessage,
|
| 42 |
+
PredictionResponse,
|
| 43 |
+
HealthResponse,
|
| 44 |
+
ErrorResponse,
|
| 45 |
+
)
|
| 46 |
+
import app.inference.ensemble as ensemble
|
| 47 |
+
|
| 48 |
+
# ---------------------------------------------------------------------------
|
| 49 |
+
# Logging
|
| 50 |
+
# ---------------------------------------------------------------------------
|
| 51 |
+
logging.basicConfig(
    level=getattr(logging, config.LOG_LEVEL, logging.INFO),  # unknown level names fall back to INFO
    format="%(asctime)s %(levelname)-8s %(name)s — %(message)s",
)
logger = logging.getLogger("sanketsetu")

# Silence noisy TF / Keras output
# NOTE: these env vars only take effect if set before TensorFlow is first
# imported; the model loader imports keras lazily, so setting them at module
# import time is early enough.
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "3")
os.environ.setdefault("KERAS_BACKEND", "tensorflow")
logging.getLogger("tensorflow").setLevel(logging.ERROR)
logging.getLogger("keras").setLevel(logging.ERROR)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
# ---------------------------------------------------------------------------
|
| 65 |
+
# Lifespan — load models on startup
|
| 66 |
+
# ---------------------------------------------------------------------------
|
| 67 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: load every model artifact once before the
    server accepts traffic, and log on shutdown."""
    logger.info("Starting SanketSetu backend …")
    # Blocking call — the server does not accept connections until all
    # available artifacts are in memory (see app.models.loader.load_models).
    load_models()
    logger.info("Models ready. Server accepting connections.")
    yield
    logger.info("Shutting down.")
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# ---------------------------------------------------------------------------
|
| 77 |
+
# App
|
| 78 |
+
# ---------------------------------------------------------------------------
|
| 79 |
+
app = FastAPI(
    title="SanketSetu API",
    description="Real-time Gujarati Sign Language recognition backend",
    version="1.0.0",
    lifespan=lifespan,  # loads model artifacts on startup
)

# CORS
# NOTE: allow_credentials=True means config.CORS_ORIGINS must list explicit
# origins — browsers reject a wildcard origin combined with credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=config.CORS_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
# ---------------------------------------------------------------------------
|
| 97 |
+
# Global exception handler
|
| 98 |
+
# ---------------------------------------------------------------------------
|
| 99 |
+
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Catch-all handler: log the full traceback, return a generic 500 body."""
    logger.exception("Unhandled error: %s", exc)
    payload = ErrorResponse(error="Internal server error", detail=str(exc))
    return JSONResponse(status_code=500, content=payload.model_dump())
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
# ---------------------------------------------------------------------------
|
| 109 |
+
# Helpers
|
| 110 |
+
# ---------------------------------------------------------------------------
|
| 111 |
+
|
| 112 |
+
def _run_ensemble(
    landmarks: list[float],
    image_b64: str | None = None,
) -> PredictionResponse:
    """Run the full ensemble over one landmark frame (plus optional image
    fallback) and wrap the internal result in the API response schema."""
    store = get_model_store()
    outcome = ensemble.run(
        landmarks,
        image_input=image_b64,
        xgb_model=store.xgb_model,
        encoder_model=store.encoder_model,
        lgbm_model=store.lgbm_model,
        cnn_model=store.cnn_model,
        svm_model=store.svm_model,
        pipeline_mode=config.PIPELINE_MODE,
        confidence_threshold=config.CONFIDENCE_THRESHOLD,
        secondary_threshold=config.SECONDARY_THRESHOLD,
    )
    return PredictionResponse(
        sign=outcome.sign,
        confidence=outcome.confidence,
        pipeline=outcome.pipeline,
        label_index=outcome.label_index,
        probabilities=outcome.probabilities,
        latency_ms=outcome.latency_ms,
    )
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def _available_pipelines() -> list[str]:
    """Return the letters of the pipelines whose artifacts are loaded."""
    try:
        store = get_model_store()
    except RuntimeError:
        # Models have not finished loading yet.
        return []

    readiness = [
        ("A", store.xgb_model is not None),
        ("B", store.encoder_model is not None and store.lgbm_model is not None),
        ("C", store.cnn_model is not None and store.svm_model is not None),
    ]
    return [name for name, ready in readiness if ready]
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
# ---------------------------------------------------------------------------
|
| 155 |
+
# REST endpoints
|
| 156 |
+
# ---------------------------------------------------------------------------
|
| 157 |
+
|
| 158 |
+
@app.get("/health", response_model=HealthResponse)
|
| 159 |
+
async def health():
|
| 160 |
+
try:
|
| 161 |
+
store = get_model_store()
|
| 162 |
+
loaded = store.loaded
|
| 163 |
+
except RuntimeError:
|
| 164 |
+
loaded = False
|
| 165 |
+
return HealthResponse(
|
| 166 |
+
status="ok" if loaded else "loading",
|
| 167 |
+
models_loaded=loaded,
|
| 168 |
+
pipelines_available=_available_pipelines(),
|
| 169 |
+
)
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
@app.post("/api/predict", response_model=PredictionResponse)
|
| 173 |
+
async def predict_landmarks(body: LandmarkMessage):
|
| 174 |
+
"""REST fallback: send 63 landmark floats, receive prediction."""
|
| 175 |
+
return _run_ensemble(body.landmarks)
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
@app.post("/api/predict/image", response_model=PredictionResponse)
|
| 179 |
+
async def predict_image(body: ImageMessage):
|
| 180 |
+
"""REST fallback: send a base-64 hand crop, receive prediction via Pipeline C."""
|
| 181 |
+
store = get_model_store()
|
| 182 |
+
if store.cnn_model is None or store.svm_model is None:
|
| 183 |
+
raise HTTPException(status_code=503, detail="Pipeline C (CNN+SVM) is not available.")
|
| 184 |
+
import app.inference.pipeline_c as _pc
|
| 185 |
+
result = _pc.predict(body.image_b64, store.cnn_model, store.svm_model)
|
| 186 |
+
return PredictionResponse(
|
| 187 |
+
sign=result.sign,
|
| 188 |
+
confidence=result.confidence,
|
| 189 |
+
pipeline=result.pipeline,
|
| 190 |
+
label_index=result.label_index,
|
| 191 |
+
probabilities=result.probabilities,
|
| 192 |
+
latency_ms=result.latency_ms,
|
| 193 |
+
)
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
# ---------------------------------------------------------------------------
|
| 197 |
+
# WebSocket — landmark stream /ws/landmarks
|
| 198 |
+
# ---------------------------------------------------------------------------
|
| 199 |
+
|
| 200 |
+
@app.websocket("/ws/landmarks")
|
| 201 |
+
async def ws_landmarks(ws: WebSocket):
|
| 202 |
+
"""
|
| 203 |
+
Primary real-time endpoint.
|
| 204 |
+
Client sends: {"landmarks": [...63 floats...], "session_id": "..."}
|
| 205 |
+
Server replies: PredictionResponse JSON
|
| 206 |
+
"""
|
| 207 |
+
await ws.accept()
|
| 208 |
+
session_id = "unknown"
|
| 209 |
+
try:
|
| 210 |
+
while True:
|
| 211 |
+
raw = await ws.receive_text()
|
| 212 |
+
try:
|
| 213 |
+
data = json.loads(raw)
|
| 214 |
+
msg = LandmarkMessage(**data)
|
| 215 |
+
session_id = msg.session_id
|
| 216 |
+
|
| 217 |
+
response = _run_ensemble(msg.landmarks)
|
| 218 |
+
await ws.send_text(response.model_dump_json())
|
| 219 |
+
|
| 220 |
+
except ValueError as ve:
|
| 221 |
+
await ws.send_text(
|
| 222 |
+
ErrorResponse(error="Validation error", detail=str(ve)).model_dump_json()
|
| 223 |
+
)
|
| 224 |
+
except Exception as e:
|
| 225 |
+
logger.error("[%s] Inference error: %s", session_id, e, exc_info=True)
|
| 226 |
+
await ws.send_text(
|
| 227 |
+
ErrorResponse(error="Inference failed", detail=str(e)).model_dump_json()
|
| 228 |
+
)
|
| 229 |
+
|
| 230 |
+
except WebSocketDisconnect:
|
| 231 |
+
logger.info("Client disconnected: %s", session_id)
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
# ---------------------------------------------------------------------------
|
| 235 |
+
# WebSocket — image stream /ws/image (Pipeline C)
|
| 236 |
+
# ---------------------------------------------------------------------------
|
| 237 |
+
|
| 238 |
+
@app.websocket("/ws/image")
|
| 239 |
+
async def ws_image(ws: WebSocket):
|
| 240 |
+
"""
|
| 241 |
+
Image-based endpoint for Pipeline C (CNN+SVM).
|
| 242 |
+
Client sends: {"image_b64": "<base64 JPEG>", "session_id": "..."}
|
| 243 |
+
"""
|
| 244 |
+
await ws.accept()
|
| 245 |
+
session_id = "unknown"
|
| 246 |
+
try:
|
| 247 |
+
while True:
|
| 248 |
+
raw = await ws.receive_text()
|
| 249 |
+
try:
|
| 250 |
+
data = json.loads(raw)
|
| 251 |
+
msg = ImageMessage(**data)
|
| 252 |
+
session_id = msg.session_id
|
| 253 |
+
|
| 254 |
+
store = get_model_store()
|
| 255 |
+
if store.cnn_model is None or store.svm_model is None:
|
| 256 |
+
await ws.send_text(
|
| 257 |
+
ErrorResponse(error="Pipeline C not available").model_dump_json()
|
| 258 |
+
)
|
| 259 |
+
continue
|
| 260 |
+
|
| 261 |
+
import app.inference.pipeline_c as _pc
|
| 262 |
+
result = _pc.predict(msg.image_b64, store.cnn_model, store.svm_model)
|
| 263 |
+
response = PredictionResponse(
|
| 264 |
+
sign=result.sign,
|
| 265 |
+
confidence=result.confidence,
|
| 266 |
+
pipeline=result.pipeline,
|
| 267 |
+
label_index=result.label_index,
|
| 268 |
+
probabilities=result.probabilities,
|
| 269 |
+
latency_ms=result.latency_ms,
|
| 270 |
+
)
|
| 271 |
+
await ws.send_text(response.model_dump_json())
|
| 272 |
+
|
| 273 |
+
except Exception as e:
|
| 274 |
+
logger.error("[%s] Image inference error: %s", session_id, e, exc_info=True)
|
| 275 |
+
await ws.send_text(
|
| 276 |
+
ErrorResponse(error="Inference failed", detail=str(e)).model_dump_json()
|
| 277 |
+
)
|
| 278 |
+
|
| 279 |
+
except WebSocketDisconnect:
|
| 280 |
+
logger.info("Image client disconnected: %s", session_id)
|
backend/app/models/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# backend/app/models/__init__.py
|
backend/app/models/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (169 Bytes). View file
|
|
|
backend/app/models/__pycache__/label_map.cpython-312.pyc
ADDED
|
Binary file (2.3 kB). View file
|
|
|
backend/app/models/__pycache__/loader.cpython-312.pyc
ADDED
|
Binary file (7.72 kB). View file
|
|
|
backend/app/models/label_map.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Label map: index 0–33 → Gujarati sign name.
|
| 3 |
+
|
| 4 |
+
The 34 classes cover the Gujarati consonant alphabet (ક–ળ / ક-ળ) as used in the
|
| 5 |
+
training dataset. Verify this order against your original data-collection script /
|
| 6 |
+
notebook — if your dataset folder names differ, update the list below.
|
| 7 |
+
|
| 8 |
+
Current assumption: classes are sorted by the Gujarati alphabet order (Unicode order
|
| 9 |
+
of the Unicode Gujarati block, U+0A80–U+0AFF).
|
| 10 |
+
"""
|
| 11 |
+
from __future__ import annotations
|
| 12 |
+
|
| 13 |
+
# ---- Primary label map (index → Gujarati character / word) -----------------
# 34 classes in dataset order: 32 consonants followed by two conjuncts.
_SIGNS: tuple[str, ...] = (
    "ક",   # ka
    "ખ",   # kha
    "ગ",   # ga
    "ઘ",   # gha
    "ચ",   # cha
    "છ",   # chha
    "જ",   # ja
    "ઝ",   # jha
    "ટ",   # ṭa
    "ઠ",   # ṭha
    "ડ",   # ḍa
    "ઢ",   # ḍha
    "ણ",   # ṇa
    "ત",   # ta
    "થ",   # tha
    "દ",   # da
    "ધ",   # dha
    "ન",   # na
    "પ",   # pa
    "ફ",   # pha
    "બ",   # ba
    "ભ",   # bha
    "મ",   # ma
    "ય",   # ya
    "ર",   # ra
    "લ",   # la
    "વ",   # va
    "શ",   # sha
    "ષ",   # ṣha
    "સ",   # sa
    "હ",   # ha
    "ળ",   # ḷa
    "ક્ષ",  # ksha (conjunct)
    "જ્ઞ",  # gna (conjunct)
)

# index → sign (the canonical mapping used by all pipelines)
LABEL_MAP: dict[int, str] = dict(enumerate(_SIGNS))

# Reverse map: sign name → index (useful for testing)
REVERSE_MAP: dict[str, int] = {sign: idx for idx, sign in enumerate(_SIGNS)}


def get_sign(label_index: int) -> str:
    """Return the Gujarati sign for the given class index.

    Unknown indices fall back to a bracketed placeholder like "[99]".
    """
    return LABEL_MAP.get(label_index, f"[{label_index}]")
|
backend/app/models/loader.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Singleton model store — loads all model artifacts once at startup and holds them
|
| 3 |
+
in memory for the lifetime of the process.
|
| 4 |
+
|
| 5 |
+
Usage inside FastAPI:
|
| 6 |
+
from app.models.loader import get_model_store
|
| 7 |
+
store = get_model_store() # dependency injection or direct call
|
| 8 |
+
"""
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
import logging
|
| 12 |
+
import os
|
| 13 |
+
import pickle
|
| 14 |
+
import time
|
| 15 |
+
from dataclasses import dataclass, field
|
| 16 |
+
from typing import Any
|
| 17 |
+
|
| 18 |
+
import numpy as np
|
| 19 |
+
|
| 20 |
+
logger = logging.getLogger(__name__)
|
| 21 |
+
|
| 22 |
+
# ---------------------------------------------------------------------------
|
| 23 |
+
# Data class that holds every loaded artifact
|
| 24 |
+
# ---------------------------------------------------------------------------
|
| 25 |
+
|
| 26 |
+
@dataclass
class ModelStore:
    """Container for every loaded model artifact, grouped by pipeline.

    Any attribute may remain None when the corresponding artifact files are
    missing; `loaded` flips to True once loading has been attempted.
    """

    # Pipeline A — landmark vector → XGBoost classifier
    xgb_model: Any = None

    # Pipeline B — autoencoder bottleneck → LightGBM
    encoder_model: Any = None  # Keras sub-model (encoder half)
    lgbm_model: Any = None

    # Pipeline C — CNN features → SVM
    cnn_model: Any = None  # Keras ResNet50 feature extractor
    svm_model: Any = None

    loaded: bool = False
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# Module-level singleton
|
| 43 |
+
_store: ModelStore | None = None
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# ---------------------------------------------------------------------------
|
| 47 |
+
# Internal helpers
|
| 48 |
+
# ---------------------------------------------------------------------------
|
| 49 |
+
|
| 50 |
+
def _load_pickle(path: str, label: str) -> Any:
    """Unpickle the artifact at *path*, logging the load time under *label*.

    NOTE(review): pickle.load on an untrusted file can execute arbitrary
    code — these artifacts are assumed to ship with the application.
    """
    started = time.perf_counter()
    with open(path, "rb") as fh:
        artifact = pickle.load(fh)
    logger.info("Loaded %-35s (%.1f ms)", label, (time.perf_counter() - started) * 1000)
    return artifact
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def _build_encoder(autoencoder_pkl_path: str) -> Any:
    """
    Load the full autoencoder from pickle and extract the encoder sub-model.

    The autoencoder is a Keras Sequential:
        InputLayer (63)
        Dense 32 relu   ← layer index 0
        Dense 16 relu   ← layer index 1 ← bottleneck output
        Dense 32 relu
        Dense 63 linear
    We build a Keras Model that maps input → output of the bottleneck Dense.

    Parameters
    ----------
    autoencoder_pkl_path : path to the pickled Sequential autoencoder

    Returns
    -------
    Keras Model mapping a (batch, 63) landmark vector to its 16-D bottleneck.
    """
    import os
    # Must be set before keras/TF is imported below to take effect.
    os.environ.setdefault("KERAS_BACKEND", "tensorflow")
    os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "3")

    full_ae = _load_pickle(autoencoder_pkl_path, "autoencoder_model.pkl")

    # Dynamically import keras after env vars are set
    import keras

    # The Sequential model's built layers: 0=Dense(32), 1=Dense(16), 2=Dense(32), 3=Dense(63)
    # layer index 1 output is the 16-D bottleneck.
    # We can't use full_ae.input directly on a Sequential that was pickled without
    # a traced input tensor, so we wire the layers manually.
    # (Fixed: removed an unused `import numpy as _np` that previously sat here.)
    inp = keras.Input(shape=(63,), name="encoder_input")
    x = full_ae.layers[0](inp)  # Dense(32, relu)
    x = full_ae.layers[1](x)    # Dense(16, relu) — bottleneck
    encoder = keras.Model(inputs=inp, outputs=x, name="encoder_only")
    logger.info("Built encoder sub-model: input(%s) → output(%s)", encoder.input_shape, encoder.output_shape)
    return encoder
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def _build_cnn_feature_extractor(cnn_pkl_path: str) -> Any:
    """
    Load the full CNN (ResNet50 Functional model) from pickle and return a
    sub-model that outputs the 256-D penultimate Dense layer.

    Architecture (tail of the model):
        … ResNet50 backbone …
        GlobalAveragePooling2D
        Dropout(0.5)
        Dense(256, relu)   ← feature vector we want
        Dropout(0.5)
        Dense(34, softmax) ← final classification head (skip this)

    The SVC was trained on the 256-D features, so we must stop before the
    final Dense(34) layer.
    """
    import os
    os.environ.setdefault("KERAS_BACKEND", "tensorflow")
    os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "3")

    import keras

    full_cnn = _load_pickle(cnn_pkl_path, "cnn_model.pkl")

    # Scan from the output end for the first Dense layer with 256 units.
    feature_layer = next(
        (
            layer
            for layer in reversed(full_cnn.layers)
            if layer.__class__.__name__ == "Dense"
            and layer.get_config().get("units") == 256
        ),
        None,
    )

    if feature_layer is None:
        logger.warning(
            "Could not find Dense(256) layer; using full CNN output as features."
        )
        return full_cnn

    extractor = keras.Model(
        inputs=full_cnn.input,
        outputs=feature_layer.output,
        name="cnn_feature_extractor",
    )
    logger.info(
        "CNN feature extractor: input %s → output %s",
        extractor.input_shape,
        extractor.output_shape,
    )
    return extractor
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
# ---------------------------------------------------------------------------
|
| 144 |
+
# Public API
|
| 145 |
+
# ---------------------------------------------------------------------------
|
| 146 |
+
|
| 147 |
+
def load_models() -> ModelStore:
    """
    Load all models and return a populated ModelStore.
    Call this once from the FastAPI lifespan event.

    Missing artifacts are tolerated: each pipeline is loaded only when all of
    its files exist, with a warning otherwise. ``store.loaded`` is set even if
    some (or all) pipelines are unavailable — it means "loading was attempted",
    not "every pipeline is usable".
    """
    global _store

    from app import config  # local import to avoid circular at module level

    store = ModelStore()

    # ---- Pipeline A --------------------------------------------------------
    if os.path.exists(config.PIPELINE_A_MODEL):
        store.xgb_model = _load_pickle(config.PIPELINE_A_MODEL, "xgb model.pkl")
    else:
        logger.warning("Pipeline A model not found: %s", config.PIPELINE_A_MODEL)

    # ---- Pipeline B --------------------------------------------------------
    if os.path.exists(config.PIPELINE_B_AE) and os.path.exists(config.PIPELINE_B_LGBM):
        store.encoder_model = _build_encoder(config.PIPELINE_B_AE)
        store.lgbm_model = _load_pickle(config.PIPELINE_B_LGBM, "lgbm_model.pkl")
    else:
        logger.warning("Pipeline B models not found — B will be skipped.")

    # ---- Pipeline C --------------------------------------------------------
    if os.path.exists(config.PIPELINE_C_CNN) and os.path.exists(config.PIPELINE_C_SVM):
        store.cnn_model = _build_cnn_feature_extractor(config.PIPELINE_C_CNN)
        store.svm_model = _load_pickle(config.PIPELINE_C_SVM, "svm_model.pkl")
    else:
        logger.warning("Pipeline C models not found — C will be skipped.")

    store.loaded = True

    # Fix: previously "All models loaded successfully." was logged
    # unconditionally, even when every artifact was missing. Log an
    # accurate summary instead.
    available = [
        name
        for name, ok in (
            ("A", store.xgb_model is not None),
            ("B", store.encoder_model is not None and store.lgbm_model is not None),
            ("C", store.cnn_model is not None and store.svm_model is not None),
        )
        if ok
    ]
    if available:
        logger.info("Model loading complete — available pipelines: %s", ", ".join(available))
    else:
        logger.warning("Model loading complete, but no pipelines are available.")

    _store = store
    return store
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
def get_model_store() -> ModelStore:
    """Return the singleton ModelStore (must have been loaded via load_models() first)."""
    store = _store
    # Treat both "never loaded" and a half-initialised store as fatal.
    if store is None or not store.loaded:
        raise RuntimeError("ModelStore has not been initialised — call load_models() first.")
    return store
|
backend/app/schemas.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Pydantic request / response schemas for SanketSetu backend.
|
| 3 |
+
"""
|
| 4 |
+
from __future__ import annotations
|
| 5 |
+
|
| 6 |
+
from typing import List, Optional
|
| 7 |
+
|
| 8 |
+
from pydantic import BaseModel, Field, field_validator
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# ---------------------------------------------------------------------------
|
| 12 |
+
# Requests
|
| 13 |
+
# ---------------------------------------------------------------------------
|
| 14 |
+
|
| 15 |
+
class LandmarkMessage(BaseModel):
    """
    Payload sent by the client over /ws/landmarks or POST /api/predict.
    'landmarks' is a flat list of [x0,y0,z0, x1,y1,z1, ..., x20,y20,z20]
    extracted by MediaPipe Hands on the browser side.
    """
    landmarks: List[float] = Field(..., min_length=63, max_length=63)
    session_id: str = Field(default="default")  # used only for log correlation server-side

    # NOTE(review): this validator duplicates the min_length/max_length
    # constraints on the Field above; it adds an explicit error message only.
    @field_validator("landmarks")
    @classmethod
    def must_be_63_floats(cls, v: List[float]) -> List[float]:
        if len(v) != 63:
            raise ValueError(f"landmarks must contain exactly 63 values, got {len(v)}")
        return v
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class ImageMessage(BaseModel):
    """
    Payload sent when Pipeline C (CNN+SVM) is invoked via /ws/image.
    'image_b64' is a base-64 encoded JPEG of the cropped hand region (128×128).
    """
    # The server resizes to 128×128 before inference (pipeline_c._decode_image),
    # so crops of any size are accepted here.
    image_b64: str = Field(..., description="Base-64 encoded JPEG of the hand crop (128×128 px)")
    session_id: str = Field(default="default")  # used only for log correlation server-side
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class EnsembleMessage(BaseModel):
    """
    Combined payload: landmarks + optional image for the full ensemble pipeline.
    """
    # Same 63-float layout as LandmarkMessage (21 hand landmarks × x/y/z).
    landmarks: List[float] = Field(..., min_length=63, max_length=63)
    # Optional base-64 hand crop; only consulted when the image-based
    # Pipeline C may be used as a fallback.
    image_b64: Optional[str] = Field(default=None)
    session_id: str = Field(default="default")
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
# ---------------------------------------------------------------------------
|
| 51 |
+
# Responses
|
| 52 |
+
# ---------------------------------------------------------------------------
|
| 53 |
+
|
| 54 |
+
class PredictionResponse(BaseModel):
    """Prediction returned by every REST and WebSocket inference endpoint."""
    sign: str = Field(..., description="Gujarati sign character(s)")
    confidence: float = Field(..., ge=0.0, le=1.0)
    pipeline: str = Field(..., description="Which pipeline(s) produced this result: A, B, C, or ensemble")
    label_index: int = Field(..., ge=0, le=33)  # index into the 34-class label map
    probabilities: Optional[List[float]] = Field(
        default=None,
        description="Full 34-class probability vector (optional, increases payload size)"
    )
    latency_ms: Optional[float] = Field(default=None, description="Server-side inference latency in ms")
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
class HealthResponse(BaseModel):
    """Response for GET /health."""
    status: str                     # "ok" when models are loaded, "loading" otherwise
    models_loaded: bool             # True once load_models() has completed
    pipelines_available: List[str]  # subset of ["A", "B", "C"]
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
class ErrorResponse(BaseModel):
    """Error payload sent over the WebSockets and by the global exception handler."""
    error: str                    # short error category shown to the client
    detail: Optional[str] = None  # optional exception text
|
backend/requirements-dev.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Development + testing dependencies (not required in production)
|
| 2 |
+
pytest>=9.0
|
| 3 |
+
httpx>=0.28
|
| 4 |
+
pytest-anyio
|
backend/requirements.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi>=0.115.0
|
| 2 |
+
uvicorn[standard]>=0.30.0
|
| 3 |
+
websockets>=12.0
|
| 4 |
+
xgboost>=2.0.0
|
| 5 |
+
lightgbm>=4.3.0
|
| 6 |
+
scikit-learn>=1.4.0
|
| 7 |
+
keras==3.13.2
|
| 8 |
+
tensorflow-cpu>=2.20.0
|
| 9 |
+
numpy>=1.26.0
|
| 10 |
+
opencv-python-headless>=4.9.0
|
| 11 |
+
pillow>=10.3.0
|
| 12 |
+
python-dotenv>=1.0.0
|
backend/tests/__init__.py
ADDED
|
File without changes
|
backend/tests/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (164 Bytes). View file
|
|
|
backend/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc
ADDED
|
Binary file (4.07 kB). View file
|
|
|
backend/tests/__pycache__/test_pipeline_a.cpython-312-pytest-9.0.2.pyc
ADDED
|
Binary file (14.4 kB). View file
|
|
|
backend/tests/__pycache__/test_pipeline_b.cpython-312-pytest-9.0.2.pyc
ADDED
|
Binary file (11.6 kB). View file
|
|
|