Upload 16 files

- .gitattributes +5 -0
- .gitignore +86 -0
- DataPipeline.png +3 -0
- LICENSE +40 -0
- app.py +824 -0
- categories_processed.png +0 -0
- eda/distribution_of_data.png +3 -0
- eda/train_test_validation_split-1.png +3 -0
- eda/train_test_validation_split-2.png +3 -0
- expression_mapping.py +168 -0
- isl_processor.py +478 -0
- model-graph.png +3 -0
- packages.txt +6 -0
- pose_models.py +360 -0
- pose_utils.py +468 -0
- requirements.txt +22 -3
- verify_deployment.py +140 -0
.gitattributes
CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+DataPipeline.png filter=lfs diff=lfs merge=lfs -text
+eda/distribution_of_data.png filter=lfs diff=lfs merge=lfs -text
+eda/train_test_validation_split-1.png filter=lfs diff=lfs merge=lfs -text
+eda/train_test_validation_split-2.png filter=lfs diff=lfs merge=lfs -text
+model-graph.png filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,86 @@
+# TechMatrix Solvers ISL Translation Project
+# Generated files and dependencies
+
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyTorch
+*.pth
+*.pt
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# Environment variables
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDEs
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# OS generated files
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+
+# Temporary files
+*.tmp
+*.temp
+/tmp/
+temp/
+
+# Model files and data
+*.keras
+*.h5
+*.pkl
+*.csv
+*.json
+data/
+models/
+checkpoints/
+
+# Video files
+*.mp4
+*.avi
+*.mov
+*.mkv
+
+# Logs
+logs/
+*.log
+
+# Original project reference (keep for development)
+original_project/
DataPipeline.png
ADDED (Git LFS)
LICENSE
ADDED
@@ -0,0 +1,40 @@
+MIT License
+
+Copyright (c) 2024 TechMatrix Solvers
+Shri Ram Group of Institutions
+
+Team Members:
+- Abhay Gupta (Team Lead)
+- Kripanshu Gupta (Backend Developer)
+- Dipanshu Patel (UI/UX Designer)
+- Bhumika Patel (Deployment & Female Presenter)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+## Acknowledgments
+
+This project is based on Indian Sign Language (ISL) translation using deep learning
+techniques including OpenPose body/hand detection and LSTM networks. The project
+uses the INCLUDE dataset for training and evaluation.
+
+## Attribution
+
+While this is an original implementation by TechMatrix Solvers, the underlying
+concepts and methodologies are based on established computer vision and machine
+learning research in sign language recognition.
app.py
ADDED
@@ -0,0 +1,824 @@
+"""
+ISL Sign Language Translation - TechMatrix Solvers Initiative
+Main Streamlit Application
+
+Developed by: TechMatrix Solvers Team
+- Abhay Gupta (Team Lead)
+- Kripanshu Gupta (Backend Developer)
+- Dipanshu Patel (UI/UX Designer)
+- Bhumika Patel (Deployment & Female Presenter)
+
+Institution: Shri Ram Group of Institutions
+"""
+
+import streamlit as st
+st.write("TechMatrix Solvers ISL Translator Loading...")
+
+import os
+os.environ["KERAS_BACKEND"] = "torch"
+import keras
+
+import cv2
+import numpy as np
+import tempfile
+import time
+from PIL import Image
+from keras.models import Sequential
+import pickle
+from keras.layers import LSTM, Dense, Bidirectional, Dropout, Input, BatchNormalization
+from pose_models import create_bodypose_model, create_handpose_model
+from expression_mapping import expression_mapping
+from isl_processor import ISLTranslationModel
+import pandas as pd
+import ffmpeg
+import subprocess
+from typing import NamedTuple
+import json
+import pose_utils as utils
+from huggingface_hub import hf_hub_download
+import shutil, platform
+import uuid
+
+# System information display
+st.write("**System Information:**")
+st.write(f"Python Version: {platform.python_version()}")
+st.write(f"FFmpeg: {shutil.which('ffmpeg')}, FFprobe: {shutil.which('ffprobe')}")
+
+try:
+    import cv2
+    st.write(f"OpenCV Version: {cv2.__version__}")
+except Exception as e:
+    st.error(f"OpenCV import failed: {e}")
+
+try:
+    import torch
+    st.write(f"PyTorch: {torch.__version__}, Keras: {keras.__version__}")
+except Exception as e:
+    st.error(f"PyTorch/Keras import failed: {e}")
+
+
+class VideoProbeResult(NamedTuple):
+    """Structure for video probe results"""
+    return_code: int
+    json: str
+    error: str
+
+
+def probe_video_info(file_path) -> VideoProbeResult:
+    """
+    Probe video file for metadata using FFprobe
+
+    Args:
+        file_path: Path to video file
+
+    Returns:
+        VideoProbeResult containing metadata
+    """
+    command_array = [
+        "ffprobe",
+        "-v", "quiet",
+        "-print_format", "json",
+        "-show_format",
+        "-show_streams",
+        file_path
+    ]
+    result = subprocess.run(
+        command_array,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        universal_newlines=True
+    )
+    return VideoProbeResult(
+        return_code=result.returncode,
+        json=result.stdout,
+        error=result.stderr
+    )
+
+
+# Define feature columns for time series processing
+body_features = [f'bodypeaks_x_{i}' for i in range(15)] + [f'bodypeaks_y_{i}' for i in range(15)]
+hand0_features = [f'hand0peaks_x_{i}' for i in range(21)] + [f'hand0peaks_y_{i}' for i in range(21)] + [f'hand0peaks_peaktxt{i}' for i in range(21)]
+hand1_features = [f'hand1peaks_x_{i}' for i in range(21)] + [f'hand1peaks_y_{i}' for i in range(21)] + [f'hand1peaks_peaktxt{i}' for i in range(21)]
+
+feature_columns_processed = body_features + hand0_features + hand1_features
+label_columns = ['Expression_encoded']
+
+
+@st.cache_resource
+def create_time_series_sequences(isl_data, feature_columns, label_columns, window_size=20):
+    """
+    Creates time series sequences from DataFrame with specified window size
+
+    Args:
+        isl_data: Input DataFrame with ISL data
+        feature_columns: List of feature column names
+        label_columns: List of label column names
+        window_size: Size of temporal window for sequence creation
+
+    Returns:
+        tuple: (X_sequences, y_sequences) for training/inference
+    """
+    if isl_data.empty:
+        return [], []
+
+    X_sequences = []
+    y_sequences = []
+
+    for group, file_df in isl_data.groupby(['Type', 'Expression_encoded', 'FileName']):
+        expr_type, expression, filename = group
+
+        # Create blank frame for padding
+        blank_frame = np.zeros((1, 156))
+
+        for idx, window_data in enumerate([file_df[i:i+window_size] for i in range(0, file_df.shape[0], 1)]):
+            if window_data.shape[0] < window_size:
+                # Pad sequence with blank frames at the beginning
+                padding_needed = window_size - window_data.shape[0]
+                padded_sequence = np.concatenate(
+                    (np.repeat(blank_frame, padding_needed, axis=0),
+                     window_data[feature_columns].values),
+                    axis=0
+                )
+                X_sequences.append(padded_sequence)
+                y_sequences.append(expression)
+                continue
+
+            X_sequences.append(window_data[feature_columns].values)
+            y_sequences.append(expression)
+
+    return X_sequences, y_sequences
+
+
+# Global translation model variable
+translation_model = None
+
+
+@st.cache_resource
+def load_translation_model():
+    """
+    Load and configure the LSTM translation model
+
+    Returns:
+        Configured Keras Sequential model for ISL translation
+    """
+    model = Sequential()
+    model.add(Input(shape=(20, 156)))
+    model.add(keras.layers.Masking(mask_value=0.))
+    model.add(BatchNormalization())
+    model.add(Bidirectional(LSTM(32, recurrent_dropout=0.2, return_sequences=True)))
+
+    model.add(Dropout(0.2))
+    model.add(Bidirectional(LSTM(32, recurrent_dropout=0.2)))
+
+    model.add(keras.layers.Activation('elu'))
+    model.add(Dense(32, use_bias=False, kernel_initializer='he_normal'))
+
+    model.add(BatchNormalization())
+    model.add(Dropout(0.2))
+    model.add(keras.layers.Activation('elu'))
+    model.add(Dense(32, kernel_initializer='he_normal', use_bias=False))
+
+    model.add(BatchNormalization())
+    model.add(keras.layers.Activation('elu'))
+    model.add(Dropout(0.2))
+    model.add(Dense(len(list(expression_mapping.keys())), activation='softmax'))
+
+    # Download pre-trained model weights
+    model_file = hf_hub_download(
+        repo_id="sunilsarolkar/isl-translation-model",
+        filename="isl_model_final.keras"
+    )
+    model.load_weights(model_file)
+
+    return model
+
+
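Review note: a quick sanity check on the 156-dimensional input the model expects. The column lists above contribute 15 x and 15 y body coordinates, plus 21 x, 21 y, and 21 peak-score values per hand, which is exactly what `Input(shape=(20, 156))` and the `np.zeros((1, 156))` padding frame assume. A minimal sketch of the arithmetic:

```python
body = 15 + 15            # bodypeaks_x_0..14 + bodypeaks_y_0..14
per_hand = 21 + 21 + 21   # hand{k}peaks_x, _y, _peaktxt for 21 keypoints
assert body + 2 * per_hand == 156  # matches Input(shape=(20, 156))
```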
+# Load test data
+@st.cache_data
+def load_test_data():
+    """Load test dataset and file information"""
+    testing_cleaned_path = hf_hub_download(
+        repo_id="sunilsarolkar/isl-test-data",
+        filename="testing_cleaned.csv",
+        repo_type="dataset"
+    )
+
+    test_files_path = hf_hub_download(
+        repo_id="sunilsarolkar/isl-test-data",
+        filename="test_files.csv",
+        repo_type="dataset"
+    )
+
+    testing_df = pd.read_csv(testing_cleaned_path)
+    test_files_df = pd.read_csv(test_files_path)
+
+    return testing_df, test_files_df
+
+
+# Load test data
+testing_df, test_files_df = load_test_data()
+
+
+class VideoWriter:
+    """Custom video writer using FFmpeg for better compatibility"""
+
+    def __init__(self, output_file, input_fps, input_framesize, input_pix_fmt, input_vcodec):
+        self.ff_process = (
+            ffmpeg
+            .input('pipe:',
+                   format='rawvideo',
+                   pix_fmt="bgr24",
+                   s=f'{input_framesize[1]}x{input_framesize[0]}',
+                   r=input_fps)
+            .output(output_file, pix_fmt=input_pix_fmt, vcodec=input_vcodec)
+            .overwrite_output()
+            .run_async(pipe_stdin=True)
+        )
+
+    def write_frame(self, frame):
+        """Write a single frame to the video"""
+        self.ff_process.stdin.write(frame.tobytes())
+
+    def close(self):
+        """Close the video writer"""
+        self.ff_process.stdin.close()
+        self.ff_process.wait()
+
+
+def calculate_weighted_average(numbers, weights):
+    """
+    Calculate weighted average of numbers
+
+    Args:
+        numbers: List of numbers
+        weights: List of weights
+
+    Returns:
+        float: Weighted average
+    """
+    if sum(weights) == 0:
+        return 0
+    return sum(x * y for x, y in zip(numbers, weights)) / sum(weights)
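Review note: in the translation loop further down, every weight passed to this function for a given sign is the same value (`len(sign_predictions)`), so the call reduces to a plain arithmetic mean of that sign's per-window probabilities. A small sketch with made-up numbers:

```python
import math

def calculate_weighted_average(numbers, weights):
    # Same logic as the function above.
    if sum(weights) == 0:
        return 0
    return sum(x * y for x, y in zip(numbers, weights)) / sum(weights)

probs = [0.62, 0.71, 0.66]           # hypothetical per-window probabilities
weights = [len(probs)] * len(probs)  # how app.py builds them: all equal
assert math.isclose(calculate_weighted_average(probs, weights),
                    sum(probs) / len(probs))
```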
+
+
+@st.cache_data
+def resize_image(image, width=None, height=None, interpolation=cv2.INTER_AREA):
+    """
+    Resize image maintaining aspect ratio
+
+    Args:
+        image: Input image
+        width: Target width
+        height: Target height
+        interpolation: OpenCV interpolation method
+
+    Returns:
+        Resized image
+    """
+    dimensions = None
+    (h, w) = image.shape[:2]
+
+    if width is None and height is None:
+        return image
+
+    if width is None:
+        ratio = height / float(h)
+        dimensions = (int(w * ratio), height)
+    else:
+        ratio = width / float(w)
+        dimensions = (width, int(h * ratio))
+
+    resized = cv2.resize(image, dimensions, interpolation=interpolation)
+    return resized
+
+
+# Configure Streamlit page
+st.set_page_config(
+    page_title="ISL Translation - TechMatrix Solvers",
+    page_icon="π€",
+    layout="wide"
+)
+
+st.title('ISL Sign Language Translation - TechMatrix Solvers Initiative')
+
+# Add custom CSS for sidebar styling
+st.markdown(
+    """
+    <style>
+    [data-testid="stSidebar"][aria-expanded="true"] > div:first-child {
+        width: 350px;
+    }
+    [data-testid="stSidebar"][aria-expanded="false"] > div:first-child {
+        width: 350px;
+        margin-left: -350px;
+    }
+
+    .team-info {
+        background-color: #f0f2f6;
+        padding: 1rem;
+        border-radius: 0.5rem;
+        margin: 1rem 0;
+    }
+
+    .tech-matrix-header {
+        background: linear-gradient(90deg, #1e3a8a, #7c3aed);
+        color: white;
+        padding: 1rem;
+        border-radius: 0.5rem;
+        text-align: center;
+        margin-bottom: 1rem;
+    }
+    </style>
+    """,
+    unsafe_allow_html=True,
+)
+
+# Add team branding header
+st.markdown(
+    """
+    <div class="tech-matrix-header">
+    <h2>TechMatrix Solvers</h2>
+    <p>Innovating Accessible Technology Solutions</p>
+    </div>
+    """,
+    unsafe_allow_html=True
+)
+
+# Sidebar configuration
+st.sidebar.title('ISL Translation System')
+st.sidebar.subheader('Configuration')
+
+# Team information in sidebar
+st.sidebar.markdown(
+    """
+    <div class="team-info">
+    <h3>Development Team</h3>
+    <ul>
+    <li><strong>Abhay Gupta</strong> - Team Lead</li>
+    <li><strong>Kripanshu Gupta</strong> - Backend Dev</li>
+    <li><strong>Dipanshu Patel</strong> - UI/UX Designer</li>
+    <li><strong>Bhumika Patel</strong> - Deployment</li>
+    </ul>
+    <p><em>Shri Ram Group of Institutions</em></p>
+    </div>
+    """,
+    unsafe_allow_html=True
+)
+
+# Initialize frame-wise outputs storage
+frame_predictions = {}
+
+# Application mode selection
+app_mode = st.sidebar.selectbox(
+    'Choose Application Mode',
+    ['About Project', 'Test Video Translation']
+)
+
+if app_mode == 'About Project':
+    st.markdown(
+        """
+## Project Overview
+
+Welcome to the **ISL Sign Language Translation System** developed by **TechMatrix Solvers**.
+This cutting-edge application demonstrates real-time Indian Sign Language recognition and
+translation using advanced deep learning techniques.
+
+### Technical Architecture
+
+Our system combines multiple state-of-the-art technologies:
+
+1. **Body Pose Estimation**: 25-point skeletal tracking using OpenPose
+2. **Hand Landmark Detection**: 21-point hand keypoint identification
+3. **Temporal Modeling**: Bidirectional LSTM networks for sequence analysis
+4. **Real-time Processing**: Optimized inference pipeline for live translation
+        """
+    )
+
+    st.markdown(
+        """
+### Dataset Information
+
+Our model is trained on the comprehensive [INCLUDE dataset](https://zenodo.org/records/4010759):
+        """
+    )
+
+    # Dataset statistics table
+    dataset_stats = {
+        "Metric": [
+            "Categories", "Total Words", "Training Videos",
+            "Avg Videos/Class", "Avg Video Length", "Resolution", "Frame Rate"
+        ],
+        "Value": [
+            "15", "263", "4,257", "16.3", "2.57s", "1920x1080", "25fps"
+        ]
+    }
+    st.table(pd.DataFrame(dataset_stats))
+
+    # Display dataset processing visualization
+    try:
+        categories_image = np.array(Image.open('original_project/categories_processed.png'))
+        st.image(categories_image, caption="Processed Categories Distribution")
+    except:
+        st.info("Dataset visualization images will be displayed when available")
+
+    # Model architecture information
+    st.markdown(
+        """
+### Neural Network Architecture
+
+```python
+# TechMatrix Solvers LSTM Translation Model
+model = Sequential([
+    Input(shape=(20, 156)),  # 20-frame temporal window
+    Masking(mask_value=0.),
+    BatchNormalization(),
+    Bidirectional(LSTM(32, recurrent_dropout=0.2, return_sequences=True)),
+    Dropout(0.2),
+    Bidirectional(LSTM(32, recurrent_dropout=0.2)),
+    Dense(32, activation='elu'),
+    BatchNormalization(),
+    Dropout(0.2),
+    Dense(len(expression_mapping), activation='softmax')
+])
+```
+
+**Model Statistics:**
+- Total Parameters: 82,679 (322.96 KB)
+- Trainable Parameters: 82,239 (321.25 KB)
+- Input Features: 156-dimensional vectors
+- Temporal Window: 20 frames
+        """
+    )
+
+    # Technology stack
+    col1, col2 = st.columns(2)
+
+    with col1:
+        st.markdown(
+            """
+### Technology Stack
+
+**Frontend & UI:**
+- Streamlit (Interactive Web App)
+- Custom CSS Styling
+- Responsive Design
+
+**Deep Learning:**
+- Keras/TensorFlow Backend
+- PyTorch Integration
+- LSTM Networks
+- OpenPose Models
+            """
+        )
+
+    with col2:
+        st.markdown(
+            """
+### Key Features
+
+**Real-time Processing:**
+- Live video analysis
+- Pose keypoint extraction
+- Temporal sequence modeling
+- Confidence scoring
+
+**User Experience:**
+- Intuitive interface
+- Visual feedback
+- Progress tracking
+- Result visualization
+            """
+        )
+
+    # Team contact information
+    st.markdown(
+        """
+### Contact Information
+
+**TechMatrix Solvers Team:**
+
+| Name | Role | Email | Phone |
+|------|------|-------|--------|
+| **Abhay Gupta** | Team Lead | contact2abhaygupta6187@gmail.com | 8115814535 |
+| **Kripanshu Gupta** | Backend Developer | guptakripanshu83@gmail.com | 7067058400 |
+| **Dipanshu Patel** | UI/UX Designer | dipanshupatel43@gmail.com | 9294526404 |
+| **Bhumika Patel** | Deployment & Presenter | bp7249951@gmail.com | 9302271422 |
+
+**Institution:** Shri Ram Group of Institutions
+
+### Documentation
+
+For detailed technical documentation and implementation details, please refer to our
+[comprehensive documentation](https://docs.google.com/document/d/1mzr2KGHRJT5heUjFF20NQ3Gb89urpjZJ/edit?usp=sharing).
+
+---
+
+**© 2024 TechMatrix Solvers - Innovating Accessible Technology Solutions**
+        """
+    )
+
+elif app_mode == 'Test Video Translation':
+    # Video selection interface
+    st.markdown("## Test Video Translation")
+
+    category = st.sidebar.selectbox(
+        'Choose Category',
+        np.sort(test_files_df['Category'].unique(), axis=-1, kind='mergesort')
+    )
+
+    # Filter by category
+    category_mask = (test_files_df['Category'] == category)
+    test_files_category = test_files_df[category_mask]
+
+    class_name = st.sidebar.selectbox(
+        'Choose Class',
+        np.sort(test_files_category['Class'].unique(), axis=-1, kind='mergesort')
+    )
+
+    # Filter by class
+    class_mask = (test_files_df['Class'] == class_name)
+    filename = st.sidebar.selectbox(
+        'Choose File',
+        np.sort(test_files_category[class_mask]['Filename'].unique(), axis=-1, kind='mergesort')
+    )
+
+    # Display selection info
+    st.info(f"Selected: {category} → {class_name} → {filename}")
+
+    if st.sidebar.button("Start Translation", type="primary"):
+        # Filter test data for selected video
+        data_mask = ((testing_df['FileName'] == filename) &
+                     (testing_df['Type'] == category) &
+                     (testing_df['Expression'] == class_name))
+
+        window_size = 20
+        current_test_data = testing_df[data_mask]
+
+        if current_test_data.empty:
+            st.error(f"⚠️ No matching data found for: {filename} | {category} | {class_name}")
+            st.stop()
+        else:
+            st.success(f"✅ Loaded {current_test_data.shape[0]} frames for processing")
+
+        # Create time series data
+        X_test_processed, y_test_processed = create_time_series_sequences(
+            current_test_data, feature_columns_processed, label_columns, window_size=window_size
+        )
+        X_test_processed = np.array(X_test_processed)
+
+        # Configure Streamlit display options
+        st.set_option('deprecation.showfileUploaderEncoding', False)
+
+        st.sidebar.markdown('---')
+        st.markdown(
+            """
+            <style>
+            [data-testid="stSidebar"][aria-expanded="true"] > div:first-child {
+                width: 400px;
+            }
+            [data-testid="stSidebar"][aria-expanded="false"] > div:first-child {
+                width: 400px;
+                margin-left: -400px;
+            }
+            </style>
+            """,
+            unsafe_allow_html=True,
+        )
+
+        st.sidebar.markdown('---')
+        st.markdown('## Translation Results')
+
+        # Progress tracking container
+        progress_container = st.empty()
+
+        with progress_container.container():
+            progress_df = pd.DataFrame([['--', '--']],
+                                       columns=['Frames Processed', 'Detected Sign'])
+            progress_table = st.table(progress_df)
+
+        # Video display container
+        video_display = st.empty()
+        st.markdown("<hr/>", unsafe_allow_html=True)
+        frame_display = st.empty()
+
+        # Download test video
+        video_file_path = hf_hub_download(
+            repo_id="sunilsarolkar/isl-test-data",
+            filename=f'test/{category}/{class_name}/{filename}',
+            repo_type="dataset"
+        )
+
+        if not os.path.exists(video_file_path):
+            st.error(f"⚠️ Video file not found: {video_file_path}")
+            st.stop()
+
+        # Initialize video capture
+        video_capture = cv2.VideoCapture(video_file_path)
+
+        # Get video metadata
+        probe_result = probe_video_info(video_file_path)
+        video_info = json.loads(probe_result.json)
+        video_stream = [stream for stream in video_info["streams"] if stream["codec_type"] == "video"][0]
+
+        input_fps = video_stream["avg_frame_rate"]
+        input_pix_fmt = video_stream["pix_fmt"]
+        input_vcodec = video_stream["codec_name"]
+        format_name = video_info["format"]["format_name"].split(",")[0]
+
+        # Video properties
+        width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        fps_input = int(video_capture.get(cv2.CAP_PROP_FPS))
+
+        # Processing variables
+        total_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
+        frame_buffer = []
+
+        # Output video configuration
+        output_file = f"/tmp/techmatrix_output_{uuid.uuid4().hex}.{format_name}"
+        video_writer = None
+        weighted_predictions = {}
+
+        frame_idx = 0
+
+        try:
+            # Process each frame
+            for _, frame_data in current_test_data.iterrows():
+                if not video_capture.isOpened():
+                    st.error(f"❌ Could not open video: {video_file_path}")
+                    break
+
+                if video_capture.isOpened():
+                    ret, frame = video_capture.read()
+
+                    if len(frame_buffer) < window_size:
+                        # Initial frames - build up buffer
+                        visualization_canvas = utils.render_stick_model(
+                            frame,
+                            eval(frame_data['bodypose_circles']),
+                            eval(frame_data['bodypose_sticks']),
+                            eval(frame_data['handpose_edges']),
+                            eval(frame_data['handpose_peaks'])
+                        )
+
+                        # Add prediction plots
+                        canvas_with_predictions = utils.create_bar_plot_visualization(
+                            visualization_canvas, {},
+                            f'Building Buffer - Frame {frame_idx + 1} [No Predictions Yet]',
+                            visualization_canvas
+                        )
+                        canvas_with_predictions = utils.create_bar_plot_visualization(
+                            canvas_with_predictions, weighted_predictions,
+                            f'Weighted Average - Frame {frame_idx + 1} [No Predictions Yet]',
+                            visualization_canvas
+                        )
+                        canvas_with_predictions = utils.add_bottom_padding(
+                            canvas_with_predictions, (255, 255, 255), 100
+                        )
+
+                        # Initialize video writer
+                        if video_writer is None:
+                            input_framesize = canvas_with_predictions.shape[:2]
+                            video_writer = VideoWriter(output_file, input_fps, input_framesize,
+                                                       input_pix_fmt, input_vcodec)
+
+                        video_writer.write_frame(canvas_with_predictions)
+
+                        # Update progress display
+                        with progress_container.container():
+                            progress_df = pd.DataFrame(
+                                [[f'{frame_idx + 1}/{current_test_data.shape[0]}',
+                                  '<Building 20-frame buffer>']],
+                                columns=['Frames Processed', 'Detected Sign']
+                            )
+                            progress_table = st.table(progress_df)
+
+                        frame_buffer.append(frame)
+
+                        # Display current frame
+                        with video_display.container():
+                            st.image(canvas_with_predictions, channels='BGR', use_column_width=True)
+                    else:
+                        # Process with full buffer - make predictions
+                        frame_buffer[:-1] = frame_buffer[1:]
+                        frame_buffer[-1] = frame
+
+                        # Load translation model
+                        translation_model = load_translation_model()
+
+                        # Make prediction on current window
+                        sequence_idx = frame_idx - 20
+                        prediction_output = translation_model(
+                            X_test_processed[sequence_idx].reshape(
+                                1, X_test_processed[sequence_idx].shape[0],
+                                X_test_processed[sequence_idx].shape[1]
+                            )
+                        )
+                        prediction_output = prediction_output[0].cpu().detach().numpy()
+
+                        # Get top predictions
+                        top_prediction_idx = np.argmax(prediction_output)
+                        top_3_indices = prediction_output.argsort()[-3:][::-1]
+                        top_3_signs = [expression_mapping[i] for i in top_3_indices]
+                        top_3_probabilities = prediction_output[top_3_indices]
+
+                        # Update frame-wise predictions for weighted average
+                        for sign, prob in zip(top_3_signs, top_3_probabilities):
+                            if sign not in frame_predictions:
+                                frame_predictions[sign] = []
+                            frame_predictions[sign].append(prob)
+
+                        # Current frame predictions
+                        current_predictions = {}
+                        for sign, prob in zip(top_3_signs, top_3_probabilities):
+                            current_predictions[sign] = prob
+
+                        # Calculate weighted averages
+                        for sign in frame_predictions:
+                            sign_predictions = frame_predictions[sign]
+                            sign_weights = [len(sign_predictions) for _ in range(len(sign_predictions))]
+                            weighted_predictions[sign] = calculate_weighted_average(
+                                sign_predictions, sign_weights
+                            )
+
+                        # Sort predictions by confidence
+                        sorted_predictions = dict(
+                            sorted(weighted_predictions.items(), key=lambda item: item[1], reverse=True)
+                        )
+
+                        # Create visualization
+                        visualization_canvas = utils.render_stick_model(
+                            frame,
+                            eval(frame_data['bodypose_circles']),
+                            eval(frame_data['bodypose_sticks']),
+                            eval(frame_data['handpose_edges']),
+                            eval(frame_data['handpose_peaks'])
+                        )
+
+                        # Add prediction visualizations
+                        canvas_with_predictions = utils.create_bar_plot_visualization(
+                            visualization_canvas, current_predictions,
+                            f'Current Window Prediction (Frames {sequence_idx + 1}-{frame_idx + 1})',
+                            visualization_canvas
+                        )
+                        canvas_with_predictions = utils.create_bar_plot_visualization(
+                            canvas_with_predictions, weighted_predictions,
+                            f'Cumulative Weighted Average - Frame {frame_idx + 1}',
+                            visualization_canvas
+                        )
+                        canvas_with_predictions = utils.add_bottom_padding(
+                            canvas_with_predictions, (255, 255, 255), 100
+                        )
+
+                        video_writer.write_frame(canvas_with_predictions)
+
+                        # Get best prediction for display
+                        best_sign = max(weighted_predictions, key=weighted_predictions.get)
+                        best_confidence = weighted_predictions[best_sign]
+
+                        # Update progress display
+                        with progress_container.container():
+                            progress_df = pd.DataFrame(
+                                [[f'{frame_idx + 1}/{current_test_data.shape[0]}',
+                                  f'{best_sign} ({best_confidence * 100:.2f}%)']],
+                                columns=['Frames Processed', 'Detected Sign']
+                            )
+                            progress_table = st.table(progress_df)
+
+                        # Display current frame
+                        with video_display.container():
+                            st.image(canvas_with_predictions, channels='BGR', use_column_width=True)
+
+                    frame_idx += 1
+
+            # Finalize video processing
+            st.success("✅ Video processing completed!")
+
+            with video_display.container():
+                if video_writer is not None:
+                    video_writer.close()
+                    with open(output_file, 'rb') as video_file:
+                        output_video_bytes = video_file.read()
+                    st.video(output_video_bytes)
+                    st.info(f"Processed video saved: {output_file}")
+                else:
+                    st.warning("⚠️ No video output generated")
+
+        finally:
+            # Clean up resources
+            video_capture.release()
+            if video_writer is not None:
+                video_writer.close()
+            cv2.destroyAllWindows()
+
+# Footer
+st.markdown(
+    """
+    ---
+    <div style="text-align: center; color: #666;">
+    <p><strong>TechMatrix Solvers</strong> | Shri Ram Group of Institutions</p>
+    <p>Innovating Accessible Technology Solutions for Everyone</p>
+    </div>
+    """,
+    unsafe_allow_html=True
+)
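Review note: stripped of the Streamlit plumbing, the translation loop above amounts to sliding a 20-frame window over the keypoint sequence, scoring each window with the LSTM, keeping each sign's top-3 probabilities, and reporting the sign with the best running average. A minimal sketch of that control flow under assumed toy shapes, with a stubbed `predict` standing in for the Keras model (the 167 comes from `len(expression_mapping)`):

```python
import numpy as np

WINDOW = 20
rng = np.random.default_rng(0)

def predict(window):
    """Stub for translation_model(...): a fake softmax over 167 signs."""
    scores = rng.random(167)
    return scores / scores.sum()

frames = rng.random((60, 156))   # stand-in for one video's keypoint rows
running = {}                     # sign index -> per-window probabilities

for end in range(WINDOW, len(frames) + 1):
    probs = predict(frames[end - WINDOW:end])
    for idx in probs.argsort()[-3:][::-1]:   # top-3 signs, as in app.py
        running.setdefault(idx, []).append(probs[idx])

# Equal weights make app.py's weighted average a plain mean per sign.
averages = {idx: sum(p) / len(p) for idx, p in running.items()}
best = max(averages, key=averages.get)
print(f"predicted sign index {best} ({averages[best]:.2%})")
```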
categories_processed.png
ADDED

eda/distribution_of_data.png
ADDED (Git LFS)

eda/train_test_validation_split-1.png
ADDED (Git LFS)

eda/train_test_validation_split-2.png
ADDED (Git LFS)
expression_mapping.py
ADDED
@@ -0,0 +1,168 @@
+expression_mapping={107: "alive",
+    58: "Nice",
+    8: "Beautiful",
+    115: "dead",
+    120: "famous",
+    122: "female",
+    51: "Mean",
+    21: "Deaf",
+    111: "clean",
+    117: "dirty",
+    123: "flat",
+    110: "cheap",
+    119: "expensive",
+    116: "deep",
+    99: "Ugly",
+    114: "curved",
+    12: "Blind",
+    142: "poor",
+    138: "male",
+    126: "hard",
+    133: "light",
+    137: "low",
+    113: "cool",
+    144: "rich",
+    109: "big large",
+    108: "bad",
+    112: "cold",
+    135: "loose",
+    121: "fast",
+    141: "old",
+    130: "high",
+    118: "dry",
+    145: "sad",
+    131: "hot",
+    125: "happy",
+    129: "heavy",
+    128: "healthy",
+    124: "good",
+    146: "shallow",
+    153: "strong",
+    161: "weak",
+    157: "thin",
+    158: "tight",
+    136: "loud",
+    139: "narrow",
+    134: "long",
+    156: "thick",
+    148: "short",
+    152: "soft",
+    150: "slow",
+    151: "small little",
+    149: "sick",
+    154: "tall",
+    140: "new",
+    143: "quiet",
+    95: "Today",
+    163: "wide",
+    159: "warm",
+    96: "Tomorrow",
+    162: "wet",
+    1: "Afternoon",
+    27: "Evening",
+    56: "Morning",
+    59: "Night",
+    166: "young",
+    53: "Minute",
+    38: "Hour",
+    88: "Sunday",
+    55: "Month",
+    94: "Time",
+    70: "Pleased",
+    63: "Paper",
+    105: "Year",
+    80: "Second",
+    32: "Gift",
+    102: "Week",
+    43: "Key",
+    48: "Lock",
+    4: "Bag",
+    106: "Yesterday",
+    7: "Bathroom",
+    15: "Card",
+    66: "Pen",
+    45: "Letter",
+    9: "Bed",
+    2: "Alright",
+    67: "Pencil",
+    24: "Dream",
+    13: "Book",
+    44: "Kitchen",
+    92: "Telephone",
+    23: "Door",
+    36: "Hello",
+    61: "Page",
+    40: "How are you",
+    16: "Chair",
+    89: "Table",
+    97: "Tool",
+    68: "Photograph",
+    10: "Bedroom",
+    103: "Window",
+    62: "Paint",
+    14: "Box",
+    76: "Ring",
+    82: "Soap",
+    20: "Crowd",
+    75: "Restaurant",
+    98: "Train Station",
+    31: "Friend",
+    17: "Child",
+    0: "Adult",
+    46: "Library",
+    39: "House",
+    42: "India",
+    86: "Street or Road",
+    72: "Queen",
+    85: "Store or Shop",
+    64: "Park",
+    77: "School",
+    18: "City",
+    49: "Market",
+    60: "Office",
+    132: "it",
+    41: "I",
+    6: "Bank",
+    69: "Player",
+    147: "she",
+    19: "Court",
+    155: "they",
+    104: "Winter",
+    93: "Temple",
+    33: "God",
+    50: "Marriage",
+    29: "Exercise",
+    37: "Hospital",
+    34: "Ground",
+    25: "Election",
+    73: "Race (ethnicity)",
+    11: "Bill",
+    87: "Summer",
+    160: "we",
+    127: "he",
+    22: "Death",
+    84: "Spring",
+    47: "Location",
+    26: "Energy",
+    54: "Money",
+    28: "Ex. Monsoon",
+    165: "you (plural)",
+    65: "Peace",
+    5: "Ball",
+    71: "Price",
+    35: "Gun",
+    30: "Fall",
+    164: "you",
+    81: "Sign",
+    100: "University",
+    83: "Sport",
+    74: "Religion",
+    101: "War",
+    57: "Newspaper",
+    3: "Attack",
+    90: "Team",
+    78: "Science",
+    79: "Season",
+    52: "Medicine",
+    91: "Technology",
+}
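Review note: the integer keys are the label-encoded `Expression_encoded` values the model predicts over, and app.py decodes a softmax output back to words by indexing this dict. A minimal sketch of that decoding step, with fake probabilities and assuming (as the dict appears to) that the keys cover 0..len-1:

```python
import numpy as np
from expression_mapping import expression_mapping

# Fake softmax output, one probability per class (placeholder for the model).
rng = np.random.default_rng(1)
prediction = rng.random(len(expression_mapping))
prediction /= prediction.sum()

top_3 = prediction.argsort()[-3:][::-1]   # same idiom app.py uses
for idx in top_3:
    print(expression_mapping[int(idx)], f"{prediction[idx]:.2%}")
```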
isl_processor.py
ADDED
@@ -0,0 +1,478 @@
+"""
+ISL Sign Language Translation - TechMatrix Solvers Initiative
+Core ISL Processing and Translation Models
+
+Developed by: TechMatrix Solvers Team
+- Abhay Gupta (Team Lead)
+- Kripanshu Gupta (Backend Developer)
+- Dipanshu Patel (UI/UX Designer)
+- Bhumika Patel (Deployment & Female Presenter)
+
+Institution: Shri Ram Group of Institutions
+"""
+
+import keras
+from keras.layers import TorchModuleWrapper
+import numpy as np
+import cv2
+import torch
+from scipy.ndimage.filters import gaussian_filter
+import math
+import os
+from skimage.measure import label
+import pose_utils as utils
+
+
+class ISLPoseEstimator(keras.Model):
+    """
+    ISL Pose Estimation Model combining body and hand pose detection
+    Developed by TechMatrix Solvers for accurate sign language recognition
+    """
+
+    def __init__(self, pytorch_body_model, pytorch_hand_model):
+        super().__init__()
+        self.pytorch_body_wrapper = TorchModuleWrapper(pytorch_body_model)
+        self.pytorch_body_wrapper.trainable = False
+        self.pytorch_hand_wrapper = TorchModuleWrapper(pytorch_hand_model)
+        self.pytorch_hand_wrapper.trainable = False
+        self.num_body_joints = 26
+        self.num_body_pafs = 52
+
+    def call(self, input_image):
+        """
+        Process input image and extract pose information
+
+        Args:
+            input_image: Input image tensor
+
+        Returns:
+            tuple: (body_candidates, body_subset, hand_peaks)
+        """
+        candidate, subset = self.extract_body_pose(input_image.cpu().numpy())
+        hand_regions = utils.detect_hand_regions(candidate, subset, input_image.cpu().numpy())
+
+        all_hand_keypoints = []
+        for x, y, w, is_left in hand_regions:
+            hand_peaks = self.extract_hand_pose(input_image.cpu().numpy()[y:y+w, x:x+w, :])
+            hand_peaks[:, 0] = np.where(hand_peaks[:, 0] == 0, hand_peaks[:, 0], hand_peaks[:, 0] + x)
+            hand_peaks[:, 1] = np.where(hand_peaks[:, 1] == 0, hand_peaks[:, 1], hand_peaks[:, 1] + y)
+            all_hand_keypoints.append(hand_peaks)
+
+        return candidate, subset, all_hand_keypoints
+
+    def extract_body_pose(self, input_image):
+        """
+        Extract body pose keypoints from input image
+
+        Args:
+            input_image: Input image array
+
+        Returns:
+            tuple: (candidates, subset) containing pose information
+        """
+        model_type = 'body25'
+        scale_factors = [0.5]
+        box_size = 368
+        stride = 8
+        padding_value = 128
+        threshold_1 = 0.1
+        threshold_2 = 0.05
+
+        # Calculate scale multipliers
+        multiplier = [x * box_size / input_image.shape[0] for x in scale_factors]
+        heatmap_average = np.zeros((input_image.shape[0], input_image.shape[1], self.num_body_joints))
+        paf_average = np.zeros((input_image.shape[0], input_image.shape[1], self.num_body_pafs))
+
+        for m in range(len(multiplier)):
+            scale = multiplier[m]
+            test_image = cv2.resize(input_image, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
+            padded_image, pad = utils.pad_image_corner(test_image, stride, padding_value)
+
+            # Prepare image tensor
+            image_tensor = np.transpose(np.float32(padded_image[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
+            image_tensor = np.ascontiguousarray(image_tensor)
+
+            # Convert to PyTorch tensor
+            data = torch.from_numpy(image_tensor).float()
+            if torch.cuda.is_available():
+                data = data.cuda()
+
+            with torch.no_grad():
+                stage6_L1, stage6_L2 = self.pytorch_body_wrapper(data)
+
+            stage6_L1 = stage6_L1.cpu().numpy()
+            stage6_L2 = stage6_L2.cpu().numpy()
+
+            # Process heatmaps
+            heatmap = np.transpose(np.squeeze(stage6_L2), (1, 2, 0))
+            heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+            heatmap = heatmap[:padded_image.shape[0] - pad[2], :padded_image.shape[1] - pad[3], :]
+            heatmap = cv2.resize(heatmap, (input_image.shape[1], input_image.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+            # Process PAFs (Part Affinity Fields)
+            paf = np.transpose(np.squeeze(stage6_L1), (1, 2, 0))
+            paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+            paf = paf[:padded_image.shape[0] - pad[2], :padded_image.shape[1] - pad[3], :]
+            paf = cv2.resize(paf, (input_image.shape[1], input_image.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+            heatmap_average += heatmap / len(multiplier)
+            paf_average += paf / len(multiplier)
+
+        # Extract peaks from heatmaps
+        all_peaks = []
+        peak_counter = 0
+
+        for part in range(self.num_body_joints - 1):
+            original_map = heatmap_average[:, :, part]
+            smoothed_heatmap = gaussian_filter(original_map, sigma=3)
+
+            # Find local maxima
+            left_map = np.zeros(smoothed_heatmap.shape)
+            left_map[1:, :] = smoothed_heatmap[:-1, :]
+            right_map = np.zeros(smoothed_heatmap.shape)
+            right_map[:-1, :] = smoothed_heatmap[1:, :]
+            up_map = np.zeros(smoothed_heatmap.shape)
+            up_map[:, 1:] = smoothed_heatmap[:, :-1]
+            down_map = np.zeros(smoothed_heatmap.shape)
+            down_map[:, :-1] = smoothed_heatmap[:, 1:]
+
+            peaks_binary = np.logical_and.reduce(
+                (smoothed_heatmap >= left_map, smoothed_heatmap >= right_map,
+                 smoothed_heatmap >= up_map, smoothed_heatmap >= down_map,
+                 smoothed_heatmap > threshold_1)
+            )
+
+            peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))
+            peaks_with_score = [x + (original_map[x[1], x[0]],) for x in peaks]
+            peak_id = range(peak_counter, peak_counter + len(peaks))
+            peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i],) for i in range(len(peak_id))]
+
+            all_peaks.append(peaks_with_score_and_id)
+            peak_counter += len(peaks)
+
+        # Define limb connections for body25 model
+        if model_type == 'body25':
+            limb_sequence = [
+                [1,0],[1,2],[2,3],[3,4],[1,5],[5,6],[6,7],[1,8],[8,9],[9,10],
+                [10,11],[8,12],[12,13],[13,14],[0,15],[0,16],[15,17],[16,18],
+                [11,24],[11,22],[14,21],[14,19],[22,23],[19,20]
+            ]
+            map_index = [
+                [30,31],[14,15],[16,17],[18,19],[22,23],[24,25],[26,27],[0,1],[6,7],
+                [2,3],[4,5],[8,9],[10,11],[12,13],[32,33],[34,35],[36,37],[38,39],
+                [50,51],[46,47],[44,45],[40,41],[48,49],[42,43]
+            ]
+
+        # Find connections between body parts
+        connection_all = []
+        special_k = []
+        mid_num = 10
+
+        for k in range(len(map_index)):
+            score_mid = paf_average[:, :, map_index[k]]
+            candA = all_peaks[limb_sequence[k][0]]
+            candB = all_peaks[limb_sequence[k][1]]
+
+            nA = len(candA)
+            nB = len(candB)
+            indexA, indexB = limb_sequence[k]
+
+            if nA != 0 and nB != 0:
+                connection_candidate = []
+                for i in range(nA):
+                    for j in range(nB):
+                        vec = np.subtract(candB[j][:2], candA[i][:2])
+                        norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
+                        norm = max(0.001, norm)
+                        vec = np.divide(vec, norm)
+
+                        startend = list(zip(
+                            np.linspace(candA[i][0], candB[j][0], num=mid_num),
+                            np.linspace(candA[i][1], candB[j][1], num=mid_num)
+                        ))
+
+                        vec_x = np.array([
+                            score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0]
+                            for I in range(len(startend))
+                        ])
+                        vec_y = np.array([
+                            score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1]
+                            for I in range(len(startend))
+                        ])
+
+                        score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
+                        score_with_dist_prior = (sum(score_midpts) / len(score_midpts) +
+                                                 min(0.5 * input_image.shape[0] / norm - 1, 0))
+
+                        criterion1 = len(np.nonzero(score_midpts > threshold_2)[0]) > 0.8 * len(score_midpts)
+                        criterion2 = score_with_dist_prior > 0
+
+                        if criterion1 and criterion2:
+                            connection_candidate.append([
+                                i, j, score_with_dist_prior,
+                                score_with_dist_prior + candA[i][2] + candB[j][2]
+                            ])
+
+                connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
+                connection = np.zeros((0, 5))
+
+                for c in range(len(connection_candidate)):
+                    i, j, s = connection_candidate[c][0:3]
+                    if i not in connection[:, 3] and j not in connection[:, 4]:
+                        connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
+                        if len(connection) >= min(nA, nB):
+                            break
+
connection_all.append(connection)
|
| 227 |
+
else:
|
| 228 |
+
special_k.append(k)
|
| 229 |
+
connection_all.append([])
|
| 230 |
+
|
| 231 |
+
# Create human pose subsets
|
| 232 |
+
subset = -1 * np.ones((0, self.num_body_joints + 1))
|
| 233 |
+
candidate = np.array([item for sublist in all_peaks for item in sublist])
|
| 234 |
+
|
| 235 |
+
for k in range(len(map_index)):
|
| 236 |
+
if k not in special_k:
|
| 237 |
+
partAs = connection_all[k][:, 0]
|
| 238 |
+
partBs = connection_all[k][:, 1]
|
| 239 |
+
indexA, indexB = np.array(limb_sequence[k])
|
| 240 |
+
|
| 241 |
+
for i in range(len(connection_all[k])):
|
| 242 |
+
found = 0
|
| 243 |
+
subset_idx = [-1, -1]
|
| 244 |
+
|
| 245 |
+
for j in range(len(subset)):
|
| 246 |
+
if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
|
| 247 |
+
subset_idx[found] = j
|
| 248 |
+
found += 1
|
| 249 |
+
|
| 250 |
+
if found == 1:
|
| 251 |
+
j = subset_idx[0]
|
| 252 |
+
if subset[j][indexB] != partBs[i]:
|
| 253 |
+
subset[j][indexB] = partBs[i]
|
| 254 |
+
subset[j][-1] += 1
|
| 255 |
+
subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
|
| 256 |
+
elif found == 2:
|
| 257 |
+
j1, j2 = subset_idx
|
| 258 |
+
membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
|
| 259 |
+
if len(np.nonzero(membership == 2)[0]) == 0:
|
| 260 |
+
subset[j1][:-2] += (subset[j2][:-2] + 1)
|
| 261 |
+
subset[j1][-2:] += subset[j2][-2:]
|
| 262 |
+
subset[j1][-2] += connection_all[k][i][2]
|
| 263 |
+
subset = np.delete(subset, j2, 0)
|
| 264 |
+
else:
|
| 265 |
+
subset[j1][indexB] = partBs[i]
|
| 266 |
+
subset[j1][-1] += 1
|
| 267 |
+
subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
|
| 268 |
+
elif not found and k < self.num_body_joints - 2:
|
| 269 |
+
row = -1 * np.ones(self.num_body_joints + 1)
|
| 270 |
+
row[indexA] = partAs[i]
|
| 271 |
+
row[indexB] = partBs[i]
|
| 272 |
+
row[-1] = 2
|
| 273 |
+
row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
|
| 274 |
+
subset = np.vstack([subset, row])
|
| 275 |
+
|
| 276 |
+
# Filter out low-quality detections
|
| 277 |
+
deleteIdx = []
|
| 278 |
+
for i in range(len(subset)):
|
| 279 |
+
if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
|
| 280 |
+
deleteIdx.append(i)
|
| 281 |
+
subset = np.delete(subset, deleteIdx, axis=0)
|
| 282 |
+
|
| 283 |
+
return candidate, subset
|
| 284 |
+
|
| 285 |
+
def extract_hand_pose(self, input_image):
|
| 286 |
+
"""
|
| 287 |
+
Extract hand pose keypoints from input image region
|
| 288 |
+
|
| 289 |
+
Args:
|
| 290 |
+
input_image: Cropped hand region image
|
| 291 |
+
|
| 292 |
+
Returns:
|
| 293 |
+
numpy.ndarray: Hand keypoint coordinates
|
| 294 |
+
"""
|
| 295 |
+
scale_factors = [0.5, 1.0, 1.5, 2.0]
|
| 296 |
+
box_size = 368
|
| 297 |
+
stride = 8
|
| 298 |
+
padding_value = 128
|
| 299 |
+
threshold = 0.05
|
| 300 |
+
|
| 301 |
+
multiplier = [x * box_size / input_image.shape[0] for x in scale_factors]
|
| 302 |
+
heatmap_average = np.zeros((input_image.shape[0], input_image.shape[1], 22))
|
| 303 |
+
|
| 304 |
+
for m in range(len(multiplier)):
|
| 305 |
+
scale = multiplier[m]
|
| 306 |
+
test_image = cv2.resize(input_image, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
|
| 307 |
+
padded_image, pad = utils.pad_image_corner(test_image, stride, padding_value)
|
| 308 |
+
|
| 309 |
+
# Prepare image tensor
|
| 310 |
+
image_tensor = np.transpose(np.float32(padded_image[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
|
| 311 |
+
image_tensor = np.ascontiguousarray(image_tensor)
|
| 312 |
+
|
| 313 |
+
data = torch.from_numpy(image_tensor).float()
|
| 314 |
+
if torch.cuda.is_available():
|
| 315 |
+
data = data.cuda()
|
| 316 |
+
|
| 317 |
+
with torch.no_grad():
|
| 318 |
+
output = self.pytorch_hand_wrapper(data).cpu().numpy()
|
| 319 |
+
|
| 320 |
+
# Process heatmaps
|
| 321 |
+
heatmap = np.transpose(np.squeeze(output), (1, 2, 0))
|
| 322 |
+
heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
|
| 323 |
+
heatmap = heatmap[:padded_image.shape[0] - pad[2], :padded_image.shape[1] - pad[3], :]
|
| 324 |
+
heatmap = cv2.resize(heatmap, (input_image.shape[1], input_image.shape[0]), interpolation=cv2.INTER_CUBIC)
|
| 325 |
+
|
| 326 |
+
heatmap_average += heatmap / len(multiplier)
|
| 327 |
+
|
| 328 |
+
# Extract hand keypoints
|
| 329 |
+
all_peaks = []
|
| 330 |
+
for part in range(21):
|
| 331 |
+
original_map = heatmap_average[:, :, part]
|
| 332 |
+
smoothed_heatmap = gaussian_filter(original_map, sigma=3)
|
| 333 |
+
binary = np.ascontiguousarray(smoothed_heatmap > threshold, dtype=np.uint8)
|
| 334 |
+
|
| 335 |
+
if np.sum(binary) == 0:
|
| 336 |
+
all_peaks.append([0, 0])
|
| 337 |
+
continue
|
| 338 |
+
|
| 339 |
+
label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim)
|
| 340 |
+
max_index = np.argmax([np.sum(original_map[label_img == i]) for i in range(1, label_numbers + 1)]) + 1
|
| 341 |
+
label_img[label_img != max_index] = 0
|
| 342 |
+
original_map[label_img == 0] = 0
|
| 343 |
+
|
| 344 |
+
y, x = utils.find_array_maximum(original_map)
|
| 345 |
+
all_peaks.append([x, y])
|
| 346 |
+
|
| 347 |
+
return np.array(all_peaks)
|
| 348 |
+
|
| 349 |
+
|
| 350 |
+
class ISLTranslationModel(keras.Model):
|
| 351 |
+
"""
|
| 352 |
+
Complete ISL Translation Model combining pose estimation and LSTM translation
|
| 353 |
+
Developed by TechMatrix Solvers for end-to-end sign language translation
|
| 354 |
+
"""
|
| 355 |
+
|
| 356 |
+
def __init__(self, body_model, hand_model, translation_model):
|
| 357 |
+
super().__init__()
|
| 358 |
+
self.pytorch_body_wrapper = TorchModuleWrapper(body_model)
|
| 359 |
+
self.pytorch_body_wrapper.trainable = False
|
| 360 |
+
self.pytorch_hand_wrapper = TorchModuleWrapper(hand_model)
|
| 361 |
+
self.pytorch_hand_wrapper.trainable = False
|
| 362 |
+
|
| 363 |
+
self.num_body_joints = 26
|
| 364 |
+
self.num_body_pafs = 52
|
| 365 |
+
self.model_type = 'body25'
|
| 366 |
+
self.translation_network = translation_model
|
| 367 |
+
|
| 368 |
+
def call(self, frame_sequence):
|
| 369 |
+
"""
|
| 370 |
+
Process a sequence of frames and return translation prediction
|
| 371 |
+
|
| 372 |
+
Args:
|
| 373 |
+
frame_sequence: Sequence of video frames
|
| 374 |
+
|
| 375 |
+
Returns:
|
| 376 |
+
Translation prediction probabilities
|
| 377 |
+
"""
|
| 378 |
+
window_size = 20
|
| 379 |
+
feature_sequence = []
|
| 380 |
+
blank_frame = np.zeros((1, 156))
|
| 381 |
+
|
| 382 |
+
for idx, frame in enumerate(frame_sequence.cpu()):
|
| 383 |
+
# Extract pose features from current frame
|
| 384 |
+
candidate, subset = self.extract_body_pose(frame.cpu().numpy())
|
| 385 |
+
hand_regions = utils.detect_hand_regions(candidate, subset, frame.cpu().numpy())
|
| 386 |
+
|
| 387 |
+
all_hand_keypoints = []
|
| 388 |
+
for x, y, w, is_left in hand_regions:
|
| 389 |
+
peaks = self.extract_hand_pose(frame.cpu().numpy()[y:y+w, x:x+w, :])
|
| 390 |
+
peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x)
|
| 391 |
+
peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y)
|
| 392 |
+
all_hand_keypoints.append(peaks)
|
| 393 |
+
|
| 394 |
+
# Extract structured pose data
|
| 395 |
+
body_circles, body_sticks = utils.extract_body_pose_data(candidate, subset, self.model_type)
|
| 396 |
+
hand_edges, hand_peaks = utils.extract_hand_pose_data(all_hand_keypoints)
|
| 397 |
+
|
| 398 |
+
# Convert to feature vector
|
| 399 |
+
feature_vector = self.create_feature_vector(body_circles, hand_peaks)
|
| 400 |
+
feature_sequence.append(feature_vector)
|
| 401 |
+
|
| 402 |
+
# Pad sequence if needed
|
| 403 |
+
if len(feature_sequence) < window_size:
|
| 404 |
+
for _ in range(window_size - len(feature_sequence)):
|
| 405 |
+
feature_sequence.append(blank_frame)
|
| 406 |
+
|
| 407 |
+
# Run translation model
|
| 408 |
+
return self.translation_network(np.array(feature_sequence).reshape(1, 20, 156))
|
| 409 |
+
|
| 410 |
+
def create_feature_vector(self, body_circles, hand_peaks):
|
| 411 |
+
"""
|
| 412 |
+
Create feature vector from pose data
|
| 413 |
+
|
| 414 |
+
Args:
|
| 415 |
+
body_circles: Body keypoint coordinates
|
| 416 |
+
hand_peaks: Hand keypoint data
|
| 417 |
+
|
| 418 |
+
Returns:
|
| 419 |
+
numpy.ndarray: 156-dimensional feature vector
|
| 420 |
+
"""
|
| 421 |
+
features = []
|
| 422 |
+
|
| 423 |
+
# Body keypoint x-coordinates (15 points)
|
| 424 |
+
for idx in range(15):
|
| 425 |
+
if idx < len(body_circles):
|
| 426 |
+
features.append(body_circles[idx][0])
|
| 427 |
+
else:
|
| 428 |
+
features.append(0)
|
| 429 |
+
|
| 430 |
+
# Body keypoint y-coordinates (15 points)
|
| 431 |
+
for idx in range(15):
|
| 432 |
+
if idx < len(body_circles):
|
| 433 |
+
features.append(body_circles[idx][1])
|
| 434 |
+
else:
|
| 435 |
+
features.append(0)
|
| 436 |
+
|
| 437 |
+
# Hand features for both hands
|
| 438 |
+
for hand_idx in range(2):
|
| 439 |
+
# Hand x-coordinates (21 points)
|
| 440 |
+
for idx in range(21):
|
| 441 |
+
if idx < len(hand_peaks[hand_idx]):
|
| 442 |
+
features.append(float(hand_peaks[hand_idx][idx][0]))
|
| 443 |
+
else:
|
| 444 |
+
features.append(0)
|
| 445 |
+
|
| 446 |
+
# Hand y-coordinates (21 points)
|
| 447 |
+
for idx in range(21):
|
| 448 |
+
if idx < len(hand_peaks[hand_idx]):
|
| 449 |
+
features.append(float(hand_peaks[hand_idx][idx][1]))
|
| 450 |
+
else:
|
| 451 |
+
features.append(0)
|
| 452 |
+
|
| 453 |
+
# Hand peak text/confidence (21 points)
|
| 454 |
+
for idx in range(21):
|
| 455 |
+
if idx < len(hand_peaks[hand_idx]):
|
| 456 |
+
features.append(float(hand_peaks[hand_idx][idx][2]))
|
| 457 |
+
else:
|
| 458 |
+
features.append(0)
|
| 459 |
+
|
| 460 |
+
return np.array(features)
|
| 461 |
+
|
| 462 |
+
def extract_body_pose(self, input_image):
|
| 463 |
+
"""Extract body pose - same implementation as ISLPoseEstimator"""
|
| 464 |
+
# This method would contain the same implementation as in ISLPoseEstimator
|
| 465 |
+
# For brevity, using a placeholder that calls the same logic
|
| 466 |
+
pose_estimator = ISLPoseEstimator(None, None)
|
| 467 |
+
pose_estimator.pytorch_body_wrapper = self.pytorch_body_wrapper
|
| 468 |
+
pose_estimator.num_body_joints = self.num_body_joints
|
| 469 |
+
pose_estimator.num_body_pafs = self.num_body_pafs
|
| 470 |
+
return pose_estimator.extract_body_pose(input_image)
|
| 471 |
+
|
| 472 |
+
def extract_hand_pose(self, input_image):
|
| 473 |
+
"""Extract hand pose - same implementation as ISLPoseEstimator"""
|
| 474 |
+
# This method would contain the same implementation as in ISLPoseEstimator
|
| 475 |
+
# For brevity, using a placeholder that calls the same logic
|
| 476 |
+
pose_estimator = ISLPoseEstimator(None, None)
|
| 477 |
+
pose_estimator.pytorch_hand_wrapper = self.pytorch_hand_wrapper
|
| 478 |
+
return pose_estimator.extract_hand_pose(input_image)
|
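The peak-extraction step in extract_body_pose above marks a pixel as a keypoint only when the Gaussian-smoothed heatmap there is at least as large as its four shifted neighbours and exceeds threshold_1. A minimal standalone sketch of that same shift-and-compare trick, isolated so it can be sanity-checked on a synthetic blob (the function name and demo data below are illustrative, not part of this repository):

import numpy as np
from scipy.ndimage import gaussian_filter

def find_heatmap_peaks(heatmap, threshold=0.1):
    """Return (x, y, score) tuples where the smoothed map is a 4-neighbour local maximum."""
    smoothed = gaussian_filter(heatmap, sigma=3)
    # Four shifted copies of the map; a pixel is a peak when it is >= every neighbour.
    shifted = np.zeros((4,) + smoothed.shape)
    shifted[0, 1:, :] = smoothed[:-1, :]    # shifted down
    shifted[1, :-1, :] = smoothed[1:, :]    # shifted up
    shifted[2, :, 1:] = smoothed[:, :-1]    # shifted right
    shifted[3, :, :-1] = smoothed[:, 1:]    # shifted left
    peaks_binary = np.logical_and.reduce(
        (smoothed >= shifted[0], smoothed >= shifted[1],
         smoothed >= shifted[2], smoothed >= shifted[3],
         smoothed > threshold))
    ys, xs = np.nonzero(peaks_binary)
    # Score each peak with the unsmoothed map, as the method above does.
    return [(x, y, heatmap[y, x]) for x, y in zip(xs, ys)]

# Synthetic heatmap with a single Gaussian blob centred at (x=30, y=20).
demo = np.zeros((64, 64))
demo[20, 30] = 1.0
demo = gaussian_filter(demo, sigma=2) * 50
print(find_heatmap_peaks(demo))  # one peak near x=30, y=20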
model-graph.png
ADDED
Git LFS Details
packages.txt
ADDED
@@ -0,0 +1,6 @@
+ ffmpeg
+ libgl1
+ libglib2.0-0
+ libsm6
+ libxrender1
+ libxext6
pose_models.py
ADDED
@@ -0,0 +1,360 @@
+ """
+ ISL Sign Language Translation - TechMatrix Solvers Initiative
+ Model definitions for body pose and hand pose estimation
+ Developed by: TechMatrix Solvers Team
+ """
+
+ import torch
+ from collections import OrderedDict
+ import torch.nn as nn
+
+
+ def construct_layers(layer_config, no_relu_layers, prelu_layers=[]):
+     """
+     Constructs neural network layers based on configuration
+
+     Args:
+         layer_config: Dictionary defining layer parameters
+         no_relu_layers: List of layers that shouldn't have ReLU activation
+         prelu_layers: List of layers that should use PReLU instead of ReLU
+     """
+     layers = []
+
+     for layer_name, params in layer_config.items():
+         if 'pool' in layer_name:
+             layer = nn.MaxPool2d(kernel_size=params[0], stride=params[1], padding=params[2])
+             layers.append((layer_name, layer))
+         else:
+             conv2d = nn.Conv2d(
+                 in_channels=params[0],
+                 out_channels=params[1],
+                 kernel_size=params[2],
+                 stride=params[3],
+                 padding=params[4]
+             )
+             layers.append((layer_name, conv2d))
+
+             if layer_name not in no_relu_layers:
+                 if layer_name not in prelu_layers:
+                     layers.append(('relu_' + layer_name, nn.ReLU(inplace=True)))
+                 else:
+                     layers.append(('prelu' + layer_name[4:], nn.PReLU(params[1])))
+
+     return nn.Sequential(OrderedDict(layers))
+
+
+ def construct_multi_conv_layers(layer_config, no_relu_layers):
+     """
+     Constructs multiple convolution layers for complex architectures
+     """
+     modules = []
+     for layer_name, params in layer_config.items():
+         layers = []
+         if 'pool' in layer_name:
+             layer = nn.MaxPool2d(kernel_size=params[0], stride=params[1], padding=params[2])
+             layers.append((layer_name, layer))
+         else:
+             conv2d = nn.Conv2d(
+                 in_channels=params[0],
+                 out_channels=params[1],
+                 kernel_size=params[2],
+                 stride=params[3],
+                 padding=params[4]
+             )
+             layers.append((layer_name, conv2d))
+             if layer_name not in no_relu_layers:
+                 layers.append(('Mprelu' + layer_name[5:], nn.PReLU(params[1])))
+         modules.append(nn.Sequential(OrderedDict(layers)))
+     return nn.ModuleList(modules)
+
+
+ class BodyPose25Model(nn.Module):
+     """
+     Body pose estimation model using 25-point skeleton
+     Developed by TechMatrix Solvers for ISL translation
+     """
+
+     def __init__(self):
+         super(BodyPose25Model, self).__init__()
+
+         # Define layers without ReLU activation
+         no_relu_layers = [
+             'Mconv7_stage0_L1', 'Mconv7_stage0_L2',
+             'Mconv7_stage1_L1', 'Mconv7_stage1_L2',
+             'Mconv7_stage2_L2', 'Mconv7_stage3_L2'
+         ]
+         prelu_layers = ['conv4_2', 'conv4_3_CPM', 'conv4_4_CPM']
+
+         # Initial feature extraction layers
+         base_layers = OrderedDict([
+             ('conv1_1', [3, 64, 3, 1, 1]),
+             ('conv1_2', [64, 64, 3, 1, 1]),
+             ('pool1_stage1', [2, 2, 0]),
+             ('conv2_1', [64, 128, 3, 1, 1]),
+             ('conv2_2', [128, 128, 3, 1, 1]),
+             ('pool2_stage1', [2, 2, 0]),
+             ('conv3_1', [128, 256, 3, 1, 1]),
+             ('conv3_2', [256, 256, 3, 1, 1]),
+             ('conv3_3', [256, 256, 3, 1, 1]),
+             ('conv3_4', [256, 256, 3, 1, 1]),
+             ('pool3_stage1', [2, 2, 0]),
+             ('conv4_1', [256, 512, 3, 1, 1]),
+             ('conv4_2', [512, 512, 3, 1, 1]),
+             ('conv4_3_CPM', [512, 256, 3, 1, 1]),
+             ('conv4_4_CPM', [256, 128, 3, 1, 1])
+         ])
+         self.base_model = construct_layers(base_layers, no_relu_layers, prelu_layers)
+
+         # Multi-stage refinement blocks
+         stage_blocks = {}
+
+         # L2 branch - Stage 0
+         stage_blocks['Mconv1_stage0_L2'] = OrderedDict([
+             ('Mconv1_stage0_L2_0', [128, 96, 3, 1, 1]),
+             ('Mconv1_stage0_L2_1', [96, 96, 3, 1, 1]),
+             ('Mconv1_stage0_L2_2', [96, 96, 3, 1, 1])
+         ])
+
+         for i in range(2, 6):
+             stage_blocks[f'Mconv{i}_stage0_L2'] = OrderedDict([
+                 (f'Mconv{i}_stage0_L2_0', [288, 96, 3, 1, 1]),
+                 (f'Mconv{i}_stage0_L2_1', [96, 96, 3, 1, 1]),
+                 (f'Mconv{i}_stage0_L2_2', [96, 96, 3, 1, 1])
+             ])
+
+         stage_blocks['Mconv6_7_stage0_L2'] = OrderedDict([
+             ('Mconv6_stage0_L2', [288, 256, 1, 1, 0]),
+             ('Mconv7_stage0_L2', [256, 52, 1, 1, 0])
+         ])
+
+         # L2 branch - Stages 1-3
+         for stage in range(1, 4):
+             stage_blocks[f'Mconv1_stage{stage}_L2'] = OrderedDict([
+                 (f'Mconv1_stage{stage}_L2_0', [180, 128, 3, 1, 1]),
+                 (f'Mconv1_stage{stage}_L2_1', [128, 128, 3, 1, 1]),
+                 (f'Mconv1_stage{stage}_L2_2', [128, 128, 3, 1, 1])
+             ])
+             for i in range(2, 6):
+                 stage_blocks[f'Mconv{i}_stage{stage}_L2'] = OrderedDict([
+                     (f'Mconv{i}_stage{stage}_L2_0', [384, 128, 3, 1, 1]),
+                     (f'Mconv{i}_stage{stage}_L2_1', [128, 128, 3, 1, 1]),
+                     (f'Mconv{i}_stage{stage}_L2_2', [128, 128, 3, 1, 1])
+                 ])
+             stage_blocks[f'Mconv6_7_stage{stage}_L2'] = OrderedDict([
+                 (f'Mconv6_stage{stage}_L2', [384, 512, 1, 1, 0]),
+                 (f'Mconv7_stage{stage}_L2', [512, 52, 1, 1, 0])
+             ])
+
+         # L1 branch configurations
+         stage_blocks['Mconv1_stage0_L1'] = OrderedDict([
+             ('Mconv1_stage0_L1_0', [180, 96, 3, 1, 1]),
+             ('Mconv1_stage0_L1_1', [96, 96, 3, 1, 1]),
+             ('Mconv1_stage0_L1_2', [96, 96, 3, 1, 1])
+         ])
+
+         for i in range(2, 6):
+             stage_blocks[f'Mconv{i}_stage0_L1'] = OrderedDict([
+                 (f'Mconv{i}_stage0_L1_0', [288, 96, 3, 1, 1]),
+                 (f'Mconv{i}_stage0_L1_1', [96, 96, 3, 1, 1]),
+                 (f'Mconv{i}_stage0_L1_2', [96, 96, 3, 1, 1])
+             ])
+
+         stage_blocks['Mconv6_7_stage0_L1'] = OrderedDict([
+             ('Mconv6_stage0_L1', [288, 256, 1, 1, 0]),
+             ('Mconv7_stage0_L1', [256, 26, 1, 1, 0])
+         ])
+
+         stage_blocks['Mconv1_stage1_L1'] = OrderedDict([
+             ('Mconv1_stage1_L1_0', [206, 128, 3, 1, 1]),
+             ('Mconv1_stage1_L1_1', [128, 128, 3, 1, 1]),
+             ('Mconv1_stage1_L1_2', [128, 128, 3, 1, 1])
+         ])
+
+         for i in range(2, 6):
+             stage_blocks[f'Mconv{i}_stage1_L1'] = OrderedDict([
+                 (f'Mconv{i}_stage1_L1_0', [384, 128, 3, 1, 1]),
+                 (f'Mconv{i}_stage1_L1_1', [128, 128, 3, 1, 1]),
+                 (f'Mconv{i}_stage1_L1_2', [128, 128, 3, 1, 1])
+             ])
+
+         stage_blocks['Mconv6_7_stage1_L1'] = OrderedDict([
+             ('Mconv6_stage1_L1', [384, 512, 1, 1, 0]),
+             ('Mconv7_stage1_L1', [512, 26, 1, 1, 0])
+         ])
+
+         # Build multi-conv modules
+         for block_name in stage_blocks.keys():
+             stage_blocks[block_name] = construct_multi_conv_layers(stage_blocks[block_name], no_relu_layers)
+
+         self.stage_models = nn.ModuleDict(stage_blocks)
+
+         # Freeze parameters for efficiency
+         for param in self.parameters():
+             param.requires_grad = False
+
+     def _multi_conv_forward(self, x, models):
+         """Forward pass through multi-convolution blocks"""
+         outputs = []
+         current_output = x
+         for model in models:
+             current_output = model(current_output)
+             outputs.append(current_output)
+         return torch.cat(outputs, 1)
+
+     def forward(self, x):
+         """Forward pass through the body pose model"""
+         base_features = self.base_model(x)
+
+         # L2 branch processing
+         current_features = base_features
+         for stage in range(4):
+             current_features = self._multi_conv_forward(
+                 current_features, self.stage_models[f'Mconv1_stage{stage}_L2']
+             )
+             for layer in range(2, 6):
+                 current_features = self._multi_conv_forward(
+                     current_features, self.stage_models[f'Mconv{layer}_stage{stage}_L2']
+                 )
+             current_features = self.stage_models[f'Mconv6_7_stage{stage}_L2'][0](current_features)
+             current_features = self.stage_models[f'Mconv6_7_stage{stage}_L2'][1](current_features)
+             l2_output = current_features
+             current_features = torch.cat([base_features, current_features], 1)
+
+         # L1 branch - Stage 0
+         current_features = self._multi_conv_forward(
+             current_features, self.stage_models['Mconv1_stage0_L1']
+         )
+         for layer in range(2, 6):
+             current_features = self._multi_conv_forward(
+                 current_features, self.stage_models[f'Mconv{layer}_stage0_L1']
+             )
+         current_features = self.stage_models['Mconv6_7_stage0_L1'][0](current_features)
+         current_features = self.stage_models['Mconv6_7_stage0_L1'][1](current_features)
+         stage0_l1_output = current_features
+         current_features = torch.cat([base_features, stage0_l1_output, l2_output], 1)
+
+         # L1 branch - Stage 1
+         current_features = self._multi_conv_forward(
+             current_features, self.stage_models['Mconv1_stage1_L1']
+         )
+         for layer in range(2, 6):
+             current_features = self._multi_conv_forward(
+                 current_features, self.stage_models[f'Mconv{layer}_stage1_L1']
+             )
+         current_features = self.stage_models['Mconv6_7_stage1_L1'][0](current_features)
+         stage1_l1_output = self.stage_models['Mconv6_7_stage1_L1'][1](current_features)
+
+         return l2_output, stage1_l1_output
+
+
+ class HandPoseModel(nn.Module):
+     """
+     Hand pose estimation model using 21-point hand landmarks
+     Developed by TechMatrix Solvers for ISL translation
+     """
+
+     def __init__(self):
+         super(HandPoseModel, self).__init__()
+
+         # Layers without ReLU activation
+         no_relu_layers = [
+             'conv6_2_CPM', 'Mconv7_stage2', 'Mconv7_stage3',
+             'Mconv7_stage4', 'Mconv7_stage5', 'Mconv7_stage6'
+         ]
+
+         # Stage 1 - Feature extraction
+         stage1_base = OrderedDict([
+             ('conv1_1', [3, 64, 3, 1, 1]),
+             ('conv1_2', [64, 64, 3, 1, 1]),
+             ('pool1_stage1', [2, 2, 0]),
+             ('conv2_1', [64, 128, 3, 1, 1]),
+             ('conv2_2', [128, 128, 3, 1, 1]),
+             ('pool2_stage1', [2, 2, 0]),
+             ('conv3_1', [128, 256, 3, 1, 1]),
+             ('conv3_2', [256, 256, 3, 1, 1]),
+             ('conv3_3', [256, 256, 3, 1, 1]),
+             ('conv3_4', [256, 256, 3, 1, 1]),
+             ('pool3_stage1', [2, 2, 0]),
+             ('conv4_1', [256, 512, 3, 1, 1]),
+             ('conv4_2', [512, 512, 3, 1, 1]),
+             ('conv4_3', [512, 512, 3, 1, 1]),
+             ('conv4_4', [512, 512, 3, 1, 1]),
+             ('conv5_1', [512, 512, 3, 1, 1]),
+             ('conv5_2', [512, 512, 3, 1, 1]),
+             ('conv5_3_CPM', [512, 128, 3, 1, 1])
+         ])
+
+         stage1_prediction = OrderedDict([
+             ('conv6_1_CPM', [128, 512, 1, 1, 0]),
+             ('conv6_2_CPM', [512, 22, 1, 1, 0])
+         ])
+
+         stage_blocks = {}
+         stage_blocks['stage1_base'] = stage1_base
+         stage_blocks['stage1_prediction'] = stage1_prediction
+
+         # Stages 2-6 refinement
+         for i in range(2, 7):
+             stage_blocks[f'stage{i}'] = OrderedDict([
+                 (f'Mconv1_stage{i}', [150, 128, 7, 1, 3]),
+                 (f'Mconv2_stage{i}', [128, 128, 7, 1, 3]),
+                 (f'Mconv3_stage{i}', [128, 128, 7, 1, 3]),
+                 (f'Mconv4_stage{i}', [128, 128, 7, 1, 3]),
+                 (f'Mconv5_stage{i}', [128, 128, 7, 1, 3]),
+                 (f'Mconv6_stage{i}', [128, 128, 1, 1, 0]),
+                 (f'Mconv7_stage{i}', [128, 22, 1, 1, 0])
+             ])
+
+         # Build all stage models
+         for block_name in stage_blocks.keys():
+             stage_blocks[block_name] = construct_layers(stage_blocks[block_name], no_relu_layers)
+
+         self.stage1_base_model = stage_blocks['stage1_base']
+         self.stage1_prediction_model = stage_blocks['stage1_prediction']
+         self.stage2_model = stage_blocks['stage2']
+         self.stage3_model = stage_blocks['stage3']
+         self.stage4_model = stage_blocks['stage4']
+         self.stage5_model = stage_blocks['stage5']
+         self.stage6_model = stage_blocks['stage6']
+
+         # Freeze parameters for efficiency
+         for param in self.parameters():
+             param.requires_grad = False
+
+     def forward(self, x):
+         """Forward pass through the hand pose model"""
+         base_features = self.stage1_base_model(x)
+         stage1_output = self.stage1_prediction_model(base_features)
+
+         # Stage 2
+         stage2_input = torch.cat([stage1_output, base_features], 1)
+         stage2_output = self.stage2_model(stage2_input)
+
+         # Stage 3
+         stage3_input = torch.cat([stage2_output, base_features], 1)
+         stage3_output = self.stage3_model(stage3_input)
+
+         # Stage 4
+         stage4_input = torch.cat([stage3_output, base_features], 1)
+         stage4_output = self.stage4_model(stage4_input)
+
+         # Stage 5
+         stage5_input = torch.cat([stage4_output, base_features], 1)
+         stage5_output = self.stage5_model(stage5_input)
+
+         # Stage 6
+         stage6_input = torch.cat([stage5_output, base_features], 1)
+         stage6_output = self.stage6_model(stage6_input)
+
+         return stage6_output
+
+
+ # Factory functions for easy model instantiation
+ def create_bodypose_model():
+     """Create and return body pose detection model"""
+     return BodyPose25Model()
+
+
+ def create_handpose_model():
+     """Create and return hand pose detection model"""
+     return HandPoseModel()
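A quick shape sanity check for the two networks defined above; a minimal sketch assuming pose_models.py from this commit is importable (randomly initialised weights produce meaningless maps, so real use loads the converted OpenPose weights first):

import torch
from pose_models import create_bodypose_model, create_handpose_model

body_net = create_bodypose_model()
hand_net = create_handpose_model()

# 368x368 matches the box size used by the estimator; three stride-2 pools give 46x46 maps.
dummy = torch.randn(1, 3, 368, 368)
with torch.no_grad():
    pafs, heatmaps = body_net(dummy)   # L2 branch first, then the L1 branch
    hand_maps = hand_net(dummy)

print(pafs.shape)       # torch.Size([1, 52, 46, 46]) - 26 two-channel part affinity fields
print(heatmaps.shape)   # torch.Size([1, 26, 46, 46]) - 25 body joints + background
print(hand_maps.shape)  # torch.Size([1, 22, 46, 46]) - 21 hand landmarks + background

This is also why extract_body_pose resizes the network output by the stride (8) and then to the original frame size: the maps come out at 1/8 resolution.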
pose_utils.py
ADDED
@@ -0,0 +1,468 @@
+ """
+ ISL Sign Language Translation - TechMatrix Solvers Initiative
+ Utility functions for pose processing and visualization
+ Developed by: TechMatrix Solvers Team
+ """
+
+ import numpy as np
+ import math
+ import cv2
+ import matplotlib
+ from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
+ from matplotlib.figure import Figure
+ import matplotlib.pyplot as plt
+ import copy
+ import seaborn as sns
+
+
+ def pad_image_corner(img, stride, pad_value):
+     """
+     Pad image to ensure dimensions are divisible by stride
+
+     Args:
+         img: Input image array
+         stride: Stride value for padding calculation
+         pad_value: Value to use for padding
+     """
+     h, w = img.shape[:2]
+
+     pad = [0, 0, 0, 0]  # [up, left, down, right]
+     pad[2] = 0 if (h % stride == 0) else stride - (h % stride)  # down
+     pad[3] = 0 if (w % stride == 0) else stride - (w % stride)  # right
+
+     img_padded = img
+
+     # Add padding
+     if pad[0] > 0:  # up
+         pad_up = np.tile(img_padded[0:1, :, :] * 0 + pad_value, (pad[0], 1, 1))
+         img_padded = np.concatenate((pad_up, img_padded), axis=0)
+
+     if pad[1] > 0:  # left
+         pad_left = np.tile(img_padded[:, 0:1, :] * 0 + pad_value, (1, pad[1], 1))
+         img_padded = np.concatenate((pad_left, img_padded), axis=1)
+
+     if pad[2] > 0:  # down
+         pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + pad_value, (pad[2], 1, 1))
+         img_padded = np.concatenate((img_padded, pad_down), axis=0)
+
+     if pad[3] > 0:  # right
+         pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + pad_value, (1, pad[3], 1))
+         img_padded = np.concatenate((img_padded, pad_right), axis=1)
+
+     return img_padded, pad
+
+
+ def transfer_model_weights(model, model_weights):
+     """
+     Transfer weights from caffe model to pytorch model format
+
+     Args:
+         model: PyTorch model
+         model_weights: Dictionary of weights from caffe model
+     """
+     transferred_weights = {}
+     for weights_name in model.state_dict().keys():
+         if len(weights_name.split('.')) > 4:  # body25 format
+             transferred_weights[weights_name] = model_weights['.'.join(
+                 weights_name.split('.')[3:])]
+         else:
+             transferred_weights[weights_name] = model_weights['.'.join(
+                 weights_name.split('.')[1:])]
+     return transferred_weights
+
+
+ def draw_body_pose_visualization(canvas, candidate, subset, model_type='body25'):
+     """
+     Draw body pose keypoints and connections on image
+
+     Args:
+         canvas: Image to draw on
+         candidate: Detected keypoint candidates
+         subset: Valid keypoint connections
+         model_type: Type of pose model ('body25' or 'coco')
+     """
+     stick_width = 4
+
+     if model_type == 'body25':
+         limb_sequence = [
+             [1,0],[1,2],[2,3],[3,4],[1,5],[5,6],[6,7],[1,8],[8,9],[9,10],
+             [10,11],[8,12],[12,13],[13,14],[0,15],[0,16],[15,17],[16,18],
+             [11,24],[11,22],[14,21],[14,19],[22,23],[19,20]
+         ]
+         num_joints = 25
+     else:
+         limb_sequence = [
+             [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9],
+             [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16],
+             [0, 15], [15, 17], [2, 16], [5, 17]
+         ]
+         num_joints = 18
+
+     # Color scheme for different joints
+     colors = [
+         [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0],
+         [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255],
+         [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255],
+         [255, 0, 255], [255, 0, 170], [255, 0, 85], [255,255,0], [255,255,85],
+         [255,255,170], [255,255,255], [170,255,255], [85,255,255], [0,255,255]
+     ]
+
+     # Draw keypoints
+     for i in range(num_joints):
+         for n in range(len(subset)):
+             index = int(subset[n][i])
+             if index == -1:
+                 continue
+             x, y = candidate[index][0:2]
+             cv2.circle(canvas, (int(x), int(y)), 4, colors[i], thickness=-1)
+
+     # Draw limbs
+     for i in range(num_joints - 1):
+         for n in range(len(subset)):
+             index = subset[n][np.array(limb_sequence[i])]
+             if -1 in index:
+                 continue
+             current_canvas = canvas.copy()
+             Y = candidate[index.astype(int), 0]
+             X = candidate[index.astype(int), 1]
+             mean_x = np.mean(X)
+             mean_y = np.mean(Y)
+             length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
+             angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
+             polygon = cv2.ellipse2Poly((int(mean_y), int(mean_x)),
+                                        (int(length / 2), stick_width),
+                                        int(angle), 0, 360, 1)
+             cv2.fillConvexPoly(current_canvas, polygon, colors[i])
+             canvas = cv2.addWeighted(canvas, 0.4, current_canvas, 0.6, 0)
+
+     return canvas
+
+
+ def extract_body_pose_data(candidate, subset, model_type='body25'):
+     """
+     Extract body pose data without drawing
+
+     Returns:
+         tuple: (keypoint_circles, limb_sticks) data for further processing
+     """
+     stick_width = 4
+
+     if model_type == 'body25':
+         limb_sequence = [
+             [1,0],[1,2],[2,3],[3,4],[1,5],[5,6],[6,7],[1,8],[8,9],[9,10],
+             [10,11],[8,12],[12,13],[13,14],[0,15],[0,16],[15,17],[16,18],
+             [11,24],[11,22],[14,21],[14,19],[22,23],[19,20]
+         ]
+         num_joints = 25
+     else:
+         limb_sequence = [
+             [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9],
+             [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16],
+             [0, 15], [15, 17], [2, 16], [5, 17]
+         ]
+         num_joints = 18
+
+     # Extract keypoint coordinates
+     keypoint_circles = []
+     for i in range(num_joints):
+         for n in range(len(subset)):
+             index = int(subset[n][i])
+             if index == -1:
+                 continue
+             x, y = candidate[index][0:2]
+             keypoint_circles.append((x, y))
+
+     # Extract limb stick data
+     limb_sticks = []
+     for i in range(num_joints - 1):
+         for n in range(len(subset)):
+             index = subset[n][np.array(limb_sequence[i])]
+             if -1 in index:
+                 continue
+             Y = candidate[index.astype(int), 0]
+             X = candidate[index.astype(int), 1]
+             mean_x = np.mean(X)
+             mean_y = np.mean(Y)
+             length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
+             angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
+             limb_sticks.append((mean_y, mean_x, angle, length))
+
+     return keypoint_circles, limb_sticks
+
+
+ def draw_hand_pose_visualization(canvas, all_hand_peaks, show_numbers=False):
+     """
+     Draw hand pose keypoints and connections
+
+     Args:
+         canvas: Image to draw on
+         all_hand_peaks: Detected hand keypoints for both hands
+         show_numbers: Whether to show keypoint numbers
+     """
+     edges = [
+         [0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
+         [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]
+     ]
+
+     fig = Figure(figsize=plt.figaspect(canvas))
+     fig.subplots_adjust(0, 0, 1, 1)
+     bg = FigureCanvas(fig)
+     ax = fig.subplots()
+     ax.axis('off')
+     ax.imshow(canvas)
+
+     width, height = ax.figure.get_size_inches() * ax.figure.get_dpi()
+
+     for peaks in all_hand_peaks:
+         for ie, e in enumerate(edges):
+             if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
+                 x1, y1 = peaks[e[0]]
+                 x2, y2 = peaks[e[1]]
+                 ax.plot([x1, x2], [y1, y2],
+                         color=matplotlib.colors.hsv_to_rgb([ie/float(len(edges)), 1.0, 1.0]))
+
+         for i, keypoint in enumerate(peaks):
+             x, y = keypoint
+             ax.plot(x, y, 'r.')
+             if show_numbers:
+                 ax.text(x, y, str(i))
+
+     bg.draw()
+     canvas = np.frombuffer(bg.tostring_rgb(), dtype='uint8').reshape(int(height), int(width), 3)
+     return canvas
+
+
+ def extract_hand_pose_data(all_hand_peaks, show_numbers=False):
+     """
+     Extract hand pose data without drawing
+
+     Returns:
+         tuple: (hand_edges, hand_peaks) data for further processing
+     """
+     edges = [
+         [0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
+         [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]
+     ]
+
+     export_edges = [[], []]
+     export_peaks = [[], []]
+
+     for idx, peaks in enumerate(all_hand_peaks):
+         for ie, e in enumerate(edges):
+             if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
+                 x1, y1 = peaks[e[0]]
+                 x2, y2 = peaks[e[1]]
+                 export_edges[idx].append((ie, (x1, y1), (x2, y2)))
+
+         for i, keypoint in enumerate(peaks):
+             x, y = keypoint
+             export_peaks[idx].append((x, y, str(i)))
+
+     return export_edges, export_peaks
+
+
+ def detect_hand_regions(candidate, subset, original_image):
+     """
+     Detect hand regions based on body pose keypoints
+
+     Args:
+         candidate: Body pose candidates
+         subset: Valid body pose connections
+         original_image: Original input image
+
+     Returns:
+         List of detected hand regions [x, y, width, is_left_hand]
+     """
+     ratio_wrist_elbow = 0.33
+     detection_results = []
+
+     image_height, image_width = original_image.shape[0:2]
+
+     for person in subset.astype(int):
+         # Check if left hand keypoints exist (shoulder, elbow, wrist)
+         has_left_hand = np.sum(person[[5, 6, 7]] == -1) == 0
+         has_right_hand = np.sum(person[[2, 3, 4]] == -1) == 0
+
+         if not (has_left_hand or has_right_hand):
+             continue
+
+         hands = []
+
+         # Process left hand
+         if has_left_hand:
+             left_shoulder_idx, left_elbow_idx, left_wrist_idx = person[[5, 6, 7]]
+             x1, y1 = candidate[left_shoulder_idx][:2]
+             x2, y2 = candidate[left_elbow_idx][:2]
+             x3, y3 = candidate[left_wrist_idx][:2]
+             hands.append([x1, y1, x2, y2, x3, y3, True])
+
+         # Process right hand
+         if has_right_hand:
+             right_shoulder_idx, right_elbow_idx, right_wrist_idx = person[[2, 3, 4]]
+             x1, y1 = candidate[right_shoulder_idx][:2]
+             x2, y2 = candidate[right_elbow_idx][:2]
+             x3, y3 = candidate[right_wrist_idx][:2]
+             hands.append([x1, y1, x2, y2, x3, y3, False])
+
+         for x1, y1, x2, y2, x3, y3, is_left in hands:
+             # Calculate hand region based on wrist and elbow positions
+             x = x3 + ratio_wrist_elbow * (x3 - x2)
+             y = y3 + ratio_wrist_elbow * (y3 - y2)
+
+             distance_wrist_elbow = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2)
+             distance_elbow_shoulder = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
+             width = 1.5 * max(distance_wrist_elbow, 0.9 * distance_elbow_shoulder)
+
+             # Adjust to top-left corner
+             x -= width / 2
+             y -= width / 2
+
+             # Ensure bounds are within image
+             x = max(0, x)
+             y = max(0, y)
+
+             width1 = width if x + width <= image_width else image_width - x
+             width2 = width if y + width <= image_height else image_height - y
+             width = min(width1, width2)
+
+             # Only include if region is large enough
+             if width >= 20:
+                 detection_results.append([int(x), int(y), int(width), is_left])
+
+     return detection_results
+
+
+ def render_stick_model(original_img, keypoint_circles, limb_sticks, hand_edges, hand_peaks):
+     """
+     Render complete stick model with body and hand poses
+
+     Args:
+         original_img: Original image
+         keypoint_circles: Body keypoint coordinates
+         limb_sticks: Body limb stick data
+         hand_edges: Hand connection data
+         hand_peaks: Hand keypoint data
+     """
+     canvas = copy.deepcopy(original_img)
+
+     colors = [
+         [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0],
+         [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255],
+         [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255],
+         [255, 0, 255], [255, 0, 170], [255, 0, 85], [255,255,0], [255,255,85],
+         [255,255,170], [255,255,255], [170,255,255], [85,255,255], [0,255,255]
+     ]
+     stick_width = 4
+
+     # Draw body limbs
+     for idx, (mean_x, mean_y, angle, length) in enumerate(limb_sticks):
+         current_canvas = canvas.copy()
+         polygon = cv2.ellipse2Poly(
+             (int(mean_x), int(mean_y)),
+             (int(length / 2), stick_width),
+             int(angle), 0, 360, 1
+         )
+         cv2.fillConvexPoly(current_canvas, polygon, colors[idx])
+         canvas = cv2.addWeighted(canvas, 0.4, current_canvas, 0.6, 0)
+
+     # Draw body keypoints
+     for idx, (x, y) in enumerate(keypoint_circles):
+         cv2.circle(canvas, (int(x), int(y)), 4, colors[idx], thickness=-1)
+
+     # Draw hand poses using matplotlib
+     fig = Figure(figsize=plt.figaspect(canvas))
+     fig.subplots_adjust(0, 0, 1, 1)
+     ax = fig.subplots()
+     ax.axis('off')
+     ax.imshow(canvas)
+
+     edges = [
+         [0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9],
+         [9, 10], [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16],
+         [0, 17], [17, 18], [18, 19], [19, 20]
+     ]
+
+     for hand_edge_set in hand_edges:
+         for (ie, (x1, y1), (x2, y2)) in hand_edge_set:
+             ax.plot([x1, x2], [y1, y2],
+                     color=matplotlib.colors.hsv_to_rgb([ie/float(len(edges)), 1.0, 1.0]))
+
+     for hand_peak_set in hand_peaks:
+         for (x, y, text) in hand_peak_set:
+             ax.plot(x, y, 'r.')
+
+     # Convert figure to numpy array
+     bg = FigureCanvas(fig)
+     bg.draw()
+
+     width, height = fig.get_size_inches() * fig.get_dpi()
+     buf = bg.buffer_rgba()
+     canvas = np.frombuffer(buf, dtype=np.uint8).reshape(int(height), int(width), 4)
+     canvas = canvas[:, :, :3]  # Keep only RGB channels
+
+     plt.close(fig)  # Clean up
+     return cv2.resize(canvas, (math.ceil(width), math.ceil(height)))
+
+
+ def create_bar_plot_visualization(image, predictions, title, orig_img):
+     """
+     Create bar plot visualization below the image
+
+     Args:
+         image: Input image
+         predictions: Dictionary of prediction probabilities
+         title: Plot title
+         orig_img: Original image for sizing
+     """
+     fig, ax = plt.subplots(figsize=(orig_img.shape[1]/100, orig_img.shape[0]/200), dpi=100)
+     plt.title(title)
+
+     # Create bar plot data
+     labels = list(predictions.keys())
+     probabilities = list(predictions.values())
+
+     # Create seaborn bar plot
+     sns.barplot(x=labels, y=probabilities, ax=ax)
+     fig.canvas.draw()
+
+     # Convert plot to numpy array
+     plot_image = np.array(fig.canvas.renderer.buffer_rgba())[:, :, :3]  # Remove alpha
+     plt.close(fig)  # Close to avoid memory leaks
+
+     # Combine image and plot vertically
+     combined_image = np.vstack((image, cv2.resize(plot_image, (image.shape[1], plot_image.shape[0]))))
+
+     return combined_image
+
+
+ def add_bottom_padding(image, pad_value, pad_height):
+     """
+     Add padding to the bottom of an image
+
+     Args:
+         image: Input image
+         pad_value: Color value for padding (tuple or int)
+         pad_height: Height of padding to add
+     """
+     height, width, channels = image.shape
+     padding = np.zeros((pad_height, width, channels), dtype=image.dtype)
+     padding[:, :, :] = pad_value
+
+     return np.vstack((image, padding))
+
+
+ def find_array_maximum(array):
+     """
+     Get maximum index of 2D array
+
+     Args:
+         array: 2D numpy array
+
+     Returns:
+         tuple: (row_index, col_index) of maximum value
+     """
+     array_index = array.argmax(1)
+     array_value = array.max(1)
+     i = array_value.argmax()
+     j = array_index[i]
+     return i, j
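Two of the helpers above are easy to sanity-check in isolation. A quick sketch, assuming pose_utils.py from this commit is on the import path (the demo arrays are illustrative):

import numpy as np
import pose_utils

# pad_image_corner only ever pads the bottom and right edges, up to the next multiple of the stride.
img = np.full((100, 115, 3), 64, dtype=np.uint8)
padded, pad = pose_utils.pad_image_corner(img, 8, 128)
print(pad)           # [0, 0, 4, 5]  (up, left, down, right)
print(padded.shape)  # (104, 120, 3)

# find_array_maximum returns the (row, column) of the largest value in a 2D array.
hm = np.zeros((5, 7))
hm[2, 4] = 9.0
print(pose_utils.find_array_maximum(hm))  # (2, 4)

The pad list returned here is exactly what extract_body_pose uses to crop the network output back to the unpadded size.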
requirements.txt
CHANGED
@@ -1,3 +1,22 @@
-
-
-
+ opencv_python_headless
+ streamlit
+ numpy
+ Pillow
+ matplotlib==3.5.3
+ opencv-python
+ scipy
+ scikit-image
+ tqdm
+ pandas
+ torch
+ torchaudio
+ torchvision
+ torchtext
+ torchdata
+ av
+ keras
+ ffmpeg
+ ffmpeg-python
+ seaborn[stats]
+ huggingface_hub
+ uuid
verify_deployment.py
ADDED
@@ -0,0 +1,140 @@
+ #!/usr/bin/env python3
+ """
+ TechMatrix Solvers ISL Translation System
+ Deployment Verification Script
+
+ This script verifies that all required files are present for deployment
+ """
+
+ import os
+ import sys
+
+ def verify_files():
+     """Verify all required files are present"""
+     required_files = [
+         'README.md',
+         'requirements.txt',
+         'packages.txt',
+         'app.py',
+         'pose_models.py',
+         'pose_utils.py',
+         'isl_processor.py',
+         'expression_mapping.py',
+         'LICENSE',
+         '.gitignore',
+         'categories_processed.png',
+         'DataPipeline.png',
+         'model-graph.png'
+     ]
+
+     required_dirs = [
+         'eda'
+     ]
+
+     missing_files = []
+     missing_dirs = []
+
+     print("🔍 TechMatrix Solvers ISL Translation System")
+     print("📋 Deployment Verification")
+     print("=" * 50)
+
+     # Check files
+     print("\n📁 Checking required files:")
+     for file in required_files:
+         if os.path.exists(file):
+             print(f"✅ {file}")
+         else:
+             print(f"❌ {file}")
+             missing_files.append(file)
+
+     # Check directories
+     print("\n📁 Checking required directories:")
+     for dir in required_dirs:
+         if os.path.isdir(dir):
+             print(f"✅ {dir}/")
+         else:
+             print(f"❌ {dir}/")
+             missing_dirs.append(dir)
+
+     # Check README content for team branding
+     print("\n🏷️ Checking TechMatrix Solvers branding:")
+     if os.path.exists('README.md'):
+         with open('README.md', 'r') as f:
+             readme_content = f.read()
+         if 'TechMatrix Solvers' in readme_content:
+             print("✅ Team branding present in README")
+         else:
+             print("❌ Team branding missing in README")
+
+         if 'Abhay Gupta' in readme_content:
+             print("✅ Team member info present")
+         else:
+             print("❌ Team member info missing")
+
+     # Check app.py for proper imports
+     print("\n🔧 Checking main application structure:")
+     if os.path.exists('app.py'):
+         with open('app.py', 'r') as f:
+             app_content = f.read()
+         if 'streamlit' in app_content:
+             print("✅ Streamlit framework detected")
+         if 'TechMatrix Solvers' in app_content:
+             print("✅ Team branding in application")
+         if 'pose_models' in app_content and 'pose_utils' in app_content:
+             print("✅ Core modules imported")
+
+     print("\n" + "=" * 50)
+
+     if missing_files or missing_dirs:
+         print("❌ Deployment verification FAILED")
+         if missing_files:
+             print(f"Missing files: {', '.join(missing_files)}")
+         if missing_dirs:
+             print(f"Missing directories: {', '.join(missing_dirs)}")
+         return False
+     else:
+         print("✅ Deployment verification PASSED")
+         print("🎉 Project is ready for deployment!")
+         print("\n📝 Deployment Instructions:")
+         print("1. Upload project to HuggingFace Spaces")
+         print("2. Select Streamlit SDK")
+         print("3. Set app_file: app.py")
+         print("4. The system will automatically install dependencies")
+         print("\n👥 TechMatrix Solvers Team:")
+         print("- Abhay Gupta (Team Lead)")
+         print("- Kripanshu Gupta (Backend Developer)")
+         print("- Dipanshu Patel (UI/UX Designer)")
+         print("- Bhumika Patel (Deployment & Female Presenter)")
+         print("\n🏫 Shri Ram Group of Institutions")
+         return True
+
+ def check_requirements():
+     """Check requirements.txt format"""
+     print("\n📦 Checking dependencies:")
+     try:
+         with open('requirements.txt', 'r') as f:
+             requirements = f.read().strip().split('\n')
+         print(f"✅ Found {len(requirements)} dependencies")
+
+         # Check for key dependencies
+         key_deps = ['streamlit', 'torch', 'keras', 'opencv-python', 'numpy']
+         for dep in key_deps:
+             if any(dep in req for req in requirements):
+                 print(f"✅ {dep} dependency found")
+             else:
+                 print(f"⚠️ {dep} dependency not explicitly found")
+
+     except Exception as e:
+         print(f"❌ Error reading requirements.txt: {e}")
+
+ if __name__ == "__main__":
+     print("TechMatrix Solvers ISL Translation System")
+     print("Deployment Verification Tool\n")
+
+     success = verify_files()
+     check_requirements()
+
+     if success:
+         sys.exit(0)
+     else:
+         sys.exit(1)
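The script above is intended to run from the Space root before pushing; it exits non-zero when required files are missing, so it can gate an upload step. A small invocation sketch (this wrapper is illustrative, not part of the repository):

import subprocess

result = subprocess.run(["python", "verify_deployment.py"])
if result.returncode == 0:
    print("Space is ready to push")
else:
    print("Fix the missing files listed above first")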