Abs6187 commited on
Commit
04caf86
·
verified ·
1 Parent(s): 51f6591

Create src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +824 -0
src/streamlit_app.py ADDED
@@ -0,0 +1,824 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ISL Sign Language Translation - TechMatrix Solvers Initiative
3
+ Main Streamlit Application
4
+
5
+ Developed by: TechMatrix Solvers Team
6
+ - Abhay Gupta (Team Lead)
7
+ - Kripanshu Gupta (Backend Developer)
8
+ - Dipanshu Patel (UI/UX Designer)
9
+ - Bhumika Patel (Deployment & Female Presenter)
10
+
11
+ Institution: Shri Ram Group of Institutions
12
+ """
13
+
14
+ import streamlit as st
15
+ st.write("🚀 TechMatrix Solvers ISL Translator Loading...")
16
+
17
+ import os
18
+ os.environ["KERAS_BACKEND"] = "torch"
19
+ import keras
20
+
21
+ import cv2
22
+ import numpy as np
23
+ import tempfile
24
+ import time
25
+ from PIL import Image
26
+ from keras.models import Sequential
27
+ import pickle
28
+ from keras.layers import LSTM, Dense, Bidirectional, Dropout, Input, BatchNormalization
29
+ from pose_models import create_bodypose_model, create_handpose_model
30
+ from expression_mapping import expression_mapping
31
+ from isl_processor import ISLTranslationModel
32
+ import pandas as pd
33
+ import ffmpeg
34
+ import subprocess
35
+ from typing import NamedTuple
36
+ import json
37
+ import pose_utils as utils
38
+ from huggingface_hub import hf_hub_download
39
+ import shutil, platform
40
+ import uuid
41
+
42
# Show the runtime environment at the top of the page so deployment
# problems (missing binaries, wrong library versions) are visible in the UI.
st.write("🔧 **System Information:**")

python_version = platform.python_version()
st.write(f"Python Version: {python_version}")

ffmpeg_path = shutil.which('ffmpeg')
ffprobe_path = shutil.which('ffprobe')
st.write(f"FFmpeg: {ffmpeg_path}, FFprobe: {ffprobe_path}")

# Each library is reported independently: a failure is shown as an error
# box and the script carries on.
try:
    import cv2
    opencv_version = cv2.__version__
    st.write(f"OpenCV Version: {opencv_version}")
except Exception as exc:
    st.error(f"OpenCV import failed: {exc}")

try:
    import torch
    torch_version, keras_version = torch.__version__, keras.__version__
    st.write(f"PyTorch: {torch_version}, Keras: {keras_version}")
except Exception as exc:
    st.error(f"PyTorch/Keras import failed: {exc}")
58
+
59
+
60
class VideoProbeResult(NamedTuple):
    """Immutable record holding the outcome of one ffprobe invocation."""

    # Exit status of the ffprobe process
    return_code: int
    # Text captured from the process's standard output
    json: str
    # Text captured from the process's standard error
    error: str
65
+
66
+
67
def probe_video_info(file_path) -> VideoProbeResult:
    """
    Run ``ffprobe`` on a media file and capture what it prints.

    ffprobe is asked for quiet logging and JSON output covering the
    container format and all streams.

    Args:
        file_path: Path to video file

    Returns:
        VideoProbeResult carrying the process exit status, the captured
        stdout text and the captured stderr text. The exit status is not
        checked here; callers decide how to treat a non-zero value.
    """
    ffprobe_args = ["ffprobe"]
    ffprobe_args += ["-v", "quiet"]
    ffprobe_args += ["-print_format", "json"]
    ffprobe_args += ["-show_format", "-show_streams"]
    ffprobe_args.append(file_path)

    # capture_output pipes both stdout and stderr; text decodes them to str
    completed = subprocess.run(ffprobe_args, capture_output=True, text=True)

    return VideoProbeResult(completed.returncode, completed.stdout, completed.stderr)
96
+
97
+
98
# Column names of the per-frame keypoint features, in the order they are
# fed to the model: body x/y (15 points), then for each hand x, y and
# peak text (21 points each).
body_features = [f'bodypeaks_{axis}_{i}' for axis in ('x', 'y') for i in range(15)]
hand0_features = [
    f'hand0peaks_{field}{i}'
    for field in ('x_', 'y_', 'peaktxt')
    for i in range(21)
]
hand1_features = [
    f'hand1peaks_{field}{i}'
    for field in ('x_', 'y_', 'peaktxt')
    for i in range(21)
]

feature_columns_processed = [*body_features, *hand0_features, *hand1_features]
label_columns = ['Expression_encoded']
105
+
106
+
107
@st.cache_resource
def create_time_series_sequences(isl_data, feature_columns, label_columns, window_size=20):
    """
    Slice every recording in a DataFrame into fixed-length feature windows.

    Rows are grouped by ('Type', 'Expression_encoded', 'FileName'). Inside
    each group one window starts at every row, so a group of n rows yields
    n windows. A window that runs past the end of its group is padded at
    the front with all-zero frames until it has ``window_size`` rows.

    Args:
        isl_data: Input DataFrame with ISL data; must contain the three
            grouping columns and every column named in feature_columns
        feature_columns: List of feature column names
        label_columns: List of label column names. Not read by this
            function (the label is the 'Expression_encoded' group key);
            kept so existing callers keep working
        window_size: Size of temporal window for sequence creation

    Returns:
        tuple: (X_sequences, y_sequences). X_sequences is a list of arrays
        with window_size rows and len(feature_columns) columns;
        y_sequences holds the matching 'Expression_encoded' value for each
        window. Both lists are empty when isl_data is empty.
    """
    if isl_data.empty:
        return [], []

    # Padding width follows the requested columns instead of a fixed 156,
    # so the function also works for other feature sets.
    feature_count = len(feature_columns)

    X_sequences = []
    y_sequences = []

    grouped = isl_data.groupby(['Type', 'Expression_encoded', 'FileName'])
    for (_, expression, _), file_df in grouped:
        # Extract the feature matrix once per group rather than per window
        group_features = file_df[feature_columns].values

        for start in range(group_features.shape[0]):
            window = group_features[start:start + window_size]
            missing_rows = window_size - window.shape[0]

            if missing_rows > 0:
                # Short tail window: blank frames go at the beginning
                blank_rows = np.zeros((missing_rows, feature_count))
                window = np.concatenate((blank_rows, window), axis=0)

            X_sequences.append(window)
            y_sequences.append(expression)

    return X_sequences, y_sequences
150
+
151
+
152
# Module-level slot for the translation model; it starts out empty
translation_model = None


@st.cache_resource
def load_translation_model():
    """
    Build the bidirectional-LSTM classifier and load its published weights.

    The network takes windows of 20 frames with 156 features each and ends
    in a softmax with one unit per entry of ``expression_mapping``. The
    weights file ``isl_model_final.keras`` is fetched from the Hugging Face
    Hub repository ``sunilsarolkar/isl-translation-model``.

    Returns:
        Configured Keras Sequential model for ISL translation
    """
    class_count = len(expression_mapping)

    # Layers are created in the same order in which they are stacked
    layer_stack = [
        Input(shape=(20, 156)),
        keras.layers.Masking(mask_value=0.),
        BatchNormalization(),
        Bidirectional(LSTM(32, recurrent_dropout=0.2, return_sequences=True)),
        Dropout(0.2),
        Bidirectional(LSTM(32, recurrent_dropout=0.2)),
        keras.layers.Activation('elu'),
        Dense(32, use_bias=False, kernel_initializer='he_normal'),
        BatchNormalization(),
        Dropout(0.2),
        keras.layers.Activation('elu'),
        Dense(32, kernel_initializer='he_normal', use_bias=False),
        BatchNormalization(),
        keras.layers.Activation('elu'),
        Dropout(0.2),
        Dense(class_count, activation='softmax'),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    # Download pre-trained model weights
    weights_path = hf_hub_download(
        repo_id="sunilsarolkar/isl-translation-model",
        filename="isl_model_final.keras",
    )
    model.load_weights(weights_path)

    return model
194
+
195
+
196
# Test-set tables hosted on the Hugging Face Hub
@st.cache_data
def load_test_data():
    """
    Download the two test-set CSV files and read them with pandas.

    Both files come from the dataset repository
    ``sunilsarolkar/isl-test-data``.

    Returns:
        tuple: (testing_df, test_files_df) read from ``testing_cleaned.csv``
        and ``test_files.csv`` respectively
    """
    csv_names = ("testing_cleaned.csv", "test_files.csv")

    # Resolve both local paths first, then parse them
    local_paths = [
        hf_hub_download(
            repo_id="sunilsarolkar/isl-test-data",
            filename=csv_name,
            repo_type="dataset",
        )
        for csv_name in csv_names
    ]
    frames_table, files_table = (pd.read_csv(path) for path in local_paths)

    return frames_table, files_table


# Make the test tables available to the rest of the script
testing_df, test_files_df = load_test_data()
220
+
221
+
222
class VideoWriter:
    """Feeds raw BGR frames to an ffmpeg subprocess that encodes them to a file."""

    def __init__(self, output_file, input_fps, input_framesize, input_pix_fmt, input_vcodec):
        """
        Start the ffmpeg process that will receive frames on its stdin.

        Args:
            output_file: Destination path; an existing file is overwritten
            input_fps: Frame rate passed to ffmpeg for the raw input stream
            input_framesize: Pair whose element 0 is used as the height and
                element 1 as the width of each raw frame
            input_pix_fmt: Pixel format requested for the encoded output
            input_vcodec: Video codec requested for the encoded output
        """
        frame_height = input_framesize[0]
        frame_width = input_framesize[1]

        raw_stream = ffmpeg.input(
            'pipe:',
            format='rawvideo',
            pix_fmt="bgr24",
            s=f'{frame_width}x{frame_height}',
            r=input_fps,
        )
        encoded_stream = raw_stream.output(
            output_file, pix_fmt=input_pix_fmt, vcodec=input_vcodec
        )
        self.ff_process = encoded_stream.overwrite_output().run_async(pipe_stdin=True)

    def write_frame(self, frame):
        """Send the bytes of one frame to the ffmpeg process."""
        raw_bytes = frame.tobytes()
        self.ff_process.stdin.write(raw_bytes)

    def close(self):
        """Close ffmpeg's input pipe and wait for the process to exit."""
        process = self.ff_process
        process.stdin.close()
        process.wait()
246
+
247
+
248
def calculate_weighted_average(numbers, weights):
    """
    Return the weighted mean of ``numbers``.

    Values and weights are paired positionally; if the sequences differ in
    length the extra items of the longer one take no part in the numerator.

    Args:
        numbers: List of numbers
        weights: List of weights

    Returns:
        float: Weighted average, or 0 when the weights add up to zero
    """
    weight_total = sum(weights)
    if weight_total == 0:
        return 0

    weighted_sum = 0
    for value, weight in zip(numbers, weights):
        weighted_sum = weighted_sum + value * weight

    return weighted_sum / weight_total
262
+
263
+
264
@st.cache_data
def resize_image(image, width=None, height=None, interpolation=cv2.INTER_AREA):
    """
    Scale an image to a target width or height, keeping its aspect ratio.

    Args:
        image: Input image; its first two shape entries are read as
            (height, width)
        width: Target width. When given it decides the scale, even if
            height is given as well
        height: Target height, used only when width is None
        interpolation: OpenCV interpolation method

    Returns:
        Resized image, or the input image itself when neither width nor
        height is given
    """
    src_height, src_width = image.shape[:2]

    if width is None and height is None:
        return image

    if width is not None:
        scale = width / float(src_width)
        target_size = (width, int(src_height * scale))
    else:
        scale = height / float(src_height)
        target_size = (int(src_width * scale), height)

    return cv2.resize(image, target_size, interpolation=interpolation)
293
+
294
+
295
# Configure Streamlit page
# NOTE(review): st.write is already called earlier in this script. Some
# Streamlit releases require set_page_config to be the first Streamlit
# command on a page -- confirm against the deployed version.
st.set_page_config(
    page_title="ISL Translation - TechMatrix Solvers",
    page_icon="🤟",
    layout="wide",
)

st.title('🤟 ISL Sign Language Translation - TechMatrix Solvers Initiative')

# Sidebar width rules plus the two CSS classes used by the HTML snippets below
page_css = """
    <style>
    [data-testid="stSidebar"][aria-expanded="true"] > div:first-child {
    width: 350px;
    }
    [data-testid="stSidebar"][aria-expanded="false"] > div:first-child {
    width: 350px;
    margin-left: -350px;
    }

    .team-info {
    background-color: #f0f2f6;
    padding: 1rem;
    border-radius: 0.5rem;
    margin: 1rem 0;
    }

    .tech-matrix-header {
    background: linear-gradient(90deg, #1e3a8a, #7c3aed);
    color: white;
    padding: 1rem;
    border-radius: 0.5rem;
    text-align: center;
    margin-bottom: 1rem;
    }
    </style>
    """
st.markdown(page_css, unsafe_allow_html=True)

# Team branding banner shown under the title
branding_html = """
    <div class="tech-matrix-header">
    <h2>🚀 TechMatrix Solvers</h2>
    <p>Innovating Accessible Technology Solutions</p>
    </div>
    """
st.markdown(branding_html, unsafe_allow_html=True)

# Sidebar configuration
sidebar = st.sidebar
sidebar.title('🤟 ISL Translation System')
sidebar.subheader('Configuration')

# Team information in sidebar
team_html = """
    <div class="team-info">
    <h3>👨‍💻 Development Team</h3>
    <ul>
    <li><strong>Abhay Gupta</strong> - Team Lead</li>
    <li><strong>Kripanshu Gupta</strong> - Backend Dev</li>
    <li><strong>Dipanshu Patel</strong> - UI/UX Designer</li>
    <li><strong>Bhumika Patel</strong> - Deployment</li>
    </ul>
    <p><em>Shri Ram Group of Institutions</em></p>
    </div>
    """
sidebar.markdown(team_html, unsafe_allow_html=True)

# Per-sign list of probabilities collected while a video is processed
frame_predictions = {}

# Application mode selection
available_modes = ['About Project', 'Test Video Translation']
app_mode = sidebar.selectbox('Choose Application Mode', available_modes)
376
+
377
if app_mode == 'About Project':
    # Static "about" page: project overview, dataset facts, model summary,
    # technology stack and team contacts.
    st.markdown(
        """
        ## 🎯 Project Overview

        Welcome to the **ISL Sign Language Translation System** developed by **TechMatrix Solvers**.
        This cutting-edge application demonstrates real-time Indian Sign Language recognition and
        translation using advanced deep learning techniques.

        ### 🏗️ Technical Architecture

        Our system combines multiple state-of-the-art technologies:

        1. **Body Pose Estimation**: 25-point skeletal tracking using OpenPose
        2. **Hand Landmark Detection**: 21-point hand keypoint identification
        3. **Temporal Modeling**: Bidirectional LSTM networks for sequence analysis
        4. **Real-time Processing**: Optimized inference pipeline for live translation
        """
    )

    st.markdown(
        """
        ### 📊 Dataset Information

        Our model is trained on the comprehensive [INCLUDE dataset](https://zenodo.org/records/4010759):
        """
    )

    # Dataset statistics table
    dataset_stats = {
        "Metric": [
            "Categories", "Total Words", "Training Videos",
            "Avg Videos/Class", "Avg Video Length", "Resolution", "Frame Rate"
        ],
        "Value": [
            "15", "263", "4,257", "16.3", "2.57s", "1920x1080", "25fps"
        ]
    }
    st.table(pd.DataFrame(dataset_stats))

    # Display dataset processing visualization. The image is optional, so
    # any ordinary failure falls back to an info box. `except Exception`
    # (rather than a bare `except`) lets BaseException-derived signals,
    # such as interpreter shutdown, pass through untouched.
    try:
        categories_image = np.array(Image.open('original_project/categories_processed.png'))
        st.image(categories_image, caption="📈 Processed Categories Distribution")
    except Exception:
        st.info("📊 Dataset visualization images will be displayed when available")

    # Model architecture information
    st.markdown(
        """
        ### 🧠 Neural Network Architecture

        ```python
        # TechMatrix Solvers LSTM Translation Model
        model = Sequential([
            Input(shape=(20, 156)), # 20-frame temporal window
            Masking(mask_value=0.),
            BatchNormalization(),
            Bidirectional(LSTM(32, recurrent_dropout=0.2, return_sequences=True)),
            Dropout(0.2),
            Bidirectional(LSTM(32, recurrent_dropout=0.2)),
            Dense(32, activation='elu'),
            BatchNormalization(),
            Dropout(0.2),
            Dense(len(expression_mapping), activation='softmax')
        ])
        ```

        **Model Statistics:**
        - Total Parameters: 82,679 (322.96 KB)
        - Trainable Parameters: 82,239 (321.25 KB)
        - Input Features: 156-dimensional vectors
        - Temporal Window: 20 frames
        """
    )

    # Technology stack
    col1, col2 = st.columns(2)

    with col1:
        # The backend bullet names PyTorch because this script sets
        # KERAS_BACKEND to "torch" before importing keras.
        st.markdown(
            """
            ### 🛠️ Technology Stack

            **Frontend & UI:**
            - Streamlit (Interactive Web App)
            - Custom CSS Styling
            - Responsive Design

            **Deep Learning:**
            - Keras (PyTorch backend)
            - PyTorch Integration
            - LSTM Networks
            - OpenPose Models
            """
        )

    with col2:
        st.markdown(
            """
            ### 📱 Key Features

            **Real-time Processing:**
            - Live video analysis
            - Pose keypoint extraction
            - Temporal sequence modeling
            - Confidence scoring

            **User Experience:**
            - Intuitive interface
            - Visual feedback
            - Progress tracking
            - Result visualization
            """
        )

    # Team contact information
    st.markdown(
        """
        ### 📞 Contact Information

        **TechMatrix Solvers Team:**

        | Name | Role | Email | Phone |
        |------|------|-------|--------|
        | **Abhay Gupta** | Team Lead | contact2abhaygupta6187@gmail.com | 8115814535 |
        | **Kripanshu Gupta** | Backend Developer | guptakripanshu83@gmail.com | 7067058400 |
        | **Dipanshu Patel** | UI/UX Designer | dipanshupatel43@gmail.com | 9294526404 |
        | **Bhumika Patel** | Deployment & Presenter | bp7249951@gmail.com | 9302271422 |

        **Institution:** Shri Ram Group of Institutions

        ### 📚 Documentation

        For detailed technical documentation and implementation details, please refer to our
        [comprehensive documentation](https://docs.google.com/document/d/1mzr2KGHRJT5heUjFF20NQ3Gb89urpjZJ/edit?usp=sharing).

        ---

        **© 2024 TechMatrix Solvers - Innovating Accessible Technology Solutions**
        """
    )
519
+
520
+ elif app_mode == 'Test Video Translation':
521
+ # Video selection interface
522
+ st.markdown("## 🎥 Test Video Translation")
523
+
524
+ category = st.sidebar.selectbox(
525
+ 'Choose Category',
526
+ np.sort(test_files_df['Category'].unique(), axis=-1, kind='mergesort')
527
+ )
528
+
529
+ # Filter by category
530
+ category_mask = (test_files_df['Category'] == category)
531
+ test_files_category = test_files_df[category_mask]
532
+
533
+ class_name = st.sidebar.selectbox(
534
+ 'Choose Class',
535
+ np.sort(test_files_category['Class'].unique(), axis=-1, kind='mergesort')
536
+ )
537
+
538
+ # Filter by class
539
+ class_mask = (test_files_df['Class'] == class_name)
540
+ filename = st.sidebar.selectbox(
541
+ 'Choose File',
542
+ np.sort(test_files_category[class_mask]['Filename'].unique(), axis=-1, kind='mergesort')
543
+ )
544
+
545
+ # Display selection info
546
+ st.info(f"📂 Selected: {category} → {class_name} → {filename}")
547
+
548
+ if st.sidebar.button("🚀 Start Translation", type="primary"):
549
+ # Filter test data for selected video
550
+ data_mask = ((testing_df['FileName'] == filename) &
551
+ (testing_df['Type'] == category) &
552
+ (testing_df['Expression'] == class_name))
553
+
554
+ window_size = 20
555
+ current_test_data = testing_df[data_mask]
556
+
557
+ if current_test_data.empty:
558
+ st.error(f"⚠️ No matching data found for: {filename} | {category} | {class_name}")
559
+ st.stop()
560
+ else:
561
+ st.success(f"✅ Loaded {current_test_data.shape[0]} frames for processing")
562
+
563
+ # Create time series data
564
+ X_test_processed, y_test_processed = create_time_series_sequences(
565
+ current_test_data, feature_columns_processed, label_columns, window_size=window_size
566
+ )
567
+ X_test_processed = np.array(X_test_processed)
568
+
569
+ # Configure Streamlit display options
570
+ st.set_option('deprecation.showfileUploaderEncoding', False)
571
+
572
+ st.sidebar.markdown('---')
573
+ st.markdown(
574
+ """
575
+ <style>
576
+ [data-testid="stSidebar"][aria-expanded="true"] > div:first-child {
577
+ width: 400px;
578
+ }
579
+ [data-testid="stSidebar"][aria-expanded="false"] > div:first-child {
580
+ width: 400px;
581
+ margin-left: -400px;
582
+ }
583
+ </style>
584
+ """,
585
+ unsafe_allow_html=True,
586
+ )
587
+
588
+ st.sidebar.markdown('---')
589
+ st.markdown('## 📊 Translation Results')
590
+
591
+ # Progress tracking container
592
+ progress_container = st.empty()
593
+
594
+ with progress_container.container():
595
+ progress_df = pd.DataFrame([['--', '--']],
596
+ columns=['Frames Processed', 'Detected Sign'])
597
+ progress_table = st.table(progress_df)
598
+
599
+ # Video display container
600
+ video_display = st.empty()
601
+ st.markdown("<hr/>", unsafe_allow_html=True)
602
+ frame_display = st.empty()
603
+
604
+ # Download test video
605
+ video_file_path = hf_hub_download(
606
+ repo_id="sunilsarolkar/isl-test-data",
607
+ filename=f'test/{category}/{class_name}/{filename}',
608
+ repo_type="dataset"
609
+ )
610
+
611
+ if not os.path.exists(video_file_path):
612
+ st.error(f"⚠️ Video file not found: {video_file_path}")
613
+ st.stop()
614
+
615
+ # Initialize video capture
616
+ video_capture = cv2.VideoCapture(video_file_path)
617
+
618
+ # Get video metadata
619
+ probe_result = probe_video_info(video_file_path)
620
+ video_info = json.loads(probe_result.json)
621
+ video_stream = [stream for stream in video_info["streams"] if stream["codec_type"] == "video"][0]
622
+
623
+ input_fps = video_stream["avg_frame_rate"]
624
+ input_pix_fmt = video_stream["pix_fmt"]
625
+ input_vcodec = video_stream["codec_name"]
626
+ format_name = video_info["format"]["format_name"].split(",")[0]
627
+
628
+ # Video properties
629
+ width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
630
+ height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
631
+ fps_input = int(video_capture.get(cv2.CAP_PROP_FPS))
632
+
633
+ # Processing variables
634
+ total_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
635
+ frame_buffer = []
636
+
637
+ # Output video configuration
638
+ output_file = f"/tmp/techmatrix_output_{uuid.uuid4().hex}.{format_name}"
639
+ video_writer = None
640
+ weighted_predictions = {}
641
+
642
+ frame_idx = 0
643
+
644
+ try:
645
+ # Process each frame
646
+ for _, frame_data in current_test_data.iterrows():
647
+ if not video_capture.isOpened():
648
+ st.error(f"❌ Could not open video: {video_file_path}")
649
+ break
650
+
651
+ if video_capture.isOpened():
652
+ ret, frame = video_capture.read()
653
+
654
+ if len(frame_buffer) < window_size:
655
+ # Initial frames - build up buffer
656
+ visualization_canvas = utils.render_stick_model(
657
+ frame,
658
+ eval(frame_data['bodypose_circles']),
659
+ eval(frame_data['bodypose_sticks']),
660
+ eval(frame_data['handpose_edges']),
661
+ eval(frame_data['handpose_peaks'])
662
+ )
663
+
664
+ # Add prediction plots
665
+ canvas_with_predictions = utils.create_bar_plot_visualization(
666
+ visualization_canvas, {},
667
+ f'Building Buffer - Frame {frame_idx + 1} [No Predictions Yet]',
668
+ visualization_canvas
669
+ )
670
+ canvas_with_predictions = utils.create_bar_plot_visualization(
671
+ canvas_with_predictions, weighted_predictions,
672
+ f'Weighted Average - Frame {frame_idx + 1} [No Predictions Yet]',
673
+ visualization_canvas
674
+ )
675
+ canvas_with_predictions = utils.add_bottom_padding(
676
+ canvas_with_predictions, (255, 255, 255), 100
677
+ )
678
+
679
+ # Initialize video writer
680
+ if video_writer is None:
681
+ input_framesize = canvas_with_predictions.shape[:2]
682
+ video_writer = VideoWriter(output_file, input_fps, input_framesize,
683
+ input_pix_fmt, input_vcodec)
684
+
685
+ video_writer.write_frame(canvas_with_predictions)
686
+
687
+ # Update progress display
688
+ with progress_container.container():
689
+ progress_df = pd.DataFrame(
690
+ [[f'{frame_idx + 1}/{current_test_data.shape[0]}',
691
+ '<Building 20-frame buffer>']],
692
+ columns=['Frames Processed', 'Detected Sign']
693
+ )
694
+ progress_table = st.table(progress_df)
695
+
696
+ frame_buffer.append(frame)
697
+
698
+ # Display current frame
699
+ with video_display.container():
700
+ st.image(canvas_with_predictions, channels='BGR', use_column_width=True)
701
+ else:
702
+ # Process with full buffer - make predictions
703
+ frame_buffer[:-1] = frame_buffer[1:]
704
+ frame_buffer[-1] = frame
705
+
706
+ # Load translation model
707
+ translation_model = load_translation_model()
708
+
709
+ # Make prediction on current window
710
+ sequence_idx = frame_idx - 20
711
+ prediction_output = translation_model(
712
+ X_test_processed[sequence_idx].reshape(
713
+ 1, X_test_processed[sequence_idx].shape[0],
714
+ X_test_processed[sequence_idx].shape[1]
715
+ )
716
+ )
717
+ prediction_output = prediction_output[0].cpu().detach().numpy()
718
+
719
+ # Get top predictions
720
+ top_prediction_idx = np.argmax(prediction_output)
721
+ top_3_indices = prediction_output.argsort()[-3:][::-1]
722
+ top_3_signs = [expression_mapping[i] for i in top_3_indices]
723
+ top_3_probabilities = prediction_output[top_3_indices]
724
+
725
+ # Update frame-wise predictions for weighted average
726
+ for sign, prob in zip(top_3_signs, top_3_probabilities):
727
+ if sign not in frame_predictions:
728
+ frame_predictions[sign] = []
729
+ frame_predictions[sign].append(prob)
730
+
731
+ # Current frame predictions
732
+ current_predictions = {}
733
+ for sign, prob in zip(top_3_signs, top_3_probabilities):
734
+ current_predictions[sign] = prob
735
+
736
+ # Calculate weighted averages
737
+ for sign in frame_predictions:
738
+ sign_predictions = frame_predictions[sign]
739
+ sign_weights = [len(sign_predictions) for _ in range(len(sign_predictions))]
740
+ weighted_predictions[sign] = calculate_weighted_average(
741
+ sign_predictions, sign_weights
742
+ )
743
+
744
+ # Sort predictions by confidence
745
+ sorted_predictions = dict(
746
+ sorted(weighted_predictions.items(), key=lambda item: item[1], reverse=True)
747
+ )
748
+
749
+ # Create visualization
750
+ visualization_canvas = utils.render_stick_model(
751
+ frame,
752
+ eval(frame_data['bodypose_circles']),
753
+ eval(frame_data['bodypose_sticks']),
754
+ eval(frame_data['handpose_edges']),
755
+ eval(frame_data['handpose_peaks'])
756
+ )
757
+
758
+ # Add prediction visualizations
759
+ canvas_with_predictions = utils.create_bar_plot_visualization(
760
+ visualization_canvas, current_predictions,
761
+ f'Current Window Prediction (Frames {sequence_idx + 1}-{frame_idx + 1})',
762
+ visualization_canvas
763
+ )
764
+ canvas_with_predictions = utils.create_bar_plot_visualization(
765
+ canvas_with_predictions, weighted_predictions,
766
+ f'Cumulative Weighted Average - Frame {frame_idx + 1}',
767
+ visualization_canvas
768
+ )
769
+ canvas_with_predictions = utils.add_bottom_padding(
770
+ canvas_with_predictions, (255, 255, 255), 100
771
+ )
772
+
773
+ video_writer.write_frame(canvas_with_predictions)
774
+
775
+ # Get best prediction for display
776
+ best_sign = max(weighted_predictions, key=weighted_predictions.get)
777
+ best_confidence = weighted_predictions[best_sign]
778
+
779
+ # Update progress display
780
+ with progress_container.container():
781
+ progress_df = pd.DataFrame(
782
+ [[f'{frame_idx + 1}/{current_test_data.shape[0]}',
783
+ f'{best_sign} ({best_confidence * 100:.2f}%)']],
784
+ columns=['Frames Processed', 'Detected Sign']
785
+ )
786
+ progress_table = st.table(progress_df)
787
+
788
+ # Display current frame
789
+ with video_display.container():
790
+ st.image(canvas_with_predictions, channels='BGR', use_column_width=True)
791
+
792
+ frame_idx += 1
793
+
794
+ # Finalize video processing
795
+ st.success("✅ Video processing completed!")
796
+
797
+ with video_display.container():
798
+ if video_writer is not None:
799
+ video_writer.close()
800
+ with open(output_file, 'rb') as video_file:
801
+ output_video_bytes = video_file.read()
802
+ st.video(output_video_bytes)
803
+ st.info(f"💾 Processed video saved: {output_file}")
804
+ else:
805
+ st.warning("⚠️ No video output generated")
806
+
807
+ finally:
808
+ # Clean up resources
809
+ video_capture.release()
810
+ if video_writer is not None:
811
+ video_writer.close()
812
+ cv2.destroyAllWindows()
813
+
814
# Footer: horizontal rule followed by a centred, grey attribution block
footer_html = """
    ---
    <div style="text-align: center; color: #666;">
    <p><strong>TechMatrix Solvers</strong> | Shri Ram Group of Institutions</p>
    <p>Innovating Accessible Technology Solutions for Everyone 🚀</p>
    </div>
    """
st.markdown(footer_html, unsafe_allow_html=True)