Faham committed
Commit b1acf7e · 1 Parent(s): e2e4e08

UPDATE: codebase refactored to be more readable and optimized
.gitignore CHANGED
@@ -1,6 +1,6 @@
 # Model files
 *.pth
-models/*.pth
+model_weights/*.pth
 *.pt
 *.pkl
 *.h5
@@ -40,7 +40,6 @@ venv/
 env/
 ENV/
 .venv/
-.venv2/
 .env/
 
 # IDE
@@ -63,4 +62,4 @@ logs/
 .cache/
 .pytest_cache/
 
-models/
+model_weights/
app.py CHANGED
@@ -1,781 +1,67 @@
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  from PIL import Image
4
- import os
5
- import torch
6
- import torch.nn as nn
7
- from torchvision import transforms, models
8
- import torch.nn.functional as F
9
- import cv2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- # Import the Google Drive model manager
12
- from simple_model_manager import SimpleModelManager
 
13
 
14
  # Page configuration
15
  st.set_page_config(
16
- page_title="Multimodal Sentiment Analysis",
17
- page_icon="🧠",
18
- layout="wide",
19
  initial_sidebar_state="expanded",
20
  )
21
 
22
- # Custom CSS for better styling
23
- st.markdown(
24
- """
25
- <style>
26
- .main-header {
27
- font-size: 2.5rem;
28
- font-weight: bold;
29
- color: #1f77b4;
30
- text-align: center;
31
- margin-bottom: 2rem;
32
- }
33
- .model-card {
34
- background-color: #f0f2f6;
35
- padding: 1.5rem;
36
- border-radius: 10px;
37
- margin: 1rem 0;
38
- border-left: 4px solid #1f77b4;
39
- }
40
- .result-box {
41
- background-color: #e8f4fd;
42
- padding: 1rem;
43
- border-radius: 8px;
44
- border: 1px solid #1f77b4;
45
- margin: 1rem 0;
46
- }
47
- .upload-section {
48
- background-color: #f8f9fa;
49
- padding: 1.5rem;
50
- border-radius: 10px;
51
- border: 2px dashed #dee2e6;
52
- text-align: center;
53
- margin: 1rem 0;
54
- }
55
- </style>
56
- """,
57
- unsafe_allow_html=True,
58
- )
59
-
60
-
61
- # Initialize the Google Drive model manager
62
- @st.cache_resource
63
- def get_model_manager():
64
- """Get the Google Drive model manager instance"""
65
- try:
66
- manager = SimpleModelManager()
67
- return manager
68
- except Exception as e:
69
- st.error(f"Failed to initialize model manager: {e}")
70
- return None
71
-
72
-
73
- # Global variables for models
74
- @st.cache_resource
75
- def load_vision_model():
76
- """Load the pre-trained ResNet-50 vision sentiment model from Google Drive"""
77
- try:
78
- manager = get_model_manager()
79
- if manager is None:
80
- st.error("Model manager not available")
81
- return None, None, None
82
-
83
- # Load the model using the Google Drive manager
84
- model, device, num_classes = manager.load_vision_model()
85
-
86
- if model is None:
87
- st.error("Failed to load vision model from Google Drive")
88
- return None, None, None
89
-
90
- st.success(f"Vision model loaded successfully with {num_classes} classes!")
91
- return model, device, num_classes
92
- except Exception as e:
93
- st.error(f"Error loading vision model: {str(e)}")
94
- return None, None, None
95
-
96
-
97
- @st.cache_data
98
- def get_vision_transforms():
99
- """Get the image transforms used during FER2013 training"""
100
- return transforms.Compose(
101
- [
102
- transforms.Resize(224), # Match training: transforms.Resize(224)
103
- transforms.CenterCrop(224), # Match training: transforms.CenterCrop(224)
104
- transforms.ToTensor(),
105
- transforms.Normalize(
106
- mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
107
- ), # ImageNet normalization
108
- ]
109
- )
110
-
111
-
112
- def detect_and_preprocess_face(image, crop_tightness=0.05):
113
- """
114
- Detect face in image, crop to face region, convert to grayscale, and resize to 224x224
115
- to match FER2013 dataset format (grayscale converted to 3-channel RGB)
116
-
117
- Args:
118
- image: Input image (PIL Image or numpy array)
119
- crop_tightness: Padding around face (0.0 = no padding, 0.3 = 30% padding)
120
- """
121
- try:
122
- import cv2
123
- import numpy as np
124
-
125
- # Convert PIL image to OpenCV format
126
- if isinstance(image, Image.Image):
127
- # Convert PIL to numpy array
128
- img_array = np.array(image)
129
- # Convert RGB to BGR for OpenCV
130
- if len(img_array.shape) == 3:
131
- img_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
132
- else:
133
- img_array = image
134
-
135
- # Load face detection cascade
136
- face_cascade = cv2.CascadeClassifier(
137
- cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
138
- )
139
-
140
- # Convert to grayscale for face detection (detection works better on grayscale)
141
- gray = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)
142
-
143
- # Detect faces
144
- faces = face_cascade.detectMultiScale(
145
- gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
146
- )
147
-
148
- if len(faces) == 0:
149
- st.warning("No face detected in the image. Using center crop instead.")
150
- # Fallback: center crop and resize
151
- if isinstance(image, Image.Image):
152
- # Convert to RGB first
153
- rgb_pil = image.convert("RGB")
154
- # Center crop to square
155
- width, height = rgb_pil.size
156
- size = min(width, height)
157
- left = (width - size) // 2
158
- top = (height - size) // 2
159
- right = left + size
160
- bottom = top + size
161
- cropped = rgb_pil.crop((left, top, right, bottom))
162
- # Resize to 224x224 (matching FER2013 training: transforms.Resize(224))
163
- resized = cropped.resize((224, 224), Image.Resampling.LANCZOS)
164
-
165
- # Convert to grayscale and then to 3-channel RGB
166
- gray_pil = resized.convert("L")
167
- # Convert back to RGB (this replicates grayscale values to all 3 channels)
168
- gray_rgb_pil = gray_pil.convert("RGB")
169
- return gray_rgb_pil
170
- else:
171
- return None
172
-
173
- # Get the largest face (assuming it's the main subject)
174
- x, y, w, h = max(faces, key=lambda rect: rect[2] * rect[3])
175
-
176
- # Add padding around the face based on user preference
177
- padding_x = int(w * crop_tightness)
178
- padding_y = int(h * crop_tightness)
179
-
180
- # Ensure we don't go out of bounds
181
- x1 = max(0, x - padding_x)
182
- y1 = max(0, y - padding_y)
183
- x2 = min(img_array.shape[1], x + w + padding_x)
184
- y2 = min(img_array.shape[0], y + h + padding_y)
185
-
186
- # Crop to face region
187
- face_crop = img_array[y1:y2, x1:x2]
188
-
189
- # Convert BGR to RGB first
190
- face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
191
-
192
- # Convert to grayscale
193
- face_gray = cv2.cvtColor(face_crop_rgb, cv2.COLOR_RGB2GRAY)
194
-
195
- # Resize to 224x224 (matching FER2013 training: transforms.Resize(224))
196
- face_resized = cv2.resize(face_gray, (224, 224), interpolation=cv2.INTER_AREA)
197
-
198
- # Convert grayscale to 3-channel RGB (replicate grayscale values)
199
- face_rgb_3channel = cv2.cvtColor(face_resized, cv2.COLOR_GRAY2RGB)
200
-
201
- # Convert back to PIL Image
202
- face_pil = Image.fromarray(face_rgb_3channel)
203
-
204
- return face_pil
205
-
206
- except ImportError:
207
- st.error(
208
- "OpenCV not installed. Please install it with: pip install opencv-python"
209
- )
210
- st.info("Falling back to basic preprocessing...")
211
- # Fallback: basic grayscale conversion and resize
212
- if isinstance(image, Image.Image):
213
- rgb_pil = image.convert("RGB")
214
- resized = rgb_pil.resize((48, 48), Image.Resampling.LANCZOS)
215
- # Convert to grayscale and then to 3-channel RGB
216
- gray_pil = resized.convert("L")
217
- gray_rgb_pil = gray_pil.convert("RGB")
218
- return gray_rgb_pil
219
- return None
220
- except Exception as e:
221
- st.error(f"Error in face detection: {str(e)}")
222
- st.info("Falling back to basic preprocessing...")
223
- # Fallback: basic grayscale conversion and resize
224
- if isinstance(image, Image.Image):
225
- rgb_pil = image.convert("RGB")
226
- resized = rgb_pil.resize((48, 48), Image.Resampling.LANCZOS)
227
- # Convert to grayscale and then to 3-channel RGB
228
- gray_pil = resized.convert("L")
229
- gray_rgb_pil = gray_pil.convert("RGB")
230
- return gray_rgb_pil
231
- return None
232
-
233
-
234
- def get_sentiment_mapping(num_classes):
235
- """Get the sentiment mapping based on number of classes"""
236
- if num_classes == 3:
237
- return {0: "Negative", 1: "Neutral", 2: "Positive"}
238
- elif num_classes == 4:
239
- # Common 4-class emotion mapping
240
- return {0: "Angry", 1: "Sad", 2: "Happy", 3: "Neutral"}
241
- elif num_classes == 7:
242
- # FER2013 7-class emotion mapping
243
- return {
244
- 0: "Angry",
245
- 1: "Disgust",
246
- 2: "Fear",
247
- 3: "Happy",
248
- 4: "Sad",
249
- 5: "Surprise",
250
- 6: "Neutral",
251
- }
252
- else:
253
- # Generic mapping for unknown number of classes
254
- return {i: f"Class_{i}" for i in range(num_classes)}
255
-
256
-
257
- # Placeholder functions for model predictions
258
- def predict_text_sentiment(text):
259
- """
260
- Analyze text sentiment using TextBlob
261
- """
262
- if not text or text.strip() == "":
263
- return "No text provided", 0.0
264
-
265
- try:
266
- from textblob import TextBlob
267
-
268
- # Create TextBlob object
269
- blob = TextBlob(text)
270
-
271
- # Get polarity (-1 to 1, where -1 is very negative, 1 is very positive)
272
- polarity = blob.sentiment.polarity
273
-
274
- # Get subjectivity (0 to 1, where 0 is very objective, 1 is very subjective)
275
- subjectivity = blob.sentiment.subjectivity
276
-
277
- # Convert polarity to sentiment categories
278
- if polarity > 0.1:
279
- sentiment = "Positive"
280
- confidence = min(0.95, 0.6 + abs(polarity) * 0.3)
281
- elif polarity < -0.1:
282
- sentiment = "Negative"
283
- confidence = min(0.95, 0.6 + abs(polarity) * 0.3)
284
- else:
285
- sentiment = "Neutral"
286
- confidence = 0.7 - abs(polarity) * 0.2
287
-
288
- # Round confidence to 2 decimal places
289
- confidence = round(confidence, 2)
290
-
291
- return sentiment, confidence
292
-
293
- except ImportError:
294
- st.error("TextBlob not installed. Please install it with: pip install textblob")
295
- return "TextBlob not available", 0.0
296
- except Exception as e:
297
- st.error(f"Error in text sentiment analysis: {str(e)}")
298
- return "Error occurred", 0.0
299
-
300
-
301
- @st.cache_resource
302
- def load_audio_model():
303
- """Load the pre-trained Wav2Vec2 audio sentiment model from Google Drive"""
304
- try:
305
- manager = get_model_manager()
306
- if manager is None:
307
- st.error("Model manager not available")
308
- return None, None, None, None
309
-
310
- # Load the model using the Google Drive manager
311
- model, device = manager.load_audio_model()
312
-
313
- if model is None:
314
- st.error("Failed to load audio model from Google Drive")
315
- return None, None, None, None
316
-
317
- # For Wav2Vec2 models, we need to determine the number of classes
318
- # This is typically available in the model configuration
319
- try:
320
- num_classes = model.config.num_labels
321
- except:
322
- # Fallback: try to infer from the model
323
- try:
324
- num_classes = model.classifier.out_features
325
- except:
326
- num_classes = 3 # Default assumption
327
-
328
- # Load feature extractor
329
- from transformers import AutoFeatureExtractor
330
-
331
- feature_extractor = AutoFeatureExtractor.from_pretrained(
332
- "facebook/wav2vec2-base"
333
- )
334
-
335
- st.success(f"Audio model loaded successfully with {num_classes} classes!")
336
- return model, device, num_classes, feature_extractor
337
- except Exception as e:
338
- st.error(f"Error loading audio model: {str(e)}")
339
- return None, None, None, None
340
-
341
-
342
- def predict_audio_sentiment(audio_bytes):
343
- """
344
- Analyze audio sentiment using fine-tuned Wav2Vec2 model
345
- Preprocessing matches CREMA-D + RAVDESS training specifications:
346
- - Target sampling rate: 16kHz
347
- - Max duration: 5.0 seconds
348
- - Feature extraction: AutoFeatureExtractor with max_length, truncation, padding
349
- """
350
- if audio_bytes is None:
351
- return "No audio provided", 0.0
352
-
353
- try:
354
- # Load model if not already loaded
355
- model, device, num_classes, feature_extractor = load_audio_model()
356
- if model is None:
357
- return "Model not loaded", 0.0
358
-
359
- # Load and preprocess audio
360
- import librosa
361
- import tempfile
362
-
363
- # Save audio bytes to temporary file
364
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
365
- tmp_file.write(audio_bytes)
366
- tmp_file_path = tmp_file.name
367
-
368
- try:
369
- # Load audio with librosa
370
- audio, sr = librosa.load(tmp_file_path, sr=None)
371
-
372
- # Resample to 16kHz if needed
373
- if sr != 16000:
374
- audio = librosa.resample(y=audio, orig_sr=sr, target_sr=16000)
375
-
376
- # Preprocess with feature extractor (matching CREMA-D + RAVDESS training exactly)
377
- # From training: max_length=int(max_duration_s * TARGET_SAMPLING_RATE) = 5.0 * 16000
378
- inputs = feature_extractor(
379
- audio,
380
- sampling_rate=16000,
381
- max_length=int(5.0 * 16000), # 5 seconds max (matching training)
382
- truncation=True,
383
- padding="max_length",
384
- return_tensors="pt",
385
- )
386
-
387
- # Move to device
388
- input_values = inputs.input_values.to(device)
389
-
390
- # Run inference
391
- with torch.no_grad():
392
- outputs = model(input_values)
393
- probabilities = torch.softmax(outputs.logits, dim=1)
394
- confidence, predicted = torch.max(probabilities, 1)
395
-
396
- # Get sentiment mapping based on number of classes
397
- if num_classes == 3:
398
- sentiment_map = {0: "Negative", 1: "Neutral", 2: "Positive"}
399
- else:
400
- # Generic mapping for unknown number of classes
401
- sentiment_map = {i: f"Class_{i}" for i in range(num_classes)}
402
-
403
- sentiment = sentiment_map[predicted.item()]
404
- confidence_score = confidence.item()
405
-
406
- return sentiment, confidence_score
407
-
408
- finally:
409
- # Clean up temporary file
410
- os.unlink(tmp_file_path)
411
-
412
- except ImportError as e:
413
- st.error(f"Required library not installed: {str(e)}")
414
- st.info("Please install: pip install librosa transformers")
415
- return "Library not available", 0.0
416
- except Exception as e:
417
- st.error(f"Error in audio sentiment prediction: {str(e)}")
418
- return "Error occurred", 0.0
419
-
420
-
421
- def predict_vision_sentiment(image, crop_tightness=0.05):
422
- """
423
- Load ResNet-50 and run inference for vision sentiment analysis
424
-
425
- Args:
426
- image: Input image (PIL Image or numpy array)
427
- crop_tightness: Padding around face (0.0 = no padding, 0.3 = 30% padding)
428
- """
429
- if image is None:
430
- return "No image provided", 0.0
431
-
432
- try:
433
- # Load model if not already loaded
434
- model, device, num_classes = load_vision_model()
435
- if model is None:
436
- return "Model not loaded", 0.0
437
-
438
- # Preprocess image to match FER2013 format
439
- st.info(
440
- "Detecting face and preprocessing image to match training data format..."
441
- )
442
- preprocessed_image = detect_and_preprocess_face(image, crop_tightness=0.0)
443
-
444
- if preprocessed_image is None:
445
- return "Image preprocessing failed", 0.0
446
-
447
- # Show preprocessed image
448
- st.image(
449
- preprocessed_image,
450
- caption="Preprocessed Image (48x48 Grayscale → 3-channel RGB)",
451
- width=200,
452
- )
453
-
454
- # Get transforms
455
- transform = get_vision_transforms()
456
-
457
- # Convert preprocessed image to tensor
458
- image_tensor = transform(preprocessed_image).unsqueeze(0).to(device)
459
-
460
- # Run inference
461
- with torch.no_grad():
462
- outputs = model(image_tensor)
463
-
464
- # Debug: print output shape
465
- st.info(f"Model output shape: {outputs.shape}")
466
-
467
- probabilities = F.softmax(outputs, dim=1)
468
- confidence, predicted = torch.max(probabilities, 1)
469
-
470
- # Get sentiment mapping based on number of classes
471
- sentiment_map = get_sentiment_mapping(num_classes)
472
- sentiment = sentiment_map[predicted.item()]
473
- confidence_score = confidence.item()
474
-
475
- return sentiment, confidence_score
476
-
477
- except Exception as e:
478
- st.error(f"Error in vision sentiment prediction: {str(e)}")
479
- st.error(
480
- f"Model output shape mismatch. Expected {num_classes} classes but got different."
481
- )
482
- return "Error occurred", 0.0
483
-
484
-
485
- def predict_fused_sentiment(text=None, audio_bytes=None, image=None):
486
- """
487
- TODO: Implement ensemble/fusion logic combining all three models
488
- This is a placeholder function for fused sentiment analysis
489
- """
490
- # Placeholder logic - replace with actual fusion implementation
491
- results = []
492
-
493
- if text:
494
- text_sentiment, text_conf = predict_text_sentiment(text)
495
- results.append((text_sentiment, text_conf))
496
-
497
- if audio_bytes:
498
- audio_sentiment, audio_conf = predict_audio_sentiment(audio_bytes)
499
- results.append((audio_sentiment, audio_conf))
500
-
501
- if image:
502
- vision_sentiment, vision_conf = predict_vision_sentiment(image)
503
- results.append((vision_sentiment, vision_conf))
504
-
505
- if not results:
506
- return "No inputs provided", 0.0
507
-
508
- # Simple ensemble logic (replace with your fusion strategy)
509
- sentiment_counts = {}
510
- total_confidence = 0
511
-
512
- for sentiment, confidence in results:
513
- sentiment_counts[sentiment] = sentiment_counts.get(sentiment, 0) + 1
514
- total_confidence += confidence
515
-
516
- # Majority voting with confidence averaging
517
- final_sentiment = max(sentiment_counts, key=sentiment_counts.get)
518
- avg_confidence = total_confidence / len(results)
519
-
520
- return final_sentiment, avg_confidence
521
-
522
-
523
- def extract_frames_from_video(video_file, max_frames=10):
524
- """
525
- Extract frames from video file for vision sentiment analysis
526
-
527
- Args:
528
- video_file: StreamlitUploadedFile or bytes
529
- max_frames: Maximum number of frames to extract
530
-
531
- Returns:
532
- List of PIL Image objects
533
- """
534
- try:
535
- import cv2
536
- import numpy as np
537
- import tempfile
538
-
539
- # Save video bytes to temporary file
540
- with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
541
- if hasattr(video_file, "getvalue"):
542
- tmp_file.write(video_file.getvalue())
543
- else:
544
- tmp_file.write(video_file)
545
- tmp_file_path = tmp_file.name
546
-
547
- try:
548
- # Open video with OpenCV
549
- cap = cv2.VideoCapture(tmp_file_path)
550
-
551
- if not cap.isOpened():
552
- st.error("Could not open video file")
553
- return []
554
-
555
- frames = []
556
- total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
557
- fps = cap.get(cv2.CAP_PROP_FPS)
558
- duration = total_frames / fps if fps > 0 else 0
559
-
560
- st.info(
561
- f"📹 Video: {total_frames} frames, {fps:.1f} FPS, {duration:.1f}s duration"
562
- )
563
-
564
- # Extract frames at strategic intervals
565
- if total_frames > 0:
566
- # Select frames: start, 25%, 50%, 75%, end
567
- frame_indices = [
568
- 0,
569
- int(total_frames * 0.25),
570
- int(total_frames * 0.5),
571
- int(total_frames * 0.75),
572
- total_frames - 1,
573
- ]
574
- frame_indices = list(set(frame_indices)) # Remove duplicates
575
- frame_indices.sort()
576
-
577
- for frame_idx in frame_indices:
578
- cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
579
- ret, frame = cap.read()
580
- if ret:
581
- # Convert BGR to RGB
582
- frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
583
- # Convert to PIL Image
584
- pil_image = Image.fromarray(frame_rgb)
585
- frames.append(pil_image)
586
-
587
- cap.release()
588
- return frames
589
-
590
- finally:
591
- # Clean up temporary file
592
- os.unlink(tmp_file_path)
593
-
594
- except ImportError:
595
- st.error(
596
- "OpenCV not installed. Please install it with: pip install opencv-python"
597
- )
598
- return []
599
- except Exception as e:
600
- st.error(f"Error extracting frames: {str(e)}")
601
- return []
602
-
603
-
604
- def extract_audio_from_video(video_file):
605
- """
606
- Extract audio from video file for audio sentiment analysis
607
-
608
- Args:
609
- video_file: StreamlitUploadedFile or bytes
610
-
611
- Returns:
612
- Audio bytes in WAV format
613
- """
614
- try:
615
- import tempfile
616
-
617
- try:
618
- from moviepy import VideoFileClip
619
- except ImportError as e:
620
- st.error(f"MoviePy import failed: {e}")
621
- st.error(
622
- "This usually means the Docker build failed to install moviepy properly"
623
- )
624
- return None
625
-
626
- # Save video bytes to temporary file
627
- with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
628
- if hasattr(video_file, "getvalue"):
629
- tmp_file.write(video_file.getvalue())
630
- else:
631
- tmp_file.write(video_file)
632
- tmp_file_path = tmp_file.name
633
-
634
- try:
635
- # Extract audio using moviepy
636
- video = VideoFileClip(tmp_file_path)
637
- audio = video.audio
638
-
639
- if audio is None:
640
- st.warning("No audio track found in video")
641
- return None
642
-
643
- # Save audio to temporary WAV file
644
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as audio_file:
645
- audio_path = audio_file.name
646
-
647
- # Export audio as WAV
648
- audio.write_audiofile(audio_path, logger=None)
649
-
650
- # Read the audio file and return bytes
651
- with open(audio_path, "rb") as f:
652
- audio_bytes = f.read()
653
-
654
- # Clean up temporary audio file
655
- try:
656
- os.unlink(audio_path)
657
- except (OSError, PermissionError):
658
- # File might be in use, skip cleanup
659
- pass
660
-
661
- return audio_bytes
662
-
663
- finally:
664
- # Clean up temporary video file
665
- try:
666
- # Close video and audio objects first
667
- if "video" in locals():
668
- video.close()
669
- if "audio" in locals() and audio:
670
- audio.close()
671
-
672
- # Wait a bit before trying to delete
673
- import time
674
-
675
- time.sleep(0.1)
676
-
677
- os.unlink(tmp_file_path)
678
- except (OSError, PermissionError):
679
- # File might be in use, skip cleanup
680
- pass
681
-
682
- except ImportError:
683
- st.error("MoviePy not installed. Please install it with: pip install moviepy")
684
- return None
685
- except Exception as e:
686
- st.error(f"Error extracting audio: {str(e)}")
687
- return None
688
-
689
-
690
- def transcribe_audio(audio_bytes):
691
- """
692
- Transcribe audio to text for text sentiment analysis
693
-
694
- Args:
695
- audio_bytes: Audio bytes in WAV format
696
-
697
- Returns:
698
- Transcribed text string
699
- """
700
- if audio_bytes is None:
701
- return ""
702
-
703
- try:
704
- import tempfile
705
- import speech_recognition as sr
706
-
707
- # Save audio bytes to temporary file
708
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
709
- tmp_file.write(audio_bytes)
710
- tmp_file_path = tmp_file.name
711
-
712
- try:
713
- # Initialize recognizer
714
- recognizer = sr.Recognizer()
715
-
716
- # Load audio file
717
- with sr.AudioFile(tmp_file_path) as source:
718
- # Read audio data
719
- audio_data = recognizer.record(source)
720
-
721
- # Transcribe using Google Speech Recognition
722
- try:
723
- text = recognizer.recognize_google(audio_data)
724
- return text
725
- except sr.UnknownValueError:
726
- st.warning("Speech could not be understood")
727
- return ""
728
- except sr.RequestError as e:
729
- st.error(
730
- f"Could not request results from speech recognition service: {e}"
731
- )
732
- return ""
733
-
734
- finally:
735
- # Clean up temporary file
736
- os.unlink(tmp_file_path)
737
 
738
- except ImportError:
739
- st.error(
740
- "SpeechRecognition not installed. Please install it with: pip install SpeechRecognition"
741
- )
742
- return ""
743
- except Exception as e:
744
- st.error(f"Error transcribing audio: {str(e)}")
745
- return ""
746
-
747
-
748
- # Sidebar navigation
749
- st.sidebar.title("Sentiment Analysis")
750
- st.sidebar.markdown("---")
751
-
752
- # Navigation
753
- page = st.sidebar.selectbox(
754
- "Choose a page:",
755
- [
756
- "Home",
757
- "Text Sentiment",
758
- "Audio Sentiment",
759
- "Vision Sentiment",
760
- "Fused Model",
761
- "Max Fusion",
762
- ],
763
- )
764
 
765
- # Home Page
766
- if page == "Home":
767
  st.markdown(
768
- '<h1 class="main-header">Multimodal Sentiment Analysis</h1>',
769
  unsafe_allow_html=True,
770
  )
771
 
772
  st.markdown(
773
  """
774
- <div class="model-card">
775
- <h2>Welcome to your Multi-Modal Sentiment Analysis Testing Platform!</h2>
776
- <p>This application provides a comprehensive testing environment for your three independent sentiment analysis models:</p>
777
- </div>
778
- """,
779
  unsafe_allow_html=True,
780
  )
781
 
@@ -784,105 +70,106 @@ if page == "Home":
784
  with col1:
785
  st.markdown(
786
  """
787
- <div class="model-card">
788
- <h3>Text Sentiment Model</h3>
789
- <p>READY TO USE - Analyze sentiment from text input using TextBlob</p>
790
- <ul>
791
- <li>Process any text input</li>
792
- <li>Get sentiment classification (Positive/Negative/Neutral)</li>
793
- <li>View confidence scores</li>
794
- <li>Real-time NLP analysis</li>
795
- </ul>
796
- </div>
797
- """,
798
  unsafe_allow_html=True,
799
  )
800
 
801
  with col2:
802
  st.markdown(
803
  """
804
- <div class="model-card">
805
- <h3>Audio Sentiment Model</h3>
806
- <p>READY TO USE - Analyze sentiment from audio files using fine-tuned Wav2Vec2</p>
807
- <ul>
808
- <li>Upload audio files (.wav, .mp3, .m4a, .flac)</li>
809
- <li>Record audio directly with microphone (max 5s)</li>
810
- <li>Automatic preprocessing: 16kHz sampling, 5s max duration (CREMA-D + RAVDESS format)</li>
811
- <li>Listen to uploaded/recorded audio</li>
812
- <li>Get sentiment predictions</li>
813
- <li>Real-time audio analysis</li>
814
- </ul>
815
- </div>
816
- """,
817
  unsafe_allow_html=True,
818
  )
819
 
820
  with col3:
821
  st.markdown(
822
  """
823
- <div class="model-card">
824
- <h3>Vision Sentiment Model</h3>
825
- <p>Analyze sentiment from images using fine-tuned ResNet-50</p>
826
- <ul>
827
- <li>Upload image files (.png, .jpg, .jpeg, .bmp, .tiff)</li>
828
- <li>Automatic face detection & preprocessing</li>
829
- <li>Fixed 0% padding for tightest face crop</li>
830
- <li>Convert to 224x224 grayscale → 3-channel RGB (FER2013 format)</li>
831
- <li>Transforms: Resize(224) → CenterCrop(224) → ImageNet Normalization</li>
832
- <li>Preview original & preprocessed images</li>
833
- <li>Get sentiment predictions</li>
834
- </ul>
835
- </div>
836
- """,
837
  unsafe_allow_html=True,
838
  )
839
 
840
  st.markdown(
841
  """
842
- <div class="model-card">
843
- <h3>Fused Model</h3>
844
- <p>Combine predictions from all three models for enhanced accuracy</p>
845
- <ul>
846
- <li>Multi-modal input processing</li>
847
- <li>Ensemble prediction strategies</li>
848
- <li>Comprehensive sentiment analysis</li>
849
- </ul>
850
- </div>
851
- """,
852
  unsafe_allow_html=True,
853
  )
854
 
855
  st.markdown(
856
  """
857
- <div class="model-card">
858
- <h3>🎬 Max Fusion</h3>
859
- <p>Ultimate video-based sentiment analysis combining all three modalities</p>
860
- <ul>
861
- <li>🎥 Record or upload 5-second videos</li>
862
- <li>🔍 Extract frames for vision analysis</li>
863
- <li>🎵 Extract audio for vocal sentiment</li>
864
- <li>📝 Transcribe audio for text analysis</li>
865
- <li>🚀 Comprehensive multi-modal results</li>
866
- </ul>
867
- </div>
868
- """,
869
  unsafe_allow_html=True,
870
  )
871
 
872
  st.markdown("---")
873
  st.markdown(
874
  """
875
- <div style="text-align: center; color: #666;">
876
- <p><strong>Note:</strong> This application now has <strong>ALL THREE MODELS</strong> fully integrated and ready to use!</p>
877
- <p><strong>TextBlob</strong> (Text) + <strong>Wav2Vec2</strong> (Audio) + <strong>ResNet-50</strong> (Vision)</p>
878
- <p><strong>Models are now loaded from Google Drive automatically!</strong></p>
879
- </div>
880
- """,
881
  unsafe_allow_html=True,
882
  )
883
 
884
- # Text Sentiment Page
885
- elif page == "Text Sentiment":
 
886
  st.title("Text Sentiment Analysis")
887
  st.markdown("Analyze the sentiment of your text using our TextBlob-based model.")
888
 
@@ -910,28 +197,26 @@ elif page == "Text Sentiment":
910
  st.metric("Confidence", f"{confidence:.2f}")
911
 
912
  # Color-coded sentiment display
913
- sentiment_colors = {
914
- "Positive": "🟢",
915
- "Negative": "🔴",
916
- "Neutral": "🟡",
917
- }
918
 
919
  st.markdown(
920
  f"""
921
- <div class="result-box">
922
- <h4>{sentiment_colors.get(sentiment, "❓")} Sentiment: {sentiment}</h4>
923
- <p><strong>Confidence:</strong> {confidence:.2f}</p>
924
- <p><strong>Input Text:</strong> "{text_input[:100]}{'...' if len(text_input) > 100 else ''}"</p>
925
- <p><strong>Model:</strong> TextBlob (Natural Language Processing)</p>
926
- </div>
927
- """,
928
  unsafe_allow_html=True,
929
  )
930
  else:
931
  st.error("Please enter some text to analyze.")
932
 
933
- # Audio Sentiment Page
934
- elif page == "Audio Sentiment":
 
935
  st.title("Audio Sentiment Analysis")
936
  st.markdown(
937
  "Analyze the sentiment of your audio files using our fine-tuned Wav2Vec2 model."
@@ -969,7 +254,7 @@ elif page == "Audio Sentiment":
969
  # File uploader
970
  uploaded_audio = st.file_uploader(
971
  "Choose an audio file",
972
- type=["wav", "mp3", "m4a", "flac"],
973
  help="Supported formats: WAV, MP3, M4A, FLAC",
974
  )
975
 
@@ -979,12 +264,12 @@ elif page == "Audio Sentiment":
979
  else: # Audio recording
980
  st.markdown(
981
  """
982
- <div class="model-card">
983
- <h3>Audio Recording</h3>
984
- <p>Record audio directly with your microphone (max 5 seconds).</p>
985
- <p><strong>Note:</strong> Make sure your microphone is accessible and you have permission to use it.</p>
986
- </div>
987
- """,
988
  unsafe_allow_html=True,
989
  )
990
 
@@ -1018,8 +303,10 @@ elif page == "Audio Sentiment":
1018
  uploaded_audio, format=f'audio/{uploaded_audio.name.split(".")[-1]}'
1019
  )
1020
  # File info for uploaded files
1021
- file_size = len(uploaded_audio.getvalue()) / 1024 # KB
1022
- st.info(f"File: {uploaded_audio.name} | Size: {file_size:.1f} KB")
 
 
1023
 
1024
  # Analyze button
1025
  if st.button(
@@ -1042,17 +329,18 @@ elif page == "Audio Sentiment":
1042
  st.metric("Confidence", f"{confidence:.2f}")
1043
 
1044
  # Color-coded sentiment display
1045
- sentiment_colors = {"Positive": "🟢", "Negative": "🔴", "Neutral": "🟡"}
 
1046
 
1047
  st.markdown(
1048
  f"""
1049
- <div class="result-box">
1050
- <h4>{sentiment_colors.get(sentiment, "❓")} Sentiment: {sentiment}</h4>
1051
- <p><strong>Confidence:</strong> {confidence:.2f}</p>
1052
- <p><strong>Audio Source:</strong> {audio_name}</p>
1053
- <p><strong>Model:</strong> Wav2Vec2 (Fine-tuned on RAVDESS + CREMA-D)</p>
1054
- </div>
1055
- """,
1056
  unsafe_allow_html=True,
1057
  )
1058
  else:
@@ -1061,8 +349,9 @@ elif page == "Audio Sentiment":
1061
  else:
1062
  st.info("Click the microphone button above to record audio for analysis.")
1063
 
1064
- # Vision Sentiment Page
1065
- elif page == "Vision Sentiment":
 
1066
  st.title("Vision Sentiment Analysis")
1067
  st.markdown(
1068
  "Analyze the sentiment of your images using our fine-tuned ResNet-50 model."
@@ -1101,7 +390,7 @@ elif page == "Vision Sentiment":
1101
  # File uploader
1102
  uploaded_image = st.file_uploader(
1103
  "Choose an image file",
1104
- type=["png", "jpg", "jpeg", "bmp", "tiff"],
1105
  help="Supported formats: PNG, JPG, JPEG, BMP, TIFF",
1106
  )
1107
 
@@ -1115,9 +404,9 @@ elif page == "Vision Sentiment":
1115
  )
1116
 
1117
  # File info
1118
- file_size = len(uploaded_image.getvalue()) / 1024 # KB
1119
  st.info(
1120
- f"File: {uploaded_image.name} | Size: {file_size:.1f} KB | Dimensions: {image.size[0]}x{image.size[1]}"
1121
  )
1122
 
1123
  # Analyze button
@@ -1140,33 +429,30 @@ elif page == "Vision Sentiment":
1140
  st.metric("Confidence", f"{confidence:.2f}")
1141
 
1142
  # Color-coded sentiment display
1143
- sentiment_colors = {
1144
- "Positive": "🟢",
1145
- "Negative": "🔴",
1146
- "Neutral": "🟡",
1147
- }
1148
 
1149
  st.markdown(
1150
  f"""
1151
- <div class="result-box">
1152
- <h4>{sentiment_colors.get(sentiment, "❓")} Sentiment: {sentiment}</h4>
1153
- <p><strong>Confidence:</strong> {confidence:.2f}</p>
1154
- <p><strong>Image File:</strong> {uploaded_image.name}</p>
1155
- <p><strong>Model:</strong> ResNet-50 (Fine-tuned on FER2013)</p>
1156
- </div>
1157
- """,
1158
  unsafe_allow_html=True,
1159
  )
1160
 
1161
  else: # Camera capture
1162
  st.markdown(
1163
  """
1164
- <div class="model-card">
1165
- <h3>Camera Capture</h3>
1166
- <p>Take a photo directly with your camera to analyze its sentiment.</p>
1167
- <p><strong>Note:</strong> Make sure your camera is accessible and you have permission to use it.</p>
1168
- </div>
1169
- """,
1170
  unsafe_allow_html=True,
1171
  )
1172
 
@@ -1210,21 +496,18 @@ elif page == "Vision Sentiment":
1210
  st.metric("Confidence", f"{confidence:.2f}")
1211
 
1212
  # Color-coded sentiment display
1213
- sentiment_colors = {
1214
- "Positive": "🟢",
1215
- "Negative": "🔴",
1216
- "Neutral": "🟡",
1217
- }
1218
 
1219
  st.markdown(
1220
  f"""
1221
- <div class="result-box">
1222
- <h4>{sentiment_colors.get(sentiment, "❓")} Sentiment: {sentiment}</h4>
1223
- <p><strong>Confidence:</strong> {confidence:.2f}</p>
1224
- <p><strong>Image Source:</strong> Camera Capture</p>
1225
- <p><strong>Model:</strong> ResNet-50 (Fine-tuned on FER2013)</p>
1226
- </div>
1227
- """,
1228
  unsafe_allow_html=True,
1229
  )
1230
 
@@ -1234,8 +517,9 @@ elif page == "Vision Sentiment":
1234
  elif input_method == "Take Photo with Camera" and "camera_photo" not in locals():
1235
  st.info("Click the camera button above to take a photo for analysis.")
1236
 
1237
- # Fused Model Page
1238
- elif page == "Fused Model":
 
1239
  st.title("Fused Model Analysis")
1240
  st.markdown(
1241
  "Combine predictions from all three models for enhanced sentiment analysis."
@@ -1243,12 +527,12 @@ elif page == "Fused Model":
1243
 
1244
  st.markdown(
1245
  """
1246
- <div class="model-card">
1247
- <h3>Multi-Modal Sentiment Analysis</h3>
1248
- <p>This page allows you to input text, audio, and/or image data to get a comprehensive sentiment analysis
1249
- using all three models combined.</p>
1250
- </div>
1251
- """,
1252
  unsafe_allow_html=True,
1253
  )
1254
 
@@ -1282,7 +566,7 @@ elif page == "Fused Model":
1282
  if audio_input_method == "Upload File":
1283
  uploaded_audio = st.file_uploader(
1284
  "Upload audio file (optional):",
1285
- type=["wav", "mp3", "m4a", "flac"],
1286
  key="fused_audio",
1287
  )
1288
  audio_source = "uploaded_file"
@@ -1325,7 +609,7 @@ elif page == "Fused Model":
1325
  if image_input_method == "Upload File":
1326
  uploaded_image = st.file_uploader(
1327
  "Upload image file (optional):",
1328
- type=["png", "jpg", "jpeg", "bmp", "tiff"],
1329
  key="fused_image",
1330
  )
1331
 
@@ -1421,16 +705,17 @@ elif page == "Fused Model":
1421
  st.dataframe(df, use_container_width=True)
1422
 
1423
  # Final result display
1424
- sentiment_colors = {"Positive": "🟢", "Negative": "🔴", "Neutral": "🟡"}
 
1425
 
1426
  st.markdown(
1427
  f"""
1428
- <div class="result-box">
1429
- <h4>{sentiment_colors.get(sentiment, "❓")} Final Fused Sentiment: {sentiment}</h4>
1430
- <p><strong>Overall Confidence:</strong> {confidence:.2f}</p>
1431
- <p><strong>Models Used:</strong> {len(results_data)}</p>
1432
- </div>
1433
- """,
1434
  unsafe_allow_html=True,
1435
  )
1436
  else:
@@ -1438,21 +723,22 @@ elif page == "Fused Model":
1438
  "Please provide at least one input (text, audio, or image) for fused analysis."
1439
  )
1440
 
1441
- # Max Fusion Page
1442
- elif page == "Max Fusion":
 
1443
  st.title("Max Fusion - Multi-Modal Sentiment Analysis")
1444
  st.markdown(
1445
  """
1446
- <div class="model-card">
1447
- <h3>Ultimate Multi-Modal Sentiment Analysis</h3>
1448
- <p>Take photos with camera or upload videos to get comprehensive sentiment analysis from multiple modalities:</p>
1449
- <ul>
1450
- <li>📸 <strong>Vision Analysis:</strong> Camera photos or video frames for facial expression analysis</li>
1451
- <li>🎵 <strong>Audio Analysis:</strong> Audio files or extracted audio from videos for vocal sentiment</li>
1452
- <li>📝 <strong>Text Analysis:</strong> Transcribed audio for text sentiment analysis</li>
1453
- </ul>
1454
- </div>
1455
- """,
1456
  unsafe_allow_html=True,
1457
  )
1458
 
@@ -1476,25 +762,25 @@ elif page == "Max Fusion":
1476
  with col2:
1477
  st.markdown(
1478
  """
1479
- <div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; color: white;">
1480
- <h3>🚧 Coming Soon 🚧</h3>
1481
- <p>Video recording feature is under development</p>
1482
- <p>Use Upload Video File for now!</p>
1483
- </div>
1484
- """,
1485
  unsafe_allow_html=True,
1486
  )
1487
 
1488
  # Placeholder for future recording functionality
1489
  st.markdown(
1490
  """
1491
- **Future Features:**
1492
- - Real-time video recording with camera
1493
- - Audio capture during recording
1494
- - Automatic frame extraction
1495
- - Live transcription
1496
- - WebRTC integration for low-latency streaming
1497
- """
1498
  )
1499
 
1500
  # Skip all the recording logic for now
@@ -1507,19 +793,19 @@ elif page == "Max Fusion":
1507
  # File upload option
1508
  st.markdown(
1509
  """
1510
- <div class="upload-section">
1511
- <h4>📁 Upload Video File</h4>
1512
- <p>Upload a video file for comprehensive multimodal analysis.</p>
1513
- <p><strong>Supported Formats:</strong> MP4, AVI, MOV, MKV, WMV, FLV</p>
1514
- <p><strong>Recommended:</strong> Videos with clear audio and visual content</p>
1515
- </div>
1516
- """,
1517
  unsafe_allow_html=True,
1518
  )
1519
 
1520
  uploaded_video = st.file_uploader(
1521
  "Choose a video file",
1522
- type=["mp4", "avi", "mov", "mkv", "wmv", "flv"],
1523
  help="Supported formats: MP4, AVI, MOV, MKV, WMV, FLV",
1524
  )
1525
 
@@ -1527,8 +813,6 @@ elif page == "Max Fusion":
1527
  video_name = uploaded_video.name if uploaded_video else None
1528
  video_file = uploaded_video
1529
 
1530
- # Video recording using streamlit-webrtc component - COMING SOON
1531
-
1532
  if video_file is not None:
1533
  # Display video or photo
1534
  if video_source == "camera_photo":
@@ -1543,7 +827,7 @@ elif page == "Max Fusion":
1543
 
1544
  uploaded_audio = st.file_uploader(
1545
  "Upload audio file for audio analysis:",
1546
- type=["wav", "mp3", "m4a", "flac"],
1547
  key="camera_audio",
1548
  help="Upload an audio file to complement the photo analysis",
1549
  )
@@ -1561,11 +845,10 @@ elif page == "Max Fusion":
1561
  else:
1562
  # For uploaded videos
1563
  st.video(video_file)
1564
- if hasattr(video_file, "getvalue"):
1565
- file_size = len(video_file.getvalue()) / 1024 # KB
1566
- else:
1567
- file_size = len(video_file) / 1024 # KB
1568
- st.info(f"File: {video_name} | Size: {file_size:.1f} KB")
1569
  audio_bytes = None # Will be extracted from video
1570
 
1571
  # Video Processing Pipeline
@@ -1709,20 +992,17 @@ elif page == "Max Fusion":
1709
  st.metric("📊 Overall Confidence", f"{confidence:.2f}")
1710
 
1711
  # Color-coded sentiment display
1712
- sentiment_colors = {
1713
- "Positive": "🟢",
1714
- "Negative": "🔴",
1715
- "Neutral": "🟡",
1716
- }
1717
 
1718
  st.markdown(
1719
  f"""
1720
- <div class="result-box">
1721
- <h4>{sentiment_colors.get(sentiment, "❓")} Max Fusion Sentiment: {sentiment}</h4>
1722
- <p><strong>Overall Confidence:</strong> {confidence:.2f}</p>
1723
- <p><strong>Modalities Analyzed:</strong> {len(results_data)}</p>
1724
- <p><strong>Video Source:</strong> {video_name}</p>
1725
- <p><strong>Analysis Type:</strong> Comprehensive Multi-Modal Sentiment Analysis</p>
1726
  </div>
1727
  """,
1728
  unsafe_allow_html=True,
@@ -1740,13 +1020,54 @@ elif page == "Max Fusion":
1740
  else:
1741
  st.info("📁 Please upload a video file to begin Max Fusion analysis.")
1742
 
1743
- # Footer
1744
- st.markdown("---")
1745
- st.markdown(
1746
- """
1747
- <div style="text-align: center; color: #666; padding: 1rem;">
1748
- <p>Built with ❤️ | by <a href="https://github.com/iamfaham">iamfaham</a></p>
1749
- </div>
1750
- """,
1751
- unsafe_allow_html=True,
1752
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Refactored Sentiment Fused - Multimodal Sentiment Analysis Application
3
+
4
+ This is the main entry point for the application, now using a modular structure.
5
+ """
6
+
7
  import streamlit as st
8
  import pandas as pd
9
  from PIL import Image
10
+ import logging
11
+
12
+ # Import our modular components
13
+ from src.config.settings import (
14
+ APP_NAME,
15
+ APP_VERSION,
16
+ APP_ICON,
17
+ APP_LAYOUT,
18
+ CUSTOM_CSS,
19
+ SUPPORTED_IMAGE_FORMATS,
20
+ SUPPORTED_AUDIO_FORMATS,
21
+ SUPPORTED_VIDEO_FORMATS,
22
+ )
23
+ from src.models.text_model import predict_text_sentiment
24
+ from src.models.audio_model import predict_audio_sentiment, load_audio_model
25
+ from src.models.vision_model import predict_vision_sentiment, load_vision_model
26
+ from src.models.fused_model import predict_fused_sentiment
27
+ from src.utils.preprocessing import (
28
+ extract_frames_from_video,
29
+ extract_audio_from_video,
30
+ transcribe_audio,
31
+ )
32
+ from src.utils.file_handling import get_file_info, format_file_size
33
+ from src.utils.sentiment_mapping import get_sentiment_colors, format_sentiment_result
34
 
35
+ # Configure logging
36
+ logging.basicConfig(level=logging.INFO)
37
+ logger = logging.getLogger(__name__)
38
 
39
  # Page configuration
40
  st.set_page_config(
41
+ page_title=APP_NAME,
42
+ page_icon=APP_ICON,
43
+ layout=APP_LAYOUT,
44
  initial_sidebar_state="expanded",
45
  )
46
 
47
+ # Apply custom CSS
48
+ st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
+ def render_home_page():
52
+ """Render the home page with model information."""
53
  st.markdown(
54
+ f'<h1 class="main-header">{APP_NAME}</h1>',
55
  unsafe_allow_html=True,
56
  )
57
 
58
  st.markdown(
59
  """
60
+ <div class="model-card">
61
+ <h2>Welcome to your Multi-Modal Sentiment Analysis Testing Platform!</h2>
62
+ <p>This application provides a comprehensive testing environment for your three independent sentiment analysis models:</p>
63
+ </div>
64
+ """,
65
  unsafe_allow_html=True,
66
  )
67
 
 
70
  with col1:
71
  st.markdown(
72
  """
73
+ <div class="model-card">
74
+ <h3>Text Sentiment Model</h3>
75
+ <p>READY TO USE - Analyze sentiment from text input using TextBlob</p>
76
+ <ul>
77
+ <li>Process any text input</li>
78
+ <li>Get sentiment classification (Positive/Negative/Neutral)</li>
79
+ <li>View confidence scores</li>
80
+ <li>Real-time NLP analysis</li>
81
+ </ul>
82
+ </div>
83
+ """,
84
  unsafe_allow_html=True,
85
  )
86
 
87
  with col2:
88
  st.markdown(
89
  """
90
+ <div class="model-card">
91
+ <h3>Audio Sentiment Model</h3>
92
+ <p>READY TO USE - Analyze sentiment from audio files using fine-tuned Wav2Vec2</p>
93
+ <ul>
94
+ <li>Upload audio files (.wav, .mp3, .m4a, .flac)</li>
95
+ <li>Record audio directly with microphone (max 5s)</li>
96
+ <li>Automatic preprocessing: 16kHz sampling, 5s max duration</li>
97
+ <li>Listen to uploaded/recorded audio</li>
98
+ <li>Get sentiment predictions</li>
99
+ <li>Real-time audio analysis</li>
100
+ </ul>
101
+ </div>
102
+ """,
103
  unsafe_allow_html=True,
104
  )
105
 
106
  with col3:
107
  st.markdown(
108
  """
109
+ <div class="model-card">
110
+ <h3>Vision Sentiment Model</h3>
111
+ <p>Analyze sentiment from images using fine-tuned ResNet-50</p>
112
+ <ul>
113
+ <li>Upload image files (.png, .jpg, .jpeg, .bmp, .tiff)</li>
114
+ <li>Automatic face detection & preprocessing</li>
115
+ <li>Fixed 0% padding for tightest face crop</li>
116
+ <li>Convert to 224x224 grayscale → 3-channel RGB (FER2013 format)</li>
117
+ <li>Transforms: Resize(224) → CenterCrop(224) → ImageNet Normalization</li>
118
+ <li>Preview original & preprocessed images</li>
119
+ <li>Get sentiment predictions</li>
120
+ </ul>
121
+ </div>
122
+ """,
123
  unsafe_allow_html=True,
124
  )
125
 
126
  st.markdown(
127
  """
128
+ <div class="model-card">
129
+ <h3>Fused Model</h3>
130
+ <p>Combine predictions from all three models for enhanced accuracy</p>
131
+ <ul>
132
+ <li>Multi-modal input processing</li>
133
+ <li>Ensemble prediction strategies</li>
134
+ <li>Comprehensive sentiment analysis</li>
135
+ </ul>
136
+ </div>
137
+ """,
138
  unsafe_allow_html=True,
139
  )
140
 
141
  st.markdown(
142
  """
143
+ <div class="model-card">
144
+ <h3>🎬 Max Fusion</h3>
145
+ <p>Ultimate video-based sentiment analysis combining all three modalities</p>
146
+ <ul>
147
+ <li>🎥 Record or upload 5-second videos</li>
148
+ <li>🔍 Extract frames for vision analysis</li>
149
+ <li>🎵 Extract audio for vocal sentiment</li>
150
+ <li>📝 Transcribe audio for text analysis</li>
151
+ <li>🚀 Comprehensive multi-modal results</li>
152
+ </ul>
153
+ </div>
154
+ """,
155
  unsafe_allow_html=True,
156
  )
157
 
158
  st.markdown("---")
159
  st.markdown(
160
  """
161
+ <div style="text-align: center; color: #666;">
162
+ <p><strong>Note:</strong> This application now has <strong>ALL THREE MODELS</strong> fully integrated and ready to use!</p>
163
+ <p><strong>TextBlob</strong> (Text) + <strong>Wav2Vec2</strong> (Audio) + <strong>ResNet-50</strong> (Vision)</p>
164
+ <p><strong>Models are now loaded from Google Drive automatically!</strong></p>
165
+ </div>
166
+ """,
167
  unsafe_allow_html=True,
168
  )
169
 
170
+
171
+ def render_text_sentiment_page():
172
+ """Render the text sentiment analysis page."""
173
  st.title("Text Sentiment Analysis")
174
  st.markdown("Analyze the sentiment of your text using our TextBlob-based model.")
175
 
 
197
  st.metric("Confidence", f"{confidence:.2f}")
198
 
199
  # Color-coded sentiment display
200
+ sentiment_colors = get_sentiment_colors()
201
+ emoji = sentiment_colors.get(sentiment, "")
 
 
 
202
 
203
  st.markdown(
204
  f"""
205
+ <div class="result-box">
206
+ <h4>{emoji} Sentiment: {sentiment}</h4>
207
+ <p><strong>Confidence:</strong> {confidence:.2f}</p>
208
+ <p><strong>Input Text:</strong> "{text_input[:100]}{'...' if len(text_input) > 100 else ''}"</p>
209
+ <p><strong>Model:</strong> TextBlob (Natural Language Processing)</p>
210
+ </div>
211
+ """,
212
  unsafe_allow_html=True,
213
  )
214
  else:
215
  st.error("Please enter some text to analyze.")
216
 
217
+
218
+ def render_audio_sentiment_page():
219
+ """Render the audio sentiment analysis page."""
220
  st.title("Audio Sentiment Analysis")
221
  st.markdown(
222
  "Analyze the sentiment of your audio files using our fine-tuned Wav2Vec2 model."
 
254
  # File uploader
255
  uploaded_audio = st.file_uploader(
256
  "Choose an audio file",
257
+ type=SUPPORTED_AUDIO_FORMATS,
258
  help="Supported formats: WAV, MP3, M4A, FLAC",
259
  )
260
 
 
264
  else: # Audio recording
265
  st.markdown(
266
  """
267
+ <div class="model-card">
268
+ <h3>Audio Recording</h3>
269
+ <p>Record audio directly with your microphone (max 5 seconds).</p>
270
+ <p><strong>Note:</strong> Make sure your microphone is accessible and you have permission to use it.</p>
271
+ </div>
272
+ """,
273
  unsafe_allow_html=True,
274
  )
275
 
 
303
  uploaded_audio, format=f'audio/{uploaded_audio.name.split(".")[-1]}'
304
  )
305
  # File info for uploaded files
306
+ file_info = get_file_info(uploaded_audio)
307
+ st.info(
308
+ f"File: {file_info['name']} | Size: {format_file_size(file_info['size_bytes'])}"
309
+ )
310
 
311
  # Analyze button
312
  if st.button(
 
329
  st.metric("Confidence", f"{confidence:.2f}")
330
 
331
  # Color-coded sentiment display
332
+ sentiment_colors = get_sentiment_colors()
333
+ emoji = sentiment_colors.get(sentiment, "❓")
334
 
335
  st.markdown(
336
  f"""
337
+ <div class="result-box">
338
+ <h4>{emoji} Sentiment: {sentiment}</h4>
339
+ <p><strong>Confidence:</strong> {confidence:.2f}</p>
340
+ <p><strong>Audio Source:</strong> {audio_name}</p>
341
+ <p><strong>Model:</strong> Wav2Vec2 (Fine-tuned on RAVDESS + CREMA-D)</p>
342
+ </div>
343
+ """,
344
  unsafe_allow_html=True,
345
  )
346
  else:
 
349
  else:
350
  st.info("Click the microphone button above to record audio for analysis.")
351
 
352
+
353
+ def render_vision_sentiment_page():
354
+ """Render the vision sentiment analysis page."""
355
  st.title("Vision Sentiment Analysis")
356
  st.markdown(
357
  "Analyze the sentiment of your images using our fine-tuned ResNet-50 model."
 
390
  # File uploader
391
  uploaded_image = st.file_uploader(
392
  "Choose an image file",
393
+ type=SUPPORTED_IMAGE_FORMATS,
394
  help="Supported formats: PNG, JPG, JPEG, BMP, TIFF",
395
  )
396
 
 
404
  )
405
 
406
  # File info
407
+ file_info = get_file_info(uploaded_image)
408
  st.info(
409
+ f"File: {file_info['name']} | Size: {format_file_size(file_info['size_bytes'])} | Dimensions: {image.size[0]}x{image.size[1]}"
410
  )
411
 
412
  # Analyze button
 
429
  st.metric("Confidence", f"{confidence:.2f}")
430
 
431
  # Color-coded sentiment display
432
+ sentiment_colors = get_sentiment_colors()
433
+ emoji = sentiment_colors.get(sentiment, "")
 
 
 
434
 
435
  st.markdown(
436
  f"""
437
+ <div class="result-box">
438
+ <h4>{emoji} Sentiment: {sentiment}</h4>
439
+ <p><strong>Confidence:</strong> {confidence:.2f}</p>
440
+ <p><strong>Image File:</strong> {uploaded_image.name}</p>
441
+ <p><strong>Model:</strong> ResNet-50 (Fine-tuned on FER2013)</p>
442
+ </div>
443
+ """,
444
  unsafe_allow_html=True,
445
  )
446
 
447
  else: # Camera capture
448
  st.markdown(
449
  """
450
+ <div class="model-card">
451
+ <h3>Camera Capture</h3>
452
+ <p>Take a photo directly with your camera to analyze its sentiment.</p>
453
+ <p><strong>Note:</strong> Make sure your camera is accessible and you have permission to use it.</p>
454
+ </div>
455
+ """,
456
  unsafe_allow_html=True,
457
  )
458
 
 
496
  st.metric("Confidence", f"{confidence:.2f}")
497
 
498
  # Color-coded sentiment display
499
+ sentiment_colors = get_sentiment_colors()
500
+ emoji = sentiment_colors.get(sentiment, "")
 
 
 
501
 
502
  st.markdown(
503
  f"""
504
+ <div class="result-box">
505
+ <h4>{emoji} Sentiment: {sentiment}</h4>
506
+ <p><strong>Confidence:</strong> {confidence:.2f}</p>
507
+ <p><strong>Image Source:</strong> Camera Capture</p>
508
+ <p><strong>Model:</strong> ResNet-50 (Fine-tuned on FER2013)</p>
509
+ </div>
510
+ """,
511
  unsafe_allow_html=True,
512
  )
513
 
 
517
  elif input_method == "Take Photo with Camera" and "camera_photo" not in locals():
518
  st.info("Click the camera button above to take a photo for analysis.")
519
 
520
+
521
+ def render_fused_model_page():
522
+ """Render the fused model analysis page."""
523
  st.title("Fused Model Analysis")
524
  st.markdown(
525
  "Combine predictions from all three models for enhanced sentiment analysis."
 
527
 
528
  st.markdown(
529
  """
530
+ <div class="model-card">
531
+ <h3>Multi-Modal Sentiment Analysis</h3>
532
+ <p>This page allows you to input text, audio, and/or image data to get a comprehensive sentiment analysis
533
+ using all three models combined.</p>
534
+ </div>
535
+ """,
536
  unsafe_allow_html=True,
537
  )
538
 
 
566
  if audio_input_method == "Upload File":
567
  uploaded_audio = st.file_uploader(
568
  "Upload audio file (optional):",
569
+ type=SUPPORTED_AUDIO_FORMATS,
570
  key="fused_audio",
571
  )
572
  audio_source = "uploaded_file"
 
609
  if image_input_method == "Upload File":
610
  uploaded_image = st.file_uploader(
611
  "Upload image file (optional):",
612
+ type=SUPPORTED_IMAGE_FORMATS,
613
  key="fused_image",
614
  )
615
 
 
705
  st.dataframe(df, use_container_width=True)
706
 
707
  # Final result display
708
+ sentiment_colors = get_sentiment_colors()
709
+ emoji = sentiment_colors.get(sentiment, "❓")
710
 
711
  st.markdown(
712
  f"""
713
+ <div class="result-box">
714
+ <h4>{emoji} Final Fused Sentiment: {sentiment}</h4>
715
+ <p><strong>Overall Confidence:</strong> {confidence:.2f}</p>
716
+ <p><strong>Models Used:</strong> {len(results_data)}</p>
717
+ </div>
718
+ """,
719
  unsafe_allow_html=True,
720
  )
721
  else:
 
723
  "Please provide at least one input (text, audio, or image) for fused analysis."
724
  )
725
 
726
+
727
+ def render_max_fusion_page():
728
+ """Render the max fusion page for video-based analysis."""
729
  st.title("Max Fusion - Multi-Modal Sentiment Analysis")
730
  st.markdown(
731
  """
732
+ <div class="model-card">
733
+ <h3>Ultimate Multi-Modal Sentiment Analysis</h3>
734
+ <p>Take photos with camera or upload videos to get comprehensive sentiment analysis from multiple modalities:</p>
735
+ <ul>
736
+ <li>📸 <strong>Vision Analysis:</strong> Camera photos or video frames for facial expression analysis</li>
737
+ <li>🎵 <strong>Audio Analysis:</strong> Audio files or extracted audio from videos for vocal sentiment</li>
738
+ <li>📝 <strong>Text Analysis:</strong> Transcribed audio for text sentiment analysis</li>
739
+ </ul>
740
+ </div>
741
+ """,
742
  unsafe_allow_html=True,
743
  )
744
 
 
762
  with col2:
763
  st.markdown(
764
  """
765
+ <div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; color: white;">
766
+ <h3>🚧 Coming Soon 🚧</h3>
767
+ <p>Video recording feature is under development</p>
768
+ <p>Use Upload Video File for now!</p>
769
+ </div>
770
+ """,
771
  unsafe_allow_html=True,
772
  )
773
 
774
  # Placeholder for future recording functionality
775
  st.markdown(
776
  """
777
+ **Future Features:**
778
+ - Real-time video recording with camera
779
+ - Audio capture during recording
780
+ - Automatic frame extraction
781
+ - Live transcription
782
+ - WebRTC integration for low-latency streaming
783
+ """
784
  )
785
 
786
  # Skip all the recording logic for now
 
793
  # File upload option
794
  st.markdown(
795
  """
796
+ <div class="upload-section">
797
+ <h4>📁 Upload Video File</h4>
798
+ <p>Upload a video file for comprehensive multimodal analysis.</p>
799
+ <p><strong>Supported Formats:</strong> MP4, AVI, MOV, MKV, WMV, FLV</p>
800
+ <p><strong>Recommended:</strong> Videos with clear audio and visual content</p>
801
+ </div>
802
+ """,
803
  unsafe_allow_html=True,
804
  )
805
 
806
  uploaded_video = st.file_uploader(
807
  "Choose a video file",
808
+ type=SUPPORTED_VIDEO_FORMATS,
809
  help="Supported formats: MP4, AVI, MOV, MKV, WMV, FLV",
810
  )
811
 
 
813
  video_name = uploaded_video.name if uploaded_video else None
814
  video_file = uploaded_video
815
 
 
 
816
  if video_file is not None:
817
  # Display video or photo
818
  if video_source == "camera_photo":
 
827
 
828
  uploaded_audio = st.file_uploader(
829
  "Upload audio file for audio analysis:",
830
+ type=SUPPORTED_AUDIO_FORMATS,
831
  key="camera_audio",
832
  help="Upload an audio file to complement the photo analysis",
833
  )
 
845
  else:
846
  # For uploaded videos
847
  st.video(video_file)
848
+ file_info = get_file_info(video_file)
849
+ st.info(
850
+ f"File: {file_info['name']} | Size: {format_file_size(file_info['size_bytes'])}"
851
+ )
 
852
  audio_bytes = None # Will be extracted from video
853
 
854
  # Video Processing Pipeline
 
992
  st.metric("📊 Overall Confidence", f"{confidence:.2f}")
993
 
994
  # Color-coded sentiment display
995
+ sentiment_colors = get_sentiment_colors()
996
+ emoji = sentiment_colors.get(sentiment, "")
 
 
 
997
 
998
  st.markdown(
999
  f"""
1000
+ <div class="result-box">
1001
+ <h4>{emoji} Max Fusion Sentiment: {sentiment}</h4>
1002
+ <p><strong>Overall Confidence:</strong> {confidence:.2f}</p>
1003
+ <p><strong>Modalities Analyzed:</strong> {len(results_data)}</p>
1004
+ <p><strong>Video Source:</strong> {video_name}</p>
1005
+ <p><strong>Analysis Type:</strong> Comprehensive Multi-Modal Sentiment Analysis</p>
1006
  </div>
1007
  """,
1008
  unsafe_allow_html=True,
 
1020
  else:
1021
  st.info("📁 Please upload a video file to begin Max Fusion analysis.")
1022
 
1023
+
1024
+ def main():
1025
+ """Main application function."""
1026
+ # Sidebar navigation
1027
+ st.sidebar.title("Sentiment Analysis")
1028
+ st.sidebar.markdown("---")
1029
+
1030
+ # Navigation
1031
+ page = st.sidebar.selectbox(
1032
+ "Choose a page:",
1033
+ [
1034
+ "Home",
1035
+ "Text Sentiment",
1036
+ "Audio Sentiment",
1037
+ "Vision Sentiment",
1038
+ "Fused Model",
1039
+ "Max Fusion",
1040
+ ],
1041
+ )
1042
+
1043
+ # Page routing
1044
+ if page == "Home":
1045
+ render_home_page()
1046
+ elif page == "Text Sentiment":
1047
+ render_text_sentiment_page()
1048
+ elif page == "Audio Sentiment":
1049
+ render_audio_sentiment_page()
1050
+ elif page == "Vision Sentiment":
1051
+ render_vision_sentiment_page()
1052
+ elif page == "Fused Model":
1053
+ render_fused_model_page()
1054
+ elif page == "Max Fusion":
1055
+ render_max_fusion_page()
1056
+
1057
+ # Footer
1058
+ st.markdown("---")
1059
+ st.markdown(
1060
+ """
1061
+ <div style="text-align: center; color: #666; padding: 1rem;">
1062
+ <p>Built with ❤️ | by <a href="https://github.com/iamfaham">iamfaham</a></p>
1063
+ <p>Version: {version}</p>
1064
+ </div>
1065
+ """.format(
1066
+ version=APP_VERSION
1067
+ ),
1068
+ unsafe_allow_html=True,
1069
+ )
1070
+
1071
+
1072
+ if __name__ == "__main__":
1073
+ main()
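
Side note: the if/elif routing above works, but a dictionary dispatch is a common, tidier equivalent. A minimal sketch (a hypothetical alternative, not part of this commit), assuming the render_* functions defined in app.py:

    # Hypothetical dict-based page dispatch, equivalent to the if/elif chain in main()
    PAGES = {
        "Home": render_home_page,
        "Text Sentiment": render_text_sentiment_page,
        "Audio Sentiment": render_audio_sentiment_page,
        "Vision Sentiment": render_vision_sentiment_page,
        "Fused Model": render_fused_model_page,
        "Max Fusion": render_max_fusion_page,
    }
    PAGES[page]()  # dispatch on the sidebar selection
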
pyproject.toml CHANGED
@@ -11,4 +11,6 @@ dependencies = [
11
  "speechrecognition>=3.10.0",
12
  "streamlit-webrtc>=0.47.0",
13
  "opencv-python-headless>=4.8.0",
 
 
14
  ]
 
11
  "speechrecognition>=3.10.0",
12
  "streamlit-webrtc>=0.47.0",
13
  "opencv-python-headless>=4.8.0",
14
+ "torch>=2.8.0",
15
+ "pillow>=11.3.0",
16
  ]
src/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ """
2
+ Sentiment Fused - Multimodal Sentiment Analysis Package
3
+
4
+ A comprehensive package for analyzing sentiment from text, audio, and visual inputs
5
+ using state-of-the-art deep learning models.
6
+ """
7
+
8
+ __version__ = "0.1.0"
9
+ __author__ = "iamfaham"
10
+ __description__ = "Multimodal Sentiment Analysis with Text, Audio, and Vision Models"
src/config/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """
2
+ Configuration package for Sentiment Fused application.
3
+ """
4
+
5
+ from .settings import *
src/config/settings.py ADDED
@@ -0,0 +1,153 @@
1
+ """
2
+ Centralized configuration settings for the Sentiment Fused application.
3
+ """
4
+
5
+ import os
6
+ from pathlib import Path
7
+ from typing import Dict, Any
8
+
9
+ # Application Configuration
10
+ APP_NAME = "Multimodal Sentiment Analysis"
11
+ APP_VERSION = "0.1.0"
12
+ APP_ICON = "🧠"
13
+ APP_LAYOUT = "wide"
14
+
15
+ # Model Configuration
16
+ VISION_MODEL_CONFIG = {
17
+ "model_name": "resnet50",
18
+ "input_size": 224,
19
+ "num_classes": 7, # FER2013 default
20
+ "crop_tightness": 0.0, # No padding for tightest crop
21
+ }
22
+
23
+ AUDIO_MODEL_CONFIG = {
24
+ "model_name": "facebook/wav2vec2-base",
25
+ "target_sampling_rate": 16000,
26
+ "max_duration": 5.0,
27
+ "num_classes": 3, # Default sentiment classes
28
+ }
29
+
30
+ TEXT_MODEL_CONFIG = {
31
+ "model_name": "textblob",
32
+ "confidence_threshold": 0.1,
33
+ }
34
+
35
+ # File Processing Configuration
36
+ SUPPORTED_IMAGE_FORMATS = ["png", "jpg", "jpeg", "bmp", "tiff"]
37
+ SUPPORTED_AUDIO_FORMATS = ["wav", "mp3", "m4a", "flac"]
38
+ SUPPORTED_VIDEO_FORMATS = ["mp4", "avi", "mov", "mkv", "wmv", "flv"]
39
+
40
+ # Video Processing Configuration
41
+ MAX_VIDEO_FRAMES = 5
42
+ VIDEO_FRAME_INTERVALS = [0, 0.25, 0.5, 0.75, 1.0] # Frame extraction points
43
+
44
+ # Image Preprocessing Configuration
45
+ IMAGE_TRANSFORMS = {
46
+ "resize": 224,
47
+ "center_crop": 224,
48
+ "normalize_mean": [0.485, 0.456, 0.406],
49
+ "normalize_std": [0.229, 0.224, 0.225],
50
+ }
51
+
52
+ # Sentiment Mapping Configuration
53
+ SENTIMENT_MAPPINGS = {
54
+ 3: {0: "Negative", 1: "Neutral", 2: "Positive"},
55
+ 4: {0: "Angry", 1: "Sad", 2: "Happy", 3: "Neutral"},
56
+ 7: {
57
+ 0: "Angry",
58
+ 1: "Disgust",
59
+ 2: "Fear",
60
+ 3: "Happy",
61
+ 4: "Sad",
62
+ 5: "Surprise",
63
+ 6: "Neutral",
64
+ },
65
+ }
66
+
67
+ # UI Configuration
68
+ UI_COLORS = {
69
+ "primary": "#1f77b4",
70
+ "success": "#28a745",
71
+ "warning": "#ffc107",
72
+ "danger": "#dc3545",
73
+ "info": "#17a2b8",
74
+ "light": "#f8f9fa",
75
+ "dark": "#343a40",
76
+ }
77
+
78
+ # CSS Styles
79
+ CUSTOM_CSS = """
80
+ <style>
81
+ .main-header {
82
+ font-size: 2.5rem;
83
+ font-weight: bold;
84
+ color: #1f77b4;
85
+ text-align: center;
86
+ margin-bottom: 2rem;
87
+ }
88
+ .model-card {
89
+ background-color: #f0f2f6;
90
+ padding: 1.5rem;
91
+ border-radius: 10px;
92
+ margin: 1rem 0;
93
+ border-left: 4px solid #1f77b4;
94
+ }
95
+ .result-box {
96
+ background-color: #e8f4fd;
97
+ padding: 1rem;
98
+ border-radius: 8px;
99
+ border: 1px solid #1f77b4;
100
+ margin: 1rem 0;
101
+ }
102
+ .upload-section {
103
+ background-color: #f8f9fa;
104
+ padding: 1.5rem;
105
+ border-radius: 10px;
106
+ border: 2px dashed #dee2e6;
107
+ text-align: center;
108
+ margin: 1rem 0;
109
+ }
110
+ </style>
111
+ """
112
+
113
+ # Paths
114
+ BASE_DIR = Path(__file__).parent.parent.parent
115
+ MODELS_DIR = BASE_DIR / "model_weights"
116
+ SRC_DIR = BASE_DIR / "src"
117
+ UI_DIR = SRC_DIR / "ui"
118
+
119
+ # Environment Variables
120
+ ENV_VARS = {
121
+ "VISION_MODEL_DRIVE_ID": os.getenv("VISION_MODEL_DRIVE_ID", ""),
122
+ "AUDIO_MODEL_DRIVE_ID": os.getenv("AUDIO_MODEL_DRIVE_ID", ""),
123
+ "VISION_MODEL_FILENAME": os.getenv("VISION_MODEL_FILENAME", "resnet50_model.pth"),
124
+ "AUDIO_MODEL_FILENAME": os.getenv("AUDIO_MODEL_FILENAME", "wav2vec2_model.pth"),
125
+ }
126
+
127
+ # Logging Configuration
128
+ LOGGING_CONFIG = {
129
+ "level": "INFO",
130
+ "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
131
+ "handlers": ["console", "file"],
132
+ }
133
+
134
+ # Cache Configuration
135
+ CACHE_CONFIG = {
136
+ "ttl": 3600, # 1 hour
137
+ "max_entries": 100,
138
+ }
139
+
140
+
141
+ def get_sentiment_mapping(num_classes: int) -> Dict[int, str]:
142
+ """Get sentiment mapping based on number of classes."""
143
+ return SENTIMENT_MAPPINGS.get(
144
+ num_classes, {i: f"Class_{i}" for i in range(num_classes)}
145
+ )
146
+
147
+
148
+ def validate_environment() -> Dict[str, bool]:
149
+ """Validate that required environment variables are set."""
150
+ validation = {}
151
+ for var_name, var_value in ENV_VARS.items():
152
+ validation[var_name] = bool(var_value)
153
+ return validation
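
A quick check of the mapping helper's fallback path (a minimal sketch; run from the project root so the src package resolves):

    from src.config.settings import get_sentiment_mapping

    print(get_sentiment_mapping(3))  # {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
    print(get_sentiment_mapping(5))  # unknown class count falls back to {0: 'Class_0', ..., 4: 'Class_4'}
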
src/models/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ """
2
+ Model package for Sentiment Fused application.
3
+ """
4
+
5
+ from .vision_model import *
6
+ from .audio_model import *
7
+ from .text_model import *
8
+ from .fused_model import *
src/models/audio_model.py ADDED
@@ -0,0 +1,154 @@
1
+ """
2
+ Audio sentiment analysis model using fine-tuned Wav2Vec2.
3
+ """
4
+
5
+ import logging
6
+ import streamlit as st
7
+ from typing import Tuple
8
+ import torch
11
+ from ..config.settings import AUDIO_MODEL_CONFIG
12
+ from ..utils.preprocessing import preprocess_audio_for_model
13
+ from ..utils.sentiment_mapping import get_sentiment_mapping
14
+ from src.utils.simple_model_manager import SimpleModelManager
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ @st.cache_resource
20
+ def load_audio_model():
21
+ """Load the pre-trained Wav2Vec2 audio sentiment model from Google Drive."""
22
+ try:
23
+ manager = SimpleModelManager()
24
+ if manager is None:
25
+ logger.error("Model manager not available")
26
+ st.error("Model manager not available")
27
+ return None, None, None, None
28
+
29
+ # Load the model using the Google Drive manager
30
+ model, device = manager.load_audio_model()
31
+
32
+ if model is None:
33
+ logger.error("Failed to load audio model from Google Drive")
34
+ st.error("Failed to load audio model from Google Drive")
35
+ return None, None, None, None
36
+
37
+ # For Wav2Vec2 models, we need to determine the number of classes
38
+ # This is typically available in the model configuration
39
+ try:
40
+ num_classes = model.config.num_labels
41
+ except AttributeError:
42
+ # Fallback: try to infer from the model
43
+ try:
44
+ num_classes = model.classifier.out_features
45
+ except AttributeError:
46
+ num_classes = AUDIO_MODEL_CONFIG["num_classes"] # Default assumption
47
+
48
+ # Load feature extractor
49
+ from transformers import AutoFeatureExtractor
50
+
51
+ feature_extractor = AutoFeatureExtractor.from_pretrained(
52
+ AUDIO_MODEL_CONFIG["model_name"]
53
+ )
54
+
55
+ logger.info(f"Audio model loaded successfully with {num_classes} classes!")
56
+ st.success(f"Audio model loaded successfully with {num_classes} classes!")
57
+ return model, device, num_classes, feature_extractor
58
+ except Exception as e:
59
+ logger.error(f"Error loading audio model: {str(e)}")
60
+ st.error(f"Error loading audio model: {str(e)}")
61
+ return None, None, None, None
62
+
63
+
64
+ def predict_audio_sentiment(audio_bytes: bytes) -> Tuple[str, float]:
65
+ """
66
+ Analyze audio sentiment using fine-tuned Wav2Vec2 model.
67
+
68
+ Preprocessing matches CREMA-D + RAVDESS training specifications:
69
+ - Target sampling rate: 16kHz
70
+ - Max duration: 5.0 seconds
71
+ - Feature extraction: AutoFeatureExtractor with max_length, truncation, padding
72
+
73
+ Args:
74
+ audio_bytes: Raw audio bytes
75
+
76
+ Returns:
77
+ Tuple of (sentiment, confidence)
78
+ """
79
+ if audio_bytes is None:
80
+ return "No audio provided", 0.0
81
+
82
+ try:
83
+ # Load model if not already loaded
84
+ model, device, num_classes, feature_extractor = load_audio_model()
85
+ if model is None:
86
+ return "Model not loaded", 0.0
87
+
88
+ # Use our centralized preprocessing function
89
+ input_values = preprocess_audio_for_model(audio_bytes)
90
+ if input_values is None:
91
+ return "Preprocessing failed", 0.0
92
+
93
+ # Debug: Log the tensor shape
94
+ logger.info(f"Preprocessed audio tensor shape: {input_values.shape}")
95
+
96
+ # Ensure correct tensor shape: [batch_size, sequence_length]
97
+ if input_values.dim() == 1:
98
+ input_values = input_values.unsqueeze(0) # Add batch dimension if missing
99
+ elif input_values.dim() == 3:
100
+ # If we get [batch, sequence, channels], squeeze the channels
101
+ input_values = input_values.squeeze(-1)
102
+
103
+ logger.info(f"Final audio tensor shape: {input_values.shape}")
104
+
105
+ # Move to device
106
+ input_values = input_values.to(device)
107
+
108
+ # Run inference
109
+ with torch.no_grad():
110
+ outputs = model(input_values)
111
+ probabilities = torch.softmax(outputs.logits, dim=1)
112
+ confidence, predicted = torch.max(probabilities, 1)
113
+
114
+ # Get sentiment mapping based on number of classes
115
+ sentiment_map = get_sentiment_mapping(num_classes)
116
+ sentiment = sentiment_map[predicted.item()]
117
+ confidence_score = confidence.item()
118
+
119
+ logger.info(
120
+ f"Audio sentiment analysis completed: {sentiment} (confidence: {confidence_score:.2f})"
121
+ )
122
+ return sentiment, confidence_score
123
+
124
+ except ImportError as e:
125
+ logger.error(f"Required library not installed: {str(e)}")
126
+ st.error(f"Required library not installed: {str(e)}")
127
+ st.info("Please install: pip install librosa transformers")
128
+ return "Library not available", 0.0
129
+ except Exception as e:
130
+ logger.error(f"Error in audio sentiment prediction: {str(e)}")
131
+ st.error(f"Error in audio sentiment prediction: {str(e)}")
132
+ return "Error occurred", 0.0
133
+
134
+
135
+ def get_audio_model_info() -> dict:
136
+ """Get information about the audio sentiment model."""
137
+ return {
138
+ "model_name": AUDIO_MODEL_CONFIG["model_name"],
139
+ "description": "Fine-tuned Wav2Vec2 for audio sentiment analysis",
140
+ "capabilities": [
141
+ "Audio sentiment classification",
142
+ "Automatic audio preprocessing",
143
+ "CREMA-D + RAVDESS dataset compatibility",
144
+ "Real-time audio analysis",
145
+ ],
146
+ "input_format": "Audio files (WAV, MP3, M4A, FLAC)",
147
+ "output_format": "Sentiment label + confidence score",
148
+ "preprocessing": {
149
+ "sampling_rate": f"{AUDIO_MODEL_CONFIG['target_sampling_rate']} Hz",
150
+ "max_duration": f"{AUDIO_MODEL_CONFIG['max_duration']} seconds",
151
+ "feature_extraction": "AutoFeatureExtractor",
152
+ "normalization": "Model-specific",
153
+ },
154
+ }
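
The predictor takes raw bytes, so callers never touch librosa or the feature extractor directly. A minimal sketch (assumes a Streamlit session and the Google Drive model IDs configured in the environment; sample.wav is a hypothetical input file):

    from src.models.audio_model import predict_audio_sentiment

    with open("sample.wav", "rb") as f:
        sentiment, confidence = predict_audio_sentiment(f.read())
    print(f"{sentiment} ({confidence:.2f})")
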
src/models/fused_model.py ADDED
@@ -0,0 +1,176 @@
1
+ """
2
+ Fused sentiment analysis model combining text, audio, and vision models.
3
+ """
4
+
5
+ import logging
6
+ from typing import Tuple, Optional, List
7
+ from PIL import Image
8
+
9
+ from .text_model import predict_text_sentiment
10
+ from .audio_model import predict_audio_sentiment
11
+ from .vision_model import predict_vision_sentiment
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ def predict_fused_sentiment(
17
+ text: Optional[str] = None,
18
+ audio_bytes: Optional[bytes] = None,
19
+ image: Optional[Image.Image] = None,
20
+ ) -> Tuple[str, float]:
21
+ """
22
+ Implement ensemble/fusion logic combining all three models.
23
+
24
+ Args:
25
+ text: Input text for text sentiment analysis
26
+ audio_bytes: Audio bytes for audio sentiment analysis
27
+ image: Input image for vision sentiment analysis
28
+
29
+ Returns:
30
+ Tuple of (fused_sentiment, overall_confidence)
31
+ """
32
+ results = []
33
+
34
+ if text:
35
+ text_sentiment, text_conf = predict_text_sentiment(text)
36
+ results.append(("Text", text_sentiment, text_conf))
37
+
38
+ if audio_bytes:
39
+ audio_sentiment, audio_conf = predict_audio_sentiment(audio_bytes)
40
+ results.append(("Audio", audio_sentiment, audio_conf))
41
+
42
+ if image:
43
+ vision_sentiment, vision_conf = predict_vision_sentiment(image)
44
+ results.append(("Vision", vision_sentiment, vision_conf))
45
+
46
+ if not results:
47
+ return "No inputs provided", 0.0
48
+
49
+ # Simple ensemble logic (can be enhanced with more sophisticated fusion strategies)
50
+ sentiment_counts = {}
51
+ total_confidence = 0
52
+ modality_weights = {"Text": 0.3, "Audio": 0.35, "Vision": 0.35} # Weighted voting
53
+
54
+ for modality, sentiment, confidence in results:
55
+ if sentiment not in sentiment_counts:
56
+ sentiment_counts[sentiment] = {"count": 0, "weighted_conf": 0}
57
+
58
+ sentiment_counts[sentiment]["count"] += 1
59
+ weight = modality_weights.get(modality, 0.33)
60
+ sentiment_counts[sentiment]["weighted_conf"] += confidence * weight
61
+ total_confidence += confidence
62
+
63
+ # Weighted majority voting with confidence averaging
64
+ if sentiment_counts:
65
+ # Find sentiment with highest weighted confidence
66
+ final_sentiment = max(
67
+ sentiment_counts.keys(), key=lambda s: sentiment_counts[s]["weighted_conf"]
68
+ )
69
+
70
+ # Calculate overall confidence as weighted average
71
+ avg_confidence = total_confidence / len(results)
72
+
73
+ logger.info(
74
+ f"Fused sentiment analysis completed: {final_sentiment} (confidence: {avg_confidence:.2f})"
75
+ )
76
+ logger.info(f"Individual results: {results}")
77
+
78
+ return final_sentiment, avg_confidence
79
+ else:
80
+ return "No valid predictions", 0.0
81
+
82
+
83
+ def get_fusion_strategy_info() -> dict:
84
+ """Get information about the fusion strategy."""
85
+ return {
86
+ "strategy_name": "Weighted Ensemble Fusion",
87
+ "description": "Combines predictions from text, audio, and vision models using weighted voting",
88
+ "modality_weights": {"Text": 0.3, "Audio": 0.35, "Vision": 0.35},
89
+ "fusion_method": "Weighted majority voting with confidence averaging",
90
+ "advantages": [
91
+ "Robust to individual model failures",
92
+ "Leverages complementary information from different modalities",
93
+ "Configurable modality weights",
94
+ "Real-time ensemble prediction",
95
+ ],
96
+ "use_cases": [
97
+ "Multi-modal content analysis",
98
+ "Enhanced sentiment accuracy",
99
+ "Cross-validation of predictions",
100
+ "Comprehensive emotional understanding",
101
+ ],
102
+ }
103
+
104
+
105
+ def analyze_modality_agreement(
106
+ text: Optional[str] = None,
107
+ audio_bytes: Optional[bytes] = None,
108
+ image: Optional[Image.Image] = None,
109
+ ) -> dict:
110
+ """
111
+ Analyze agreement between different modalities.
112
+
113
+ Args:
114
+ text: Input text
115
+ audio_bytes: Audio bytes
116
+ image: Input image
117
+
118
+ Returns:
119
+ Dictionary containing agreement analysis
120
+ """
121
+ results = {}
122
+
123
+ if text:
124
+ text_sentiment, text_conf = predict_text_sentiment(text)
125
+ results["text"] = {"sentiment": text_sentiment, "confidence": text_conf}
126
+
127
+ if audio_bytes:
128
+ audio_sentiment, audio_conf = predict_audio_sentiment(audio_bytes)
129
+ results["audio"] = {"sentiment": audio_sentiment, "confidence": audio_conf}
130
+
131
+ if image:
132
+ vision_sentiment, vision_conf = predict_vision_sentiment(image)
133
+ results["vision"] = {"sentiment": vision_sentiment, "confidence": vision_conf}
134
+
135
+ if len(results) < 2:
136
+ return {"agreement_level": "insufficient_modalities", "details": results}
137
+
138
+ # Analyze agreement
139
+ sentiments = [result["sentiment"] for result in results.values()]
140
+ unique_sentiments = set(sentiments)
141
+
142
+ if len(unique_sentiments) == 1:
143
+ agreement_level = "perfect"
144
+ agreement_score = 1.0
145
+ elif len(unique_sentiments) == 2:
146
+ agreement_level = "partial"
147
+ agreement_score = 0.5
148
+ else:
149
+ agreement_level = "low"
150
+ agreement_score = 0.0
151
+
152
+ # Calculate confidence consistency
153
+ confidences = [result["confidence"] for result in results.values()]
154
+ mean_conf = sum(confidences) / len(confidences) if confidences else 0.0
+ confidence_std = (sum((c - mean_conf) ** 2 for c in confidences) / len(confidences)) ** 0.5 if confidences else 0.0
155
+
156
+ return {
157
+ "agreement_level": agreement_level,
158
+ "agreement_score": agreement_score,
159
+ "modalities_analyzed": len(results),
160
+ "sentiment_distribution": {s: sentiments.count(s) for s in unique_sentiments},
161
+ "confidence_consistency": confidence_std,
162
+ "individual_results": results,
163
+ "recommendation": _get_agreement_recommendation(agreement_level, len(results)),
164
+ }
165
+
166
+
167
+ def _get_agreement_recommendation(agreement_level: str, num_modalities: int) -> str:
168
+ """Get recommendation based on agreement level."""
169
+ if agreement_level == "perfect":
170
+ return "High confidence in prediction - all modalities agree"
171
+ elif agreement_level == "partial":
172
+ return "Moderate confidence - consider modality-specific factors"
173
+ elif agreement_level == "low":
174
+ return "Low confidence - modalities disagree, consider context"
175
+ else:
176
+ return "Insufficient data for reliable fusion"
src/models/text_model.py ADDED
@@ -0,0 +1,81 @@
1
+ """
2
+ Text sentiment analysis model using TextBlob.
3
+ """
4
+
5
+ import logging
6
+ from typing import Tuple
7
+ from ..config.settings import TEXT_MODEL_CONFIG
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
+ def predict_text_sentiment(text: str) -> Tuple[str, float]:
13
+ """
14
+ Analyze text sentiment using TextBlob.
15
+
16
+ Args:
17
+ text: Input text to analyze
18
+
19
+ Returns:
20
+ Tuple of (sentiment, confidence)
21
+ """
22
+ if not text or text.strip() == "":
23
+ return "No text provided", 0.0
24
+
25
+ try:
26
+ from textblob import TextBlob
27
+
28
+ # Create TextBlob object
29
+ blob = TextBlob(text)
30
+
31
+ # Get polarity (-1 to 1, where -1 is very negative, 1 is very positive)
32
+ polarity = blob.sentiment.polarity
33
+
34
+ # Get subjectivity (0 to 1, where 0 is very objective, 1 is very subjective)
35
+ subjectivity = blob.sentiment.subjectivity
36
+
37
+ # Convert polarity to sentiment categories
38
+ confidence_threshold = TEXT_MODEL_CONFIG["confidence_threshold"]
39
+
40
+ if polarity > confidence_threshold:
41
+ sentiment = "Positive"
42
+ confidence = min(0.95, 0.6 + abs(polarity) * 0.3)
43
+ elif polarity < -confidence_threshold:
44
+ sentiment = "Negative"
45
+ confidence = min(0.95, 0.6 + abs(polarity) * 0.3)
46
+ else:
47
+ sentiment = "Neutral"
48
+ confidence = 0.7 - abs(polarity) * 0.2
49
+
50
+ # Round confidence to 2 decimal places
51
+ confidence = round(confidence, 2)
52
+
53
+ logger.info(
54
+ f"Text sentiment analysis completed: {sentiment} (confidence: {confidence})"
55
+ )
56
+ return sentiment, confidence
57
+
58
+ except ImportError:
59
+ logger.error(
60
+ "TextBlob not installed. Please install it with: pip install textblob"
61
+ )
62
+ return "TextBlob not available", 0.0
63
+ except Exception as e:
64
+ logger.error(f"Error in text sentiment analysis: {str(e)}")
65
+ return "Error occurred", 0.0
66
+
67
+
68
+ def get_text_model_info() -> dict:
69
+ """Get information about the text sentiment model."""
70
+ return {
71
+ "model_name": TEXT_MODEL_CONFIG["model_name"],
72
+ "description": "Natural Language Processing based sentiment analysis using TextBlob",
73
+ "capabilities": [
74
+ "Text sentiment classification (Positive/Negative/Neutral)",
75
+ "Confidence scoring",
76
+ "Real-time analysis",
77
+ "No external API required",
78
+ ],
79
+ "input_format": "Plain text",
80
+ "output_format": "Sentiment label + confidence score",
81
+ }
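
The thresholding means a polarity of 0.5 maps to Positive with confidence min(0.95, 0.6 + 0.5 * 0.3) = 0.75. A quick check (TextBlob installed; the exact polarity TextBlob assigns is its own lexicon's estimate):

    from src.models.text_model import predict_text_sentiment

    print(predict_text_sentiment("I love this"))      # ("Positive", 0.75) if polarity is 0.5
    print(predict_text_sentiment("This is a table"))  # near-zero polarity lands in Neutral
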
src/models/vision_model.py ADDED
@@ -0,0 +1,157 @@
1
+ """
2
+ Vision sentiment analysis model using fine-tuned ResNet-50.
3
+ """
4
+
5
+ import logging
6
+ import streamlit as st
7
+ from typing import Tuple
8
+ import torch
9
+ import torch.nn.functional as F
10
+ from PIL import Image
11
+
12
+ from ..config.settings import VISION_MODEL_CONFIG
13
+ from ..utils.preprocessing import detect_and_preprocess_face, get_vision_transforms
14
+ from ..utils.sentiment_mapping import get_sentiment_mapping
15
+ from src.utils.simple_model_manager import SimpleModelManager
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ @st.cache_resource
21
+ def get_model_manager():
22
+ """Get the Google Drive model manager instance."""
23
+ try:
24
+ manager = SimpleModelManager()
25
+ return manager
26
+ except Exception as e:
27
+ logger.error(f"Failed to initialize model manager: {e}")
28
+ st.error(f"Failed to initialize model manager: {e}")
29
+ return None
30
+
31
+
32
+ @st.cache_resource
33
+ def load_vision_model():
34
+ """Load the pre-trained ResNet-50 vision sentiment model from Google Drive."""
35
+ try:
36
+ manager = get_model_manager()
37
+ if manager is None:
38
+ logger.error("Model manager not available")
39
+ st.error("Model manager not available")
40
+ return None, None, None
41
+
42
+ # Load the model using the Google Drive manager
43
+ model, device, num_classes = manager.load_vision_model()
44
+
45
+ if model is None:
46
+ logger.error("Failed to load vision model from Google Drive")
47
+ st.error("Failed to load vision model from Google Drive")
48
+ return None, None, None
49
+
50
+ logger.info(f"Vision model loaded successfully with {num_classes} classes!")
51
+ st.success(f"Vision model loaded successfully with {num_classes} classes!")
52
+ return model, device, num_classes
53
+ except Exception as e:
54
+ logger.error(f"Error loading vision model: {str(e)}")
55
+ st.error(f"Error loading vision model: {str(e)}")
56
+ return None, None, None
57
+
58
+
59
+ def predict_vision_sentiment(
60
+ image: Image.Image, crop_tightness: float = None
61
+ ) -> Tuple[str, float]:
62
+ """
63
+ Load ResNet-50 and run inference for vision sentiment analysis.
64
+
65
+ Args:
66
+ image: Input image (PIL Image or numpy array)
67
+ crop_tightness: Padding around face (0.0 = no padding, 0.3 = 30% padding)
68
+
69
+ Returns:
70
+ Tuple of (sentiment, confidence)
71
+ """
72
+ if image is None:
73
+ return "No image provided", 0.0
74
+
75
+ try:
76
+ # Use default crop tightness if not specified
77
+ if crop_tightness is None:
78
+ crop_tightness = VISION_MODEL_CONFIG["crop_tightness"]
79
+
80
+ # Load model if not already loaded
81
+ model, device, num_classes = load_vision_model()
82
+ if model is None:
83
+ return "Model not loaded", 0.0
84
+
85
+ # Preprocess image to match FER2013 format
86
+ st.info(
87
+ "Detecting face and preprocessing image to match training data format..."
88
+ )
89
+ preprocessed_image = detect_and_preprocess_face(
90
+ image, crop_tightness=crop_tightness
91
+ )
92
+
93
+ if preprocessed_image is None:
94
+ return "Image preprocessing failed", 0.0
95
+
96
+ # Show preprocessed image
97
+ st.image(
98
+ preprocessed_image,
99
+ caption="Preprocessed Image (224x224 Grayscale → 3-channel RGB)",
100
+ width=200,
101
+ )
102
+
103
+ # Get transforms
104
+ transform = get_vision_transforms()
105
+
106
+ # Convert preprocessed image to tensor
107
+ image_tensor = transform(preprocessed_image).unsqueeze(0).to(device)
108
+
109
+ # Run inference
110
+ with torch.no_grad():
111
+ outputs = model(image_tensor)
112
+
113
+ # Debug: print output shape
114
+ st.info(f"Model output shape: {outputs.shape}")
115
+
116
+ probabilities = F.softmax(outputs, dim=1)
117
+ confidence, predicted = torch.max(probabilities, 1)
118
+
119
+ # Get sentiment mapping based on number of classes
120
+ sentiment_map = get_sentiment_mapping(num_classes)
121
+ sentiment = sentiment_map[predicted.item()]
122
+ confidence_score = confidence.item()
123
+
124
+ logger.info(
125
+ f"Vision sentiment analysis completed: {sentiment} (confidence: {confidence_score:.2f})"
126
+ )
127
+ return sentiment, confidence_score
128
+
129
+ except Exception as e:
130
+ logger.error(f"Error in vision sentiment prediction: {str(e)}")
131
+ st.error(f"Error in vision sentiment prediction: {str(e)}")
132
+ # Guard: num_classes is unbound if the failure happened before the model loaded
+ if "num_classes" in locals():
+ st.error(
+ f"Model output shape mismatch. Expected {num_classes} classes."
+ )
135
+ return "Error occurred", 0.0
136
+
137
+
138
+ def get_vision_model_info() -> dict:
139
+ """Get information about the vision sentiment model."""
140
+ return {
141
+ "model_name": VISION_MODEL_CONFIG["model_name"],
142
+ "description": "Fine-tuned ResNet-50 for facial expression sentiment analysis",
143
+ "capabilities": [
144
+ "Facial expression recognition",
145
+ "Automatic face detection and cropping",
146
+ "FER2013 dataset format compatibility",
147
+ "Real-time image analysis",
148
+ ],
149
+ "input_format": "Images (PNG, JPG, JPEG, BMP, TIFF)",
150
+ "output_format": "Sentiment label + confidence score",
151
+ "preprocessing": {
152
+ "face_detection": "OpenCV Haar Cascade",
153
+ "image_size": f"{VISION_MODEL_CONFIG['input_size']}x{VISION_MODEL_CONFIG['input_size']}",
154
+ "color_format": "Grayscale → 3-channel RGB",
155
+ "normalization": "ImageNet standard",
156
+ },
157
+ }
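
End-to-end usage mirrors the audio path: pass a PIL image and get back a label plus confidence. A minimal sketch (assumes a Streamlit session, Drive model IDs in the environment, and a hypothetical face.jpg):

    from PIL import Image
    from src.models.vision_model import predict_vision_sentiment

    img = Image.open("face.jpg")
    sentiment, confidence = predict_vision_sentiment(img, crop_tightness=0.05)
    print(f"{sentiment} ({confidence:.2f})")
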
src/ui/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ """
2
+ UI package for Sentiment Fused application.
3
+ """
4
+
5
+ from .components import *
6
+ from .pages import *
7
+ from .styles import *
src/ui/styles.py ADDED
@@ -0,0 +1,97 @@
1
+ """
2
+ UI styles and CSS for the Sentiment Fused application.
3
+ """
4
+
5
+ from ..config.settings import CUSTOM_CSS, UI_COLORS
6
+
7
+
8
+ def get_custom_css() -> str:
9
+ """Get the custom CSS styles for the application."""
10
+ return CUSTOM_CSS
11
+
12
+
13
+ def get_ui_colors() -> dict:
14
+ """Get the UI color scheme."""
15
+ return UI_COLORS
16
+
17
+
18
+ def get_sentiment_color_style(sentiment: str) -> str:
19
+ """Get color style for different sentiment types."""
20
+ colors = {
21
+ "Positive": "color: #28a745;",
22
+ "Negative": "color: #dc3545;",
23
+ "Neutral": "color: #ffc107;",
24
+ "Angry": "color: #dc3545;",
25
+ "Sad": "color: #17a2b8;",
26
+ "Happy": "color: #28a745;",
27
+ "Fear": "color: #6f42c1;",
28
+ "Disgust": "color: #fd7e14;",
29
+ "Surprise": "color: #ffc107;",
30
+ }
31
+ return colors.get(sentiment, "color: #6c757d;")
32
+
33
+
34
+ def get_metric_style(metric_type: str = "default") -> str:
35
+ """Get styling for different metric types."""
36
+ styles = {
37
+ "default": "background-color: #f8f9fa; padding: 1rem; border-radius: 8px;",
38
+ "success": "background-color: #d4edda; padding: 1rem; border-radius: 8px; border: 1px solid #c3e6cb;",
39
+ "warning": "background-color: #fff3cd; padding: 1rem; border-radius: 8px; border: 1px solid #ffeaa7;",
40
+ "error": "background-color: #f8d7da; padding: 1rem; border-radius: 8px; border: 1px solid #f5c6cb;",
41
+ "info": "background-color: #d1ecf1; padding: 1rem; border-radius: 8px; border: 1px solid #bee5eb;",
42
+ }
43
+ return styles.get(metric_type, styles["default"])
44
+
45
+
46
+ def get_card_style(card_type: str = "default") -> str:
47
+ """Get styling for different card types."""
48
+ styles = {
49
+ "default": "background-color: #f0f2f6; padding: 1.5rem; border-radius: 10px; margin: 1rem 0; border-left: 4px solid #1f77b4;",
50
+ "model": "background-color: #f0f2f6; padding: 1.5rem; border-radius: 10px; margin: 1rem 0; border-left: 4px solid #1f77b4;",
51
+ "result": "background-color: #e8f4fd; padding: 1rem; border-radius: 8px; border: 1px solid #1f77b4; margin: 1rem 0;",
52
+ "upload": "background-color: #f8f9fa; padding: 1.5rem; border-radius: 10px; border: 2px dashed #dee2e6; text-align: center; margin: 1rem 0;",
53
+ "info": "background-color: #d1ecf1; padding: 1rem; border-radius: 8px; border: 1px solid #bee5eb; margin: 1rem 0;",
54
+ }
55
+ return styles.get(card_type, styles["default"])
56
+
57
+
58
+ def get_button_style(button_type: str = "primary") -> str:
59
+ """Get styling for different button types."""
60
+ styles = {
61
+ "primary": "background-color: #1f77b4; color: white; border: none; padding: 0.5rem 1rem; border-radius: 5px;",
62
+ "secondary": "background-color: #6c757d; color: white; border: none; padding: 0.5rem 1rem; border-radius: 5px;",
63
+ "success": "background-color: #28a745; color: white; border: none; padding: 0.5rem 1rem; border-radius: 5px;",
64
+ "warning": "background-color: #ffc107; color: black; border: none; padding: 0.5rem 1rem; border-radius: 5px;",
65
+ "danger": "background-color: #dc3545; color: white; border: none; padding: 0.5rem 1rem; border-radius: 5px;",
66
+ }
67
+ return styles.get(button_type, styles["primary"])
68
+
69
+
70
+ def get_sidebar_style() -> str:
71
+ """Get styling for the sidebar."""
72
+ return """
73
+ <style>
74
+ .css-1d391kg {
75
+ background-color: #f8f9fa;
76
+ }
77
+ .css-1d391kg .sidebar-content {
78
+ padding: 1rem;
79
+ }
80
+ </style>
81
+ """
82
+
83
+
84
+ def get_header_style() -> str:
85
+ """Get styling for the main header."""
86
+ return """
87
+ <style>
88
+ .main-header {
89
+ font-size: 2.5rem;
90
+ font-weight: bold;
91
+ color: #1f77b4;
92
+ text-align: center;
93
+ margin-bottom: 2rem;
94
+ text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
95
+ }
96
+ </style>
97
+ """
src/utils/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ """
2
+ Utility functions package for Sentiment Fused application.
3
+ """
4
+
5
+ from .preprocessing import *
6
+ from .file_handling import *
7
+ from .sentiment_mapping import *
src/utils/file_handling.py ADDED
@@ -0,0 +1,189 @@
1
+ """
2
+ File handling utilities for different input types.
3
+ """
4
+
5
+ import os
6
+ import tempfile
7
+ import logging
8
+ from typing import IO, Optional, Tuple, Union
9
+ from pathlib import Path
10
+
11
+ from ..config.settings import (
12
+ SUPPORTED_IMAGE_FORMATS,
13
+ SUPPORTED_AUDIO_FORMATS,
14
+ SUPPORTED_VIDEO_FORMATS,
15
+ )
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ def validate_file_format(filename: str, supported_formats: list) -> bool:
21
+ """
22
+ Validate if a file has a supported format.
23
+
24
+ Args:
25
+ filename: Name of the file to validate
26
+ supported_formats: List of supported file extensions
27
+
28
+ Returns:
29
+ True if file format is supported, False otherwise
30
+ """
31
+ if not filename:
32
+ return False
33
+
34
+ file_extension = Path(filename).suffix.lower().lstrip(".")
35
+ return file_extension in supported_formats
36
+
37
+
38
+ def validate_image_file(filename: str) -> bool:
39
+ """Validate if a file is a supported image format."""
40
+ return validate_file_format(filename, SUPPORTED_IMAGE_FORMATS)
41
+
42
+
43
+ def validate_audio_file(filename: str) -> bool:
44
+ """Validate if a file is a supported audio format."""
45
+ return validate_file_format(filename, SUPPORTED_AUDIO_FORMATS)
46
+
47
+
48
+ def validate_video_file(filename: str) -> bool:
49
+ """Validate if a file is a supported video format."""
50
+ return validate_file_format(filename, SUPPORTED_VIDEO_FORMATS)
51
+
52
+
53
+ def get_file_info(file_object) -> dict:
54
+ """
55
+ Extract file information from a file object.
56
+
57
+ Args:
58
+ file_object: File object (e.g., StreamlitUploadedFile)
59
+
60
+ Returns:
61
+ Dictionary containing file information
62
+ """
63
+ try:
64
+ if hasattr(file_object, "getvalue"):
65
+ file_size = len(file_object.getvalue())
66
+ file_name = getattr(file_object, "name", "Unknown")
67
+ else:
68
+ file_size = len(file_object)
69
+ file_name = "Unknown"
70
+
71
+ file_extension = (
72
+ Path(file_name).suffix.lower().lstrip(".")
73
+ if file_name != "Unknown"
74
+ else "Unknown"
75
+ )
76
+
77
+ return {
78
+ "name": file_name,
79
+ "size_bytes": file_size,
80
+ "size_kb": file_size / 1024,
81
+ "size_mb": file_size / (1024 * 1024),
82
+ "extension": file_extension,
83
+ "is_valid_image": (
84
+ validate_image_file(file_name) if file_extension != "Unknown" else False
85
+ ),
86
+ "is_valid_audio": (
87
+ validate_audio_file(file_name) if file_extension != "Unknown" else False
88
+ ),
89
+ "is_valid_video": (
90
+ validate_video_file(file_name) if file_extension != "Unknown" else False
91
+ ),
92
+ }
93
+ except Exception as e:
94
+ logger.error(f"Error getting file info: {str(e)}")
95
+ return {
96
+ "name": "Unknown",
97
+ "size_bytes": 0,
98
+ "size_kb": 0,
99
+ "size_mb": 0,
100
+ "extension": "Unknown",
101
+ "is_valid_image": False,
102
+ "is_valid_audio": False,
103
+ "is_valid_video": False,
104
+ }
105
+
106
+
107
+ def create_temp_file(
108
+ suffix: str = "", prefix: str = "temp_"
109
+ ) -> Tuple[str, IO[bytes]]:
110
+ """
111
+ Create a temporary file with proper cleanup handling.
112
+
113
+ Args:
114
+ suffix: File extension suffix
115
+ prefix: File name prefix
116
+
117
+ Returns:
118
+ Tuple of (file_path, temp_file_object)
119
+ """
120
+ temp_file = tempfile.NamedTemporaryFile(suffix=suffix, prefix=prefix, delete=False)
121
+ return temp_file.name, temp_file
122
+
123
+
124
+ def cleanup_temp_file(file_path: str) -> bool:
125
+ """
126
+ Safely cleanup a temporary file.
127
+
128
+ Args:
129
+ file_path: Path to the temporary file
130
+
131
+ Returns:
132
+ True if cleanup was successful, False otherwise
133
+ """
134
+ try:
135
+ if os.path.exists(file_path):
136
+ os.unlink(file_path)
137
+ return True
138
+ return True
139
+ except (OSError, PermissionError) as e:
140
+ logger.warning(f"Could not delete temporary file {file_path}: {e}")
141
+ return False
142
+
143
+
144
+ def format_file_size(size_bytes: int) -> str:
145
+ """
146
+ Format file size in human-readable format.
147
+
148
+ Args:
149
+ size_bytes: File size in bytes
150
+
151
+ Returns:
152
+ Formatted file size string
153
+ """
154
+ if size_bytes < 1024:
155
+ return f"{size_bytes} B"
156
+ elif size_bytes < 1024 * 1024:
157
+ return f"{size_bytes / 1024:.1f} KB"
158
+ elif size_bytes < 1024 * 1024 * 1024:
159
+ return f"{size_bytes / (1024 * 1024):.1f} MB"
160
+ else:
161
+ return f"{size_bytes / (1024 * 1024 * 1024):.1f} GB"
162
+
163
+
164
+ def safe_file_operation(operation_func, *args, **kwargs):
165
+ """
166
+ Safely execute a file operation with proper error handling.
167
+
168
+ Args:
169
+ operation_func: Function to execute
170
+ *args: Positional arguments for the function
171
+ **kwargs: Keyword arguments for the function
172
+
173
+ Returns:
174
+ Result of the operation or None if it fails
175
+ """
176
+ try:
177
+ return operation_func(*args, **kwargs)
178
+ except FileNotFoundError as e:
179
+ logger.error(f"File not found: {e}")
180
+ return None
181
+ except PermissionError as e:
182
+ logger.error(f"Permission denied: {e}")
183
+ return None
184
+ except OSError as e:
185
+ logger.error(f"OS error: {e}")
186
+ return None
187
+ except Exception as e:
188
+ logger.error(f"Unexpected error in file operation: {e}")
189
+ return None
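
The size formatter rounds to one decimal per unit step, e.g.:

    from src.utils.file_handling import format_file_size

    print(format_file_size(512))          # "512 B"
    print(format_file_size(2560))         # "2.5 KB"
    print(format_file_size(5 * 1024**2))  # "5.0 MB"
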
src/utils/preprocessing.py ADDED
@@ -0,0 +1,467 @@
1
+ """
2
+ Preprocessing utilities for different input modalities.
3
+ """
4
+
+ from __future__ import annotations  # keeps Image/np type hints lazy when PIL is absent
5
+ import os
6
+ import tempfile
7
+ import logging
8
+ from typing import Any, List, Optional, Tuple, Union
9
+
10
+ try:
11
+ from PIL import Image
12
+ import numpy as np
13
+
14
+ PIL_AVAILABLE = True
15
+ except ImportError:
16
+ PIL_AVAILABLE = False
17
+ Image = None
18
+ np = None
19
+
20
+ from ..config.settings import (
21
+ IMAGE_TRANSFORMS,
22
+ AUDIO_MODEL_CONFIG,
23
+ VISION_MODEL_CONFIG,
24
+ SUPPORTED_IMAGE_FORMATS,
25
+ SUPPORTED_AUDIO_FORMATS,
26
+ )
27
+
30
+
31
+ # Add torch import for audio preprocessing
32
+ try:
33
+ import torch
34
+
35
+ TORCH_AVAILABLE = True
36
+ except ImportError:
37
+ TORCH_AVAILABLE = False
38
+ torch = None
39
+
40
+ logger = logging.getLogger(__name__)
41
+
42
+
43
+ def detect_and_preprocess_face(
44
+ image: Union[Image.Image, np.ndarray, Any], crop_tightness: float = 0.05
45
+ ) -> Optional[Image.Image]:
46
+ """
47
+ Detect face in image, crop to face region, convert to grayscale, and resize.
48
+
49
+ Args:
50
+ image: Input image (PIL Image or numpy array)
51
+ crop_tightness: Padding around face (0.0 = no padding, 0.3 = 30% padding)
52
+
53
+ Returns:
54
+ Preprocessed PIL Image or None if preprocessing fails
55
+ """
56
+ if not PIL_AVAILABLE:
57
+ logger.error("PIL (Pillow) not available. Cannot process images.")
58
+ return None
59
+
60
+ try:
61
+ import cv2
62
+
63
+ # Convert PIL image to OpenCV format
64
+ if isinstance(image, Image.Image):
65
+ img_array = np.array(image)
66
+ # Convert RGB to BGR for OpenCV
67
+ if len(img_array.shape) == 3:
68
+ img_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
69
+ else:
70
+ img_array = image
71
+
72
+ # Load face detection cascade
73
+ face_cascade = cv2.CascadeClassifier(
74
+ cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
75
+ )
76
+
77
+ # Convert to grayscale for face detection
78
+ gray = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)
79
+
80
+ # Detect faces
81
+ faces = face_cascade.detectMultiScale(
82
+ gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
83
+ )
84
+
85
+ if len(faces) == 0:
86
+ logger.warning("No face detected in the image. Using center crop instead.")
87
+ return _fallback_preprocessing(image)
88
+
89
+ # Get the largest face (assuming it's the main subject)
90
+ x, y, w, h = max(faces, key=lambda rect: rect[2] * rect[3])
91
+
92
+ # Add padding around the face based on user preference
93
+ padding_x = int(w * crop_tightness)
94
+ padding_y = int(h * crop_tightness)
95
+
96
+ # Ensure we don't go out of bounds
97
+ x1 = max(0, x - padding_x)
98
+ y1 = max(0, y - padding_y)
99
+ x2 = min(img_array.shape[1], x + w + padding_x)
100
+ y2 = min(img_array.shape[0], y + h + padding_y)
101
+
102
+ # Crop to face region
103
+ face_crop = img_array[y1:y2, x1:x2]
104
+
105
+ # Convert BGR to RGB first
106
+ face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
107
+
108
+ # Convert to grayscale
109
+ face_gray = cv2.cvtColor(face_crop_rgb, cv2.COLOR_RGB2GRAY)
110
+
111
+ # Resize to target size
112
+ target_size = IMAGE_TRANSFORMS["resize"]
113
+ face_resized = cv2.resize(
114
+ face_gray, (target_size, target_size), interpolation=cv2.INTER_AREA
115
+ )
116
+
117
+ # Convert grayscale to 3-channel RGB (replicate grayscale values)
118
+ face_rgb_3channel = cv2.cvtColor(face_resized, cv2.COLOR_GRAY2RGB)
119
+
120
+ # Convert back to PIL Image
121
+ face_pil = Image.fromarray(face_rgb_3channel)
122
+ return face_pil
123
+
124
+ except ImportError:
125
+ logger.error(
126
+ "OpenCV not installed. Please install it with: pip install opencv-python"
127
+ )
128
+ return _fallback_preprocessing(image)
129
+ except Exception as e:
130
+ logger.error(f"Error in face detection: {str(e)}")
131
+ return _fallback_preprocessing(image)
132
+
133
+
134
+ def _fallback_preprocessing(
135
+ image: Union[Image.Image, np.ndarray, Any],
136
+ ) -> Optional[Image.Image]:
137
+ """Fallback preprocessing when face detection fails."""
138
+ try:
139
+ if isinstance(image, Image.Image):
140
+ rgb_pil = image.convert("RGB")
141
+ target_size = IMAGE_TRANSFORMS["resize"]
142
+ resized = rgb_pil.resize(
143
+ (target_size, target_size), Image.Resampling.LANCZOS
144
+ )
145
+ # Convert to grayscale and then to 3-channel RGB
146
+ gray_pil = resized.convert("L")
147
+ gray_rgb_pil = gray_pil.convert("RGB")
148
+ return gray_rgb_pil
149
+ return None
150
+ except Exception as e:
151
+ logger.error(f"Fallback preprocessing failed: {str(e)}")
152
+ return None
153
+
154
+
155
+ def get_vision_transforms():
156
+ """Get the image transforms used during training."""
157
+ from torchvision import transforms
158
+
159
+ return transforms.Compose(
160
+ [
161
+ transforms.Resize(IMAGE_TRANSFORMS["resize"]),
162
+ transforms.CenterCrop(IMAGE_TRANSFORMS["center_crop"]),
163
+ transforms.ToTensor(),
164
+ transforms.Normalize(
165
+ mean=IMAGE_TRANSFORMS["normalize_mean"],
166
+ std=IMAGE_TRANSFORMS["normalize_std"],
167
+ ),
168
+ ]
169
+ )
170
+
171
+
172
+ def preprocess_audio_for_model(audio_bytes: bytes) -> Optional[torch.Tensor]:
173
+ """
174
+ Preprocess audio bytes for wav2vec2 model input using AutoFeatureExtractor.
175
+
176
+ Args:
177
+ audio_bytes: Raw audio bytes
178
+
179
+ Returns:
180
+ Preprocessed audio tensor ready for wav2vec2 model
181
+ """
182
+ if not TORCH_AVAILABLE:
183
+ logger.error("PyTorch not available. Cannot process audio.")
184
+ return None
185
+
186
+ try:
187
+ from transformers import AutoFeatureExtractor
188
+ import librosa
189
+
190
+ # Save audio bytes to temporary file
191
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
192
+ tmp_file.write(audio_bytes)
193
+ tmp_file_path = tmp_file.name
194
+
195
+ try:
196
+ # Load and resample audio to target sampling rate
197
+ audio, sr = librosa.load(
198
+ tmp_file_path, sr=AUDIO_MODEL_CONFIG["target_sampling_rate"]
199
+ )
200
+
201
+ # Use AutoFeatureExtractor (same as training)
202
+ feature_extractor = AutoFeatureExtractor.from_pretrained(
203
+ AUDIO_MODEL_CONFIG["model_name"]
204
+ )
205
+
206
+ # Calculate max length in samples (5 seconds * 16kHz = 80000 samples)
207
+ max_length = int(
208
+ AUDIO_MODEL_CONFIG["max_duration"]
209
+ * AUDIO_MODEL_CONFIG["target_sampling_rate"]
210
+ )
211
+
212
+ logger.info(f"Audio length: {len(audio)} samples, max_length: {max_length}")
213
+
214
+ inputs = feature_extractor(
215
+ audio,
216
+ sampling_rate=AUDIO_MODEL_CONFIG["target_sampling_rate"],
217
+ max_length=max_length,
218
+ truncation=True,
219
+ padding="max_length",
220
+ return_tensors="pt",
221
+ )
222
+
223
+ # Return tensor with correct shape for wav2vec2
224
+ # The model expects: [batch_size, sequence_length]
225
+ tensor = inputs.input_values
226
+
227
+ # Log the tensor shape for debugging
228
+ logger.info(f"Audio preprocessing output shape: {tensor.shape}")
229
+
230
+ return tensor
231
+
232
+ finally:
233
+ # Clean up temporary file
234
+ try:
235
+ os.unlink(tmp_file_path)
236
+ except (OSError, PermissionError):
237
+ pass
238
+
239
+ except ImportError as e:
240
+ logger.error(f"Required library not installed: {str(e)}")
241
+ raise ImportError("Please install: pip install transformers librosa torch")
242
+
243
+
244
+ def extract_frames_from_video(video_file, max_frames: int = 5) -> List[Any]:
245
+ """
246
+ Extract frames from video file for vision sentiment analysis.
247
+
248
+ Args:
249
+ video_file: Video file object
250
+ max_frames: Maximum number of frames to extract
251
+
252
+ Returns:
253
+ List of PIL Image objects
254
+ """
255
+ try:
256
+ import cv2
257
+
258
+ # Save video bytes to temporary file
259
+ with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
260
+ if hasattr(video_file, "getvalue"):
261
+ tmp_file.write(video_file.getvalue())
262
+ else:
263
+ tmp_file.write(video_file)
264
+ tmp_file_path = tmp_file.name
265
+
266
+ try:
267
+ # Open video with OpenCV
268
+ cap = cv2.VideoCapture(tmp_file_path)
269
+
270
+ if not cap.isOpened():
271
+ logger.error("Could not open video file")
272
+ return []
273
+
274
+ frames = []
275
+ total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
276
+ fps = cap.get(cv2.CAP_PROP_FPS)
277
+ duration = total_frames / fps if fps > 0 else 0
278
+
279
+ logger.info(
280
+ f"Video: {total_frames} frames, {fps:.1f} FPS, {duration:.1f}s duration"
281
+ )
282
+
283
+ # Extract frames at strategic intervals
284
+ if total_frames > 0:
285
+ # Select frames: start, 25%, 50%, 75%, end
286
+ frame_indices = [
287
+ 0,
288
+ int(total_frames * 0.25),
289
+ int(total_frames * 0.5),
290
+ int(total_frames * 0.75),
291
+ total_frames - 1,
292
+ ]
293
+ frame_indices = list(set(frame_indices)) # Remove duplicates
294
+ frame_indices.sort()
295
+
296
+ for frame_idx in frame_indices:
297
+ cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
298
+ ret, frame = cap.read()
299
+ if ret:
300
+ # Convert BGR to RGB
301
+ frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
302
+ # Convert to PIL Image
303
+ pil_image = Image.fromarray(frame_rgb)
304
+ frames.append(pil_image)
305
+
306
+ cap.release()
307
+ return frames
308
+
309
+ finally:
310
+ # Clean up temporary file
311
+ try:
312
+ os.unlink(tmp_file_path)
313
+ except (OSError, PermissionError):
314
+ pass
315
+
316
+ except ImportError:
317
+ logger.error(
318
+ "OpenCV not installed. Please install it with: pip install opencv-python"
319
+ )
320
+ return []
321
+ except Exception as e:
322
+ logger.error(f"Error extracting frames: {str(e)}")
323
+ return []
324
+
325
+
326
+ def extract_audio_from_video(video_file) -> Optional[bytes]:
327
+ """
328
+ Extract audio from video file for audio sentiment analysis.
329
+
330
+ Args:
331
+ video_file: Video file object
332
+
333
+ Returns:
334
+ Audio bytes in WAV format or None if extraction fails
335
+ """
336
+ try:
337
+ import tempfile
338
+
339
+ try:
340
+ from moviepy import VideoFileClip
341
+ except ImportError as e:
342
+ logger.error(f"MoviePy import failed: {e}")
343
+ return None
344
+
345
+ # Save video bytes to temporary file
346
+ with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
347
+ if hasattr(video_file, "getvalue"):
348
+ tmp_file.write(video_file.getvalue())
349
+ else:
350
+ tmp_file.write(video_file)
351
+ tmp_file_path = tmp_file.name
352
+
353
+ try:
354
+ # Extract audio using moviepy
355
+ video = VideoFileClip(tmp_file_path)
356
+ audio = video.audio
357
+
358
+ if audio is None:
359
+ logger.warning("No audio track found in video")
360
+ return None
361
+
362
+ # Save audio to temporary WAV file
363
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as audio_file:
364
+ audio_path = audio_file.name
365
+
366
+ # Export audio as WAV
367
+ audio.write_audiofile(audio_path, logger=None)
368
+
369
+ # Read the audio file and return bytes
370
+ with open(audio_path, "rb") as f:
371
+ audio_bytes = f.read()
372
+
373
+ # Clean up temporary audio file
374
+ try:
375
+ os.unlink(audio_path)
376
+ except (OSError, PermissionError):
377
+ pass
378
+
379
+ return audio_bytes
380
+
381
+ finally:
382
+ # Clean up temporary video file
383
+ try:
384
+ # Close video and audio objects first
385
+ if "video" in locals():
386
+ video.close()
387
+ if "audio" in locals() and audio:
388
+ audio.close()
389
+
390
+ # Wait a bit before trying to delete
391
+ import time
392
+
393
+ time.sleep(0.1)
394
+
395
+ os.unlink(tmp_file_path)
396
+ except (OSError, PermissionError):
397
+ pass
398
+
399
+ except ImportError:
400
+ logger.error(
401
+ "MoviePy not installed. Please install it with: pip install moviepy"
402
+ )
403
+ return None
404
+ except Exception as e:
405
+ logger.error(f"Error extracting audio: {str(e)}")
406
+ return None
407
+
408
+
409
+ def transcribe_audio(audio_bytes: bytes) -> str:
410
+ """
411
+ Transcribe audio to text for text sentiment analysis.
412
+
413
+ Args:
414
+ audio_bytes: Audio bytes in WAV format
415
+
416
+ Returns:
417
+ Transcribed text string
418
+ """
419
+ if audio_bytes is None:
420
+ return ""
421
+
422
+ try:
423
+ import tempfile
424
+ import speech_recognition as sr
425
+
426
+ # Save audio bytes to temporary file
427
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
428
+ tmp_file.write(audio_bytes)
429
+ tmp_file_path = tmp_file.name
430
+
431
+ try:
432
+ # Initialize recognizer
433
+ recognizer = sr.Recognizer()
434
+
435
+ # Load audio file
436
+ with sr.AudioFile(tmp_file_path) as source:
437
+ # Read audio data
438
+ audio_data = recognizer.record(source)
439
+
440
+ # Transcribe using Google Speech Recognition
441
+ try:
442
+ text = recognizer.recognize_google(audio_data)
443
+ return text
444
+ except sr.UnknownValueError:
445
+ logger.warning("Speech could not be understood")
446
+ return ""
447
+ except sr.RequestError as e:
448
+ logger.error(
449
+ f"Could not request results from speech recognition service: {e}"
450
+ )
451
+ return ""
452
+
453
+ finally:
454
+ # Clean up temporary file
455
+ try:
456
+ os.unlink(tmp_file_path)
457
+ except (OSError, PermissionError):
458
+ pass
459
+
460
+ except ImportError:
461
+ logger.error(
462
+ "SpeechRecognition not installed. Please install it with: pip install SpeechRecognition"
463
+ )
464
+ return ""
465
+ except Exception as e:
466
+ logger.error(f"Error transcribing audio: {str(e)}")
467
+ return ""
src/utils/sentiment_mapping.py ADDED
@@ -0,0 +1,71 @@
1
+ """
2
+ Sentiment mapping utilities for different model outputs.
3
+ """
4
+
5
+ from typing import Dict
6
+ from ..config.settings import SENTIMENT_MAPPINGS
7
+
8
+
9
+ def get_sentiment_mapping(num_classes: int) -> Dict[int, str]:
10
+ """
11
+ Get the sentiment mapping based on number of classes.
12
+
13
+ Args:
14
+ num_classes: Number of output classes from the model
15
+
16
+ Returns:
17
+ Dictionary mapping class indices to sentiment labels
18
+ """
19
+ return SENTIMENT_MAPPINGS.get(
20
+ num_classes, {i: f"Class_{i}" for i in range(num_classes)}
21
+ )
22
+
23
+
24
+ def get_sentiment_colors() -> Dict[str, str]:
25
+ """
26
+ Get color-coded sentiment display mapping.
27
+
28
+ Returns:
29
+ Dictionary mapping sentiment labels to emoji indicators
30
+ """
31
+ return {
32
+ "Positive": "🟢",
33
+ "Negative": "🔴",
34
+ "Neutral": "🟡",
35
+ "Angry": "🔴",
36
+ "Sad": "🔵",
37
+ "Happy": "🟢",
38
+ "Fear": "🟣",
39
+ "Disgust": "🟠",
40
+ "Surprise": "🟡",
41
+ }
42
+
43
+
44
+ def format_sentiment_result(
45
+ sentiment: str, confidence: float, input_info: str = "", model_name: str = ""
46
+ ) -> str:
47
+ """
48
+ Format sentiment analysis result for display.
49
+
50
+ Args:
51
+ sentiment: Predicted sentiment label
52
+ confidence: Confidence score
53
+ input_info: Information about the input
54
+ model_name: Name of the model used
55
+
56
+ Returns:
57
+ Formatted result string
58
+ """
59
+ colors = get_sentiment_colors()
60
+ emoji = colors.get(sentiment, "❓")
61
+
62
+ result = f"{emoji} Sentiment: {sentiment}\n"
63
+ result += f"Confidence: {confidence:.2f}\n"
64
+
65
+ if input_info:
66
+ result += f"Input: {input_info}\n"
67
+
68
+ if model_name:
69
+ result += f"Model: {model_name}\n"
70
+
71
+ return result
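
The formatter produces a plain multi-line string, e.g.:

    from src.utils.sentiment_mapping import format_sentiment_result

    print(format_sentiment_result("Happy", 0.91, input_info="face.jpg", model_name="resnet50"))
    # 🟢 Sentiment: Happy
    # Confidence: 0.91
    # Input: face.jpg
    # Model: resnet50
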
simple_model_manager.py → src/utils/simple_model_manager.py RENAMED
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
18
  class SimpleModelManager:
19
  """Simple model manager that downloads models from Google Drive using gdown"""
20
 
21
- def __init__(self, model_dir: str = "models", cache_models: bool = True):
22
  """
23
  Initialize simple model manager
24
 
 
18
  class SimpleModelManager:
19
  """Simple model manager that downloads models from Google Drive using gdown"""
20
 
21
+ def __init__(self, model_dir: str = "model_weights", cache_models: bool = True):
22
  """
23
  Initialize simple model manager
24
 
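
With the rename, downloads now land in model_weights/ by default (which .gitignore also covers), so existing callers need no change. A minimal check (assumes the Drive IDs are set in the environment):

    from src.utils.simple_model_manager import SimpleModelManager

    manager = SimpleModelManager()  # defaults to model_dir="model_weights"
    model, device, num_classes = manager.load_vision_model()
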
uv.lock CHANGED
@@ -475,6 +475,15 @@ wheels = [
475
  { url = "https://files.pythonhosted.org/packages/42/14/42b2651a2f46b022ccd948bca9f2d5af0fd8929c4eec235b8d6d844fbe67/filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d", size = 15988, upload_time = "2025-08-14T16:56:01.633Z" },
476
  ]
477
 
478
  [[package]]
479
  name = "gdown"
480
  version = "5.2.0"
@@ -602,6 +611,18 @@ wheels = [
602
  { url = "https://files.pythonhosted.org/packages/2c/c6/fa760e12a2483469e2bf5058c5faff664acf66cadb4df2ad6205b016a73d/imageio_ffmpeg-0.6.0-py3-none-win_amd64.whl", hash = "sha256:02fa47c83703c37df6bfe4896aab339013f62bf02c5ebf2dce6da56af04ffc0a", size = 31246824, upload_time = "2025-01-16T21:34:28.6Z" },
603
  ]
604
 
 
 
 
 
 
 
 
 
 
 
 
 
605
  [[package]]
606
  name = "jinja2"
607
  version = "3.1.6"
@@ -728,6 +749,15 @@ wheels = [
728
  { url = "https://files.pythonhosted.org/packages/9a/73/7d3b2010baa0b5eb1e4dfa9e4385e89b6716be76f2fa21a6c0fe34b68e5a/moviepy-2.2.1-py3-none-any.whl", hash = "sha256:6b56803fec2ac54b557404126ac1160e65448e03798fa282bd23e8fab3795060", size = 129871, upload_time = "2025-05-21T19:31:50.11Z" },
729
  ]
730
 
 
 
 
 
 
 
 
 
 
731
  [[package]]
732
  name = "narwhals"
733
  version = "2.2.0"
@@ -737,6 +767,44 @@ wheels = [
737
  { url = "https://files.pythonhosted.org/packages/dd/54/1ecca75e51d7da8ca53d1ffa8636ef9077a6eaa31f43ade71360b3e6449a/narwhals-2.2.0-py3-none-any.whl", hash = "sha256:2b5e3d61a486fa4328c286b0c8018b3e781a964947ff725d66ba12f6d5ca3d2a", size = 401021, upload_time = "2025-08-25T07:51:56.97Z" },
738
  ]
739
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
740
  [[package]]
741
  name = "numpy"
742
  version = "2.0.2"
@@ -860,6 +928,132 @@ wheels = [
860
  { url = "https://files.pythonhosted.org/packages/37/48/ac2a9584402fb6c0cd5b5d1a91dcf176b15760130dd386bbafdbfe3640bf/numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00", size = 12812666, upload_time = "2025-05-17T21:45:31.426Z" },
861
  ]
862
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
863
  [[package]]
864
  name = "opencv-python-headless"
865
  version = "4.12.0.88"
@@ -1446,9 +1640,11 @@ dependencies = [
1446
  { name = "gdown" },
1447
  { name = "moviepy" },
1448
  { name = "opencv-python-headless" },
 
1449
  { name = "python-dotenv" },
1450
  { name = "speechrecognition" },
1451
  { name = "streamlit-webrtc" },
 
1452
  ]
1453
 
1454
  [package.metadata]
@@ -1456,9 +1652,20 @@ requires-dist = [
1456
  { name = "gdown", specifier = ">=5.2.0" },
1457
  { name = "moviepy", specifier = ">=2.2.1" },
1458
  { name = "opencv-python-headless", specifier = ">=4.8.0" },
 
1459
  { name = "python-dotenv", specifier = ">=1.1.1" },
1460
  { name = "speechrecognition", specifier = ">=3.10.0" },
1461
  { name = "streamlit-webrtc", specifier = ">=0.47.0" },
 
 
 
 
 
 
 
 
 
 
1462
  ]
1463
 
1464
  [[package]]
@@ -1570,6 +1777,18 @@ wheels = [
1570
  { url = "https://files.pythonhosted.org/packages/bf/8c/22b4506ee42436968ef02a432325008746bd8467299da374d68c98352eff/streamlit_webrtc-0.63.4-py3-none-any.whl", hash = "sha256:70852a930a94e3ecb86a2f3573048ec4c3cd9e2713c7ad4b77bb502620bd726c", size = 216926, upload_time = "2025-08-06T10:02:05.505Z" },
1571
  ]
1572
 
 
 
 
 
 
 
 
 
 
 
 
 
1573
  [[package]]
1574
  name = "tenacity"
1575
  version = "9.1.2"
@@ -1588,6 +1807,63 @@ wheels = [
1588
  { url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588, upload_time = "2020-11-01T01:40:20.672Z" },
1589
  ]
1590
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1591
  [[package]]
1592
  name = "tornado"
1593
  version = "6.5.2"
@@ -1619,6 +1895,23 @@ wheels = [
1619
  { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload_time = "2024-11-24T20:12:19.698Z" },
1620
  ]
1621
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1622
  [[package]]
1623
  name = "typing-extensions"
1624
  version = "4.14.1"
@@ -1663,3 +1956,12 @@ wheels = [
1663
  { url = "https://files.pythonhosted.org/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680", size = 79070, upload_time = "2024-11-01T14:07:10.686Z" },
1664
  { url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067, upload_time = "2024-11-01T14:07:11.845Z" },
1665
  ]
 
 
 
 
 
 
 
 
 
 
475
  { url = "https://files.pythonhosted.org/packages/42/14/42b2651a2f46b022ccd948bca9f2d5af0fd8929c4eec235b8d6d844fbe67/filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d", size = 15988, upload_time = "2025-08-14T16:56:01.633Z" },
476
  ]
477
 
478
+ [[package]]
479
+ name = "fsspec"
480
+ version = "2025.7.0"
481
+ source = { registry = "https://pypi.org/simple" }
482
+ sdist = { url = "https://files.pythonhosted.org/packages/8b/02/0835e6ab9cfc03916fe3f78c0956cfcdb6ff2669ffa6651065d5ebf7fc98/fsspec-2025.7.0.tar.gz", hash = "sha256:786120687ffa54b8283d942929540d8bc5ccfa820deb555a2b5d0ed2b737bf58", size = 304432, upload_time = "2025-07-15T16:05:21.19Z" }
483
+ wheels = [
484
+ { url = "https://files.pythonhosted.org/packages/2f/e0/014d5d9d7a4564cf1c40b5039bc882db69fd881111e03ab3657ac0b218e2/fsspec-2025.7.0-py3-none-any.whl", hash = "sha256:8b012e39f63c7d5f10474de957f3ab793b47b45ae7d39f2fb735f8bbe25c0e21", size = 199597, upload_time = "2025-07-15T16:05:19.529Z" },
485
+ ]
486
+
487
  [[package]]
488
  name = "gdown"
489
  version = "5.2.0"
 
611
  { url = "https://files.pythonhosted.org/packages/2c/c6/fa760e12a2483469e2bf5058c5faff664acf66cadb4df2ad6205b016a73d/imageio_ffmpeg-0.6.0-py3-none-win_amd64.whl", hash = "sha256:02fa47c83703c37df6bfe4896aab339013f62bf02c5ebf2dce6da56af04ffc0a", size = 31246824, upload_time = "2025-01-16T21:34:28.6Z" },
612
  ]
613
 
614
+ [[package]]
615
+ name = "importlib-metadata"
616
+ version = "8.7.0"
617
+ source = { registry = "https://pypi.org/simple" }
618
+ dependencies = [
619
+ { name = "zipp", marker = "python_full_version < '3.10'" },
620
+ ]
621
+ sdist = { url = "https://files.pythonhosted.org/packages/76/66/650a33bd90f786193e4de4b3ad86ea60b53c89b669a5c7be931fac31cdb0/importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000", size = 56641, upload_time = "2025-04-27T15:29:01.736Z" }
622
+ wheels = [
623
+ { url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload_time = "2025-04-27T15:29:00.214Z" },
624
+ ]
625
+
626
  [[package]]
627
  name = "jinja2"
628
  version = "3.1.6"
 
749
  { url = "https://files.pythonhosted.org/packages/9a/73/7d3b2010baa0b5eb1e4dfa9e4385e89b6716be76f2fa21a6c0fe34b68e5a/moviepy-2.2.1-py3-none-any.whl", hash = "sha256:6b56803fec2ac54b557404126ac1160e65448e03798fa282bd23e8fab3795060", size = 129871, upload_time = "2025-05-21T19:31:50.11Z" },
750
  ]
751
 
752
+ [[package]]
753
+ name = "mpmath"
754
+ version = "1.3.0"
755
+ source = { registry = "https://pypi.org/simple" }
756
+ sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106, upload_time = "2023-03-07T16:47:11.061Z" }
757
+ wheels = [
758
+ { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload_time = "2023-03-07T16:47:09.197Z" },
759
+ ]
760
+
761
  [[package]]
762
  name = "narwhals"
763
  version = "2.2.0"
 
767
  { url = "https://files.pythonhosted.org/packages/dd/54/1ecca75e51d7da8ca53d1ffa8636ef9077a6eaa31f43ade71360b3e6449a/narwhals-2.2.0-py3-none-any.whl", hash = "sha256:2b5e3d61a486fa4328c286b0c8018b3e781a964947ff725d66ba12f6d5ca3d2a", size = 401021, upload_time = "2025-08-25T07:51:56.97Z" },
768
  ]
769
 
770
+ [[package]]
771
+ name = "networkx"
772
+ version = "3.2.1"
773
+ source = { registry = "https://pypi.org/simple" }
774
+ resolution-markers = [
775
+ "python_full_version < '3.10'",
776
+ ]
777
+ sdist = { url = "https://files.pythonhosted.org/packages/c4/80/a84676339aaae2f1cfdf9f418701dd634aef9cc76f708ef55c36ff39c3ca/networkx-3.2.1.tar.gz", hash = "sha256:9f1bb5cf3409bf324e0a722c20bdb4c20ee39bf1c30ce8ae499c8502b0b5e0c6", size = 2073928, upload_time = "2023-10-28T08:41:39.364Z" }
778
+ wheels = [
779
+ { url = "https://files.pythonhosted.org/packages/d5/f0/8fbc882ca80cf077f1b246c0e3c3465f7f415439bdea6b899f6b19f61f70/networkx-3.2.1-py3-none-any.whl", hash = "sha256:f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2", size = 1647772, upload_time = "2023-10-28T08:41:36.945Z" },
780
+ ]
781
+
782
+ [[package]]
783
+ name = "networkx"
784
+ version = "3.4.2"
785
+ source = { registry = "https://pypi.org/simple" }
786
+ resolution-markers = [
787
+ "python_full_version == '3.10.*'",
788
+ ]
789
+ sdist = { url = "https://files.pythonhosted.org/packages/fd/1d/06475e1cd5264c0b870ea2cc6fdb3e37177c1e565c43f56ff17a10e3937f/networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1", size = 2151368, upload_time = "2024-10-21T12:39:38.695Z" }
790
+ wheels = [
791
+ { url = "https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f", size = 1723263, upload_time = "2024-10-21T12:39:36.247Z" },
792
+ ]
793
+
794
+ [[package]]
795
+ name = "networkx"
796
+ version = "3.5"
797
+ source = { registry = "https://pypi.org/simple" }
798
+ resolution-markers = [
799
+ "python_full_version >= '3.13'",
800
+ "python_full_version == '3.12.*'",
801
+ "python_full_version == '3.11.*'",
802
+ ]
803
+ sdist = { url = "https://files.pythonhosted.org/packages/6c/4f/ccdb8ad3a38e583f214547fd2f7ff1fc160c43a75af88e6aec213404b96a/networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037", size = 2471065, upload_time = "2025-05-29T11:35:07.804Z" }
804
+ wheels = [
805
+ { url = "https://files.pythonhosted.org/packages/eb/8d/776adee7bbf76365fdd7f2552710282c79a4ead5d2a46408c9043a2b70ba/networkx-3.5-py3-none-any.whl", hash = "sha256:0030d386a9a06dee3565298b4a734b68589749a544acbb6c412dc9e2489ec6ec", size = 2034406, upload_time = "2025-05-29T11:35:04.961Z" },
806
+ ]
807
+
808
  [[package]]
809
  name = "numpy"
810
  version = "2.0.2"
 
928
  { url = "https://files.pythonhosted.org/packages/37/48/ac2a9584402fb6c0cd5b5d1a91dcf176b15760130dd386bbafdbfe3640bf/numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00", size = 12812666, upload_time = "2025-05-17T21:45:31.426Z" },
929
  ]
930
 
931
+ [[package]]
932
+ name = "nvidia-cublas-cu12"
933
+ version = "12.8.4.1"
934
+ source = { registry = "https://pypi.org/simple" }
935
+ wheels = [
936
+ { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload_time = "2025-03-07T01:44:31.254Z" },
937
+ ]
938
+
939
+ [[package]]
940
+ name = "nvidia-cuda-cupti-cu12"
941
+ version = "12.8.90"
942
+ source = { registry = "https://pypi.org/simple" }
943
+ wheels = [
944
+ { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload_time = "2025-03-07T01:40:21.213Z" },
945
+ ]
946
+
947
+ [[package]]
948
+ name = "nvidia-cuda-nvrtc-cu12"
949
+ version = "12.8.93"
950
+ source = { registry = "https://pypi.org/simple" }
951
+ wheels = [
952
+ { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload_time = "2025-03-07T01:42:13.562Z" },
953
+ ]
954
+
955
+ [[package]]
956
+ name = "nvidia-cuda-runtime-cu12"
957
+ version = "12.8.90"
958
+ source = { registry = "https://pypi.org/simple" }
959
+ wheels = [
960
+ { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload_time = "2025-03-07T01:40:01.615Z" },
961
+ ]
962
+
963
+ [[package]]
964
+ name = "nvidia-cudnn-cu12"
965
+ version = "9.10.2.21"
966
+ source = { registry = "https://pypi.org/simple" }
967
+ dependencies = [
968
+ { name = "nvidia-cublas-cu12" },
969
+ ]
970
+ wheels = [
971
+ { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload_time = "2025-06-06T21:54:08.597Z" },
972
+ ]
973
+
974
+ [[package]]
975
+ name = "nvidia-cufft-cu12"
976
+ version = "11.3.3.83"
977
+ source = { registry = "https://pypi.org/simple" }
978
+ dependencies = [
979
+ { name = "nvidia-nvjitlink-cu12" },
980
+ ]
981
+ wheels = [
982
+ { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload_time = "2025-03-07T01:45:27.821Z" },
983
+ ]
984
+
985
+ [[package]]
986
+ name = "nvidia-cufile-cu12"
987
+ version = "1.13.1.3"
988
+ source = { registry = "https://pypi.org/simple" }
989
+ wheels = [
990
+ { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload_time = "2025-03-07T01:45:50.723Z" },
991
+ ]
992
+
993
+ [[package]]
994
+ name = "nvidia-curand-cu12"
995
+ version = "10.3.9.90"
996
+ source = { registry = "https://pypi.org/simple" }
997
+ wheels = [
998
+ { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload_time = "2025-03-07T01:46:23.323Z" },
999
+ ]
1000
+
1001
+ [[package]]
1002
+ name = "nvidia-cusolver-cu12"
1003
+ version = "11.7.3.90"
1004
+ source = { registry = "https://pypi.org/simple" }
1005
+ dependencies = [
1006
+ { name = "nvidia-cublas-cu12" },
1007
+ { name = "nvidia-cusparse-cu12" },
1008
+ { name = "nvidia-nvjitlink-cu12" },
1009
+ ]
1010
+ wheels = [
1011
+ { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload_time = "2025-03-07T01:47:16.273Z" },
1012
+ ]
1013
+
1014
+ [[package]]
1015
+ name = "nvidia-cusparse-cu12"
1016
+ version = "12.5.8.93"
1017
+ source = { registry = "https://pypi.org/simple" }
1018
+ dependencies = [
1019
+ { name = "nvidia-nvjitlink-cu12" },
1020
+ ]
1021
+ wheels = [
1022
+ { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload_time = "2025-03-07T01:48:13.779Z" },
1023
+ ]
1024
+
1025
+ [[package]]
1026
+ name = "nvidia-cusparselt-cu12"
1027
+ version = "0.7.1"
1028
+ source = { registry = "https://pypi.org/simple" }
1029
+ wheels = [
1030
+ { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload_time = "2025-02-26T00:15:44.104Z" },
1031
+ ]
1032
+
1033
+ [[package]]
1034
+ name = "nvidia-nccl-cu12"
1035
+ version = "2.27.3"
1036
+ source = { registry = "https://pypi.org/simple" }
1037
+ wheels = [
1038
+ { url = "https://files.pythonhosted.org/packages/5c/5b/4e4fff7bad39adf89f735f2bc87248c81db71205b62bcc0d5ca5b606b3c3/nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adf27ccf4238253e0b826bce3ff5fa532d65fc42322c8bfdfaf28024c0fbe039", size = 322364134, upload_time = "2025-06-03T21:58:04.013Z" },
1039
+ ]
1040
+
1041
+ [[package]]
1042
+ name = "nvidia-nvjitlink-cu12"
1043
+ version = "12.8.93"
1044
+ source = { registry = "https://pypi.org/simple" }
1045
+ wheels = [
1046
+ { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload_time = "2025-03-07T01:49:55.661Z" },
1047
+ ]
1048
+
1049
+ [[package]]
1050
+ name = "nvidia-nvtx-cu12"
1051
+ version = "12.8.90"
1052
+ source = { registry = "https://pypi.org/simple" }
1053
+ wheels = [
1054
+ { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload_time = "2025-03-07T01:42:44.131Z" },
1055
+ ]
1056
+
1057
  [[package]]
1058
  name = "opencv-python-headless"
1059
  version = "4.12.0.88"
 
1640
  { name = "gdown" },
1641
  { name = "moviepy" },
1642
  { name = "opencv-python-headless" },
1643
+ { name = "pillow" },
1644
  { name = "python-dotenv" },
1645
  { name = "speechrecognition" },
1646
  { name = "streamlit-webrtc" },
1647
+ { name = "torch" },
1648
  ]
1649
 
1650
  [package.metadata]
 
1652
  { name = "gdown", specifier = ">=5.2.0" },
1653
  { name = "moviepy", specifier = ">=2.2.1" },
1654
  { name = "opencv-python-headless", specifier = ">=4.8.0" },
1655
+ { name = "pillow", specifier = ">=11.3.0" },
1656
  { name = "python-dotenv", specifier = ">=1.1.1" },
1657
  { name = "speechrecognition", specifier = ">=3.10.0" },
1658
  { name = "streamlit-webrtc", specifier = ">=0.47.0" },
1659
+ { name = "torch", specifier = ">=2.8.0" },
1660
+ ]
1661
+
1662
+ [[package]]
1663
+ name = "setuptools"
1664
+ version = "80.9.0"
1665
+ source = { registry = "https://pypi.org/simple" }
1666
+ sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958, upload_time = "2025-05-27T00:56:51.443Z" }
1667
+ wheels = [
1668
+ { url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload_time = "2025-05-27T00:56:49.664Z" },
1669
  ]
1670
 
1671
  [[package]]
 
1777
  { url = "https://files.pythonhosted.org/packages/bf/8c/22b4506ee42436968ef02a432325008746bd8467299da374d68c98352eff/streamlit_webrtc-0.63.4-py3-none-any.whl", hash = "sha256:70852a930a94e3ecb86a2f3573048ec4c3cd9e2713c7ad4b77bb502620bd726c", size = 216926, upload_time = "2025-08-06T10:02:05.505Z" },
1778
  ]
1779
 
1780
+ [[package]]
1781
+ name = "sympy"
1782
+ version = "1.14.0"
1783
+ source = { registry = "https://pypi.org/simple" }
1784
+ dependencies = [
1785
+ { name = "mpmath" },
1786
+ ]
1787
+ sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload_time = "2025-04-27T18:05:01.611Z" }
1788
+ wheels = [
1789
+ { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload_time = "2025-04-27T18:04:59.103Z" },
1790
+ ]
1791
+
1792
  [[package]]
1793
  name = "tenacity"
1794
  version = "9.1.2"
 
1807
  { url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588, upload_time = "2020-11-01T01:40:20.672Z" },
1808
  ]
1809
 
1810
+ [[package]]
1811
+ name = "torch"
1812
+ version = "2.8.0"
1813
+ source = { registry = "https://pypi.org/simple" }
1814
+ dependencies = [
1815
+ { name = "filelock" },
1816
+ { name = "fsspec" },
1817
+ { name = "jinja2" },
1818
+ { name = "networkx", version = "3.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
1819
+ { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
1820
+ { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
1821
+ { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
1822
+ { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
1823
+ { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
1824
+ { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
1825
+ { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
1826
+ { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
1827
+ { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
1828
+ { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
1829
+ { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
1830
+ { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
1831
+ { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
1832
+ { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
1833
+ { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
1834
+ { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
1835
+ { name = "setuptools", marker = "python_full_version >= '3.12'" },
1836
+ { name = "sympy" },
1837
+ { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
1838
+ { name = "typing-extensions" },
1839
+ ]
1840
+ wheels = [
1841
+ { url = "https://files.pythonhosted.org/packages/63/28/110f7274254f1b8476c561dada127173f994afa2b1ffc044efb773c15650/torch-2.8.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:0be92c08b44009d4131d1ff7a8060d10bafdb7ddcb7359ef8d8c5169007ea905", size = 102052793, upload_time = "2025-08-06T14:53:15.852Z" },
1842
+ { url = "https://files.pythonhosted.org/packages/70/1c/58da560016f81c339ae14ab16c98153d51c941544ae568da3cb5b1ceb572/torch-2.8.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:89aa9ee820bb39d4d72b794345cccef106b574508dd17dbec457949678c76011", size = 888025420, upload_time = "2025-08-06T14:54:18.014Z" },
1843
+ { url = "https://files.pythonhosted.org/packages/70/87/f69752d0dd4ba8218c390f0438130c166fa264a33b7025adb5014b92192c/torch-2.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:e8e5bf982e87e2b59d932769938b698858c64cc53753894be25629bdf5cf2f46", size = 241363614, upload_time = "2025-08-06T14:53:31.496Z" },
1844
+ { url = "https://files.pythonhosted.org/packages/ef/d6/e6d4c57e61c2b2175d3aafbfb779926a2cfd7c32eeda7c543925dceec923/torch-2.8.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:a3f16a58a9a800f589b26d47ee15aca3acf065546137fc2af039876135f4c760", size = 73611154, upload_time = "2025-08-06T14:53:10.919Z" },
1845
+ { url = "https://files.pythonhosted.org/packages/8f/c4/3e7a3887eba14e815e614db70b3b529112d1513d9dae6f4d43e373360b7f/torch-2.8.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:220a06fd7af8b653c35d359dfe1aaf32f65aa85befa342629f716acb134b9710", size = 102073391, upload_time = "2025-08-06T14:53:20.937Z" },
1846
+ { url = "https://files.pythonhosted.org/packages/5a/63/4fdc45a0304536e75a5e1b1bbfb1b56dd0e2743c48ee83ca729f7ce44162/torch-2.8.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:c12fa219f51a933d5f80eeb3a7a5d0cbe9168c0a14bbb4055f1979431660879b", size = 888063640, upload_time = "2025-08-06T14:55:05.325Z" },
1847
+ { url = "https://files.pythonhosted.org/packages/84/57/2f64161769610cf6b1c5ed782bd8a780e18a3c9d48931319f2887fa9d0b1/torch-2.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:8c7ef765e27551b2fbfc0f41bcf270e1292d9bf79f8e0724848b1682be6e80aa", size = 241366752, upload_time = "2025-08-06T14:53:38.692Z" },
1848
+ { url = "https://files.pythonhosted.org/packages/a4/5e/05a5c46085d9b97e928f3f037081d3d2b87fb4b4195030fc099aaec5effc/torch-2.8.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:5ae0524688fb6707c57a530c2325e13bb0090b745ba7b4a2cd6a3ce262572916", size = 73621174, upload_time = "2025-08-06T14:53:25.44Z" },
1849
+ { url = "https://files.pythonhosted.org/packages/49/0c/2fd4df0d83a495bb5e54dca4474c4ec5f9c62db185421563deeb5dabf609/torch-2.8.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e2fab4153768d433f8ed9279c8133a114a034a61e77a3a104dcdf54388838705", size = 101906089, upload_time = "2025-08-06T14:53:52.631Z" },
1850
+ { url = "https://files.pythonhosted.org/packages/99/a8/6acf48d48838fb8fe480597d98a0668c2beb02ee4755cc136de92a0a956f/torch-2.8.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b2aca0939fb7e4d842561febbd4ffda67a8e958ff725c1c27e244e85e982173c", size = 887913624, upload_time = "2025-08-06T14:56:44.33Z" },
1851
+ { url = "https://files.pythonhosted.org/packages/af/8a/5c87f08e3abd825c7dfecef5a0f1d9aa5df5dd0e3fd1fa2f490a8e512402/torch-2.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:2f4ac52f0130275d7517b03a33d2493bab3693c83dcfadf4f81688ea82147d2e", size = 241326087, upload_time = "2025-08-06T14:53:46.503Z" },
1852
+ { url = "https://files.pythonhosted.org/packages/be/66/5c9a321b325aaecb92d4d1855421e3a055abd77903b7dab6575ca07796db/torch-2.8.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:619c2869db3ada2c0105487ba21b5008defcc472d23f8b80ed91ac4a380283b0", size = 73630478, upload_time = "2025-08-06T14:53:57.144Z" },
1853
+ { url = "https://files.pythonhosted.org/packages/10/4e/469ced5a0603245d6a19a556e9053300033f9c5baccf43a3d25ba73e189e/torch-2.8.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2b2f96814e0345f5a5aed9bf9734efa913678ed19caf6dc2cddb7930672d6128", size = 101936856, upload_time = "2025-08-06T14:54:01.526Z" },
1854
+ { url = "https://files.pythonhosted.org/packages/16/82/3948e54c01b2109238357c6f86242e6ecbf0c63a1af46906772902f82057/torch-2.8.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:65616ca8ec6f43245e1f5f296603e33923f4c30f93d65e103d9e50c25b35150b", size = 887922844, upload_time = "2025-08-06T14:55:50.78Z" },
1855
+ { url = "https://files.pythonhosted.org/packages/e3/54/941ea0a860f2717d86a811adf0c2cd01b3983bdd460d0803053c4e0b8649/torch-2.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:659df54119ae03e83a800addc125856effda88b016dfc54d9f65215c3975be16", size = 241330968, upload_time = "2025-08-06T14:54:45.293Z" },
1856
+ { url = "https://files.pythonhosted.org/packages/de/69/8b7b13bba430f5e21d77708b616f767683629fc4f8037564a177d20f90ed/torch-2.8.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:1a62a1ec4b0498930e2543535cf70b1bef8c777713de7ceb84cd79115f553767", size = 73915128, upload_time = "2025-08-06T14:54:34.769Z" },
1857
+ { url = "https://files.pythonhosted.org/packages/15/0e/8a800e093b7f7430dbaefa80075aee9158ec22e4c4fc3c1a66e4fb96cb4f/torch-2.8.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:83c13411a26fac3d101fe8035a6b0476ae606deb8688e904e796a3534c197def", size = 102020139, upload_time = "2025-08-06T14:54:39.047Z" },
1858
+ { url = "https://files.pythonhosted.org/packages/4a/15/5e488ca0bc6162c86a33b58642bc577c84ded17c7b72d97e49b5833e2d73/torch-2.8.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:8f0a9d617a66509ded240add3754e462430a6c1fc5589f86c17b433dd808f97a", size = 887990692, upload_time = "2025-08-06T14:56:18.286Z" },
1859
+ { url = "https://files.pythonhosted.org/packages/b4/a8/6a04e4b54472fc5dba7ca2341ab219e529f3c07b6941059fbf18dccac31f/torch-2.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:a7242b86f42be98ac674b88a4988643b9bc6145437ec8f048fea23f72feb5eca", size = 241603453, upload_time = "2025-08-06T14:55:22.945Z" },
1860
+ { url = "https://files.pythonhosted.org/packages/04/6e/650bb7f28f771af0cb791b02348db8b7f5f64f40f6829ee82aa6ce99aabe/torch-2.8.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:7b677e17f5a3e69fdef7eb3b9da72622f8d322692930297e4ccb52fefc6c8211", size = 73632395, upload_time = "2025-08-06T14:55:28.645Z" },
1861
+ { url = "https://files.pythonhosted.org/packages/5b/b0/a321f27270049baa12f5c3fb0d6ceea005634787e3af9a8d75dce8306b0a/torch-2.8.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:da6afa31c13b669d4ba49d8a2169f0db2c3ec6bec4af898aa714f401d4c38904", size = 102059214, upload_time = "2025-08-06T14:55:33.433Z" },
1862
+ { url = "https://files.pythonhosted.org/packages/fd/dd/1630cb51b10d3d2e97db95e5a84c32def81fc26b005bce6fc880b0e6db81/torch-2.8.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:06fcee8000e5c62a9f3e52a688b9c5abb7c6228d0e56e3452983416025c41381", size = 888024302, upload_time = "2025-08-06T14:57:28.23Z" },
1863
+ { url = "https://files.pythonhosted.org/packages/b9/dc/1f1f621afe15e3c496e1e8f94f8903f75f87e7d642d5a985e92210cc208d/torch-2.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:5128fe752a355d9308e56af1ad28b15266fe2da5948660fad44de9e3a9e36e8c", size = 241249338, upload_time = "2025-08-06T14:57:05.669Z" },
1864
+ { url = "https://files.pythonhosted.org/packages/ae/95/ae26263aceb3d57b821179f827d0e321373ed49423e603dd5906ab14a730/torch-2.8.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:e9f071f5b52a9f6970dc8a919694b27a91ae9dc08898b2b988abbef5eddfd1ae", size = 73610795, upload_time = "2025-08-06T14:57:11.513Z" },
1865
+ ]
1866
+
1867
  [[package]]
1868
  name = "tornado"
1869
  version = "6.5.2"
 
1895
  { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload_time = "2024-11-24T20:12:19.698Z" },
1896
  ]
1897
 
1898
+ [[package]]
1899
+ name = "triton"
1900
+ version = "3.4.0"
1901
+ source = { registry = "https://pypi.org/simple" }
1902
+ dependencies = [
1903
+ { name = "importlib-metadata", marker = "python_full_version < '3.10'" },
1904
+ { name = "setuptools" },
1905
+ ]
1906
+ wheels = [
1907
+ { url = "https://files.pythonhosted.org/packages/62/ee/0ee5f64a87eeda19bbad9bc54ae5ca5b98186ed00055281fd40fb4beb10e/triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ff2785de9bc02f500e085420273bb5cc9c9bb767584a4aa28d6e360cec70128", size = 155430069, upload_time = "2025-07-30T19:58:21.715Z" },
1908
+ { url = "https://files.pythonhosted.org/packages/7d/39/43325b3b651d50187e591eefa22e236b2981afcebaefd4f2fc0ea99df191/triton-3.4.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b70f5e6a41e52e48cfc087436c8a28c17ff98db369447bcaff3b887a3ab4467", size = 155531138, upload_time = "2025-07-30T19:58:29.908Z" },
1909
+ { url = "https://files.pythonhosted.org/packages/d0/66/b1eb52839f563623d185f0927eb3530ee4d5ffe9d377cdaf5346b306689e/triton-3.4.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:31c1d84a5c0ec2c0f8e8a072d7fd150cab84a9c239eaddc6706c081bfae4eb04", size = 155560068, upload_time = "2025-07-30T19:58:37.081Z" },
1910
+ { url = "https://files.pythonhosted.org/packages/30/7b/0a685684ed5322d2af0bddefed7906674f67974aa88b0fae6e82e3b766f6/triton-3.4.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00be2964616f4c619193cb0d1b29a99bd4b001d7dc333816073f92cf2a8ccdeb", size = 155569223, upload_time = "2025-07-30T19:58:44.017Z" },
1911
+ { url = "https://files.pythonhosted.org/packages/20/63/8cb444ad5cdb25d999b7d647abac25af0ee37d292afc009940c05b82dda0/triton-3.4.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7936b18a3499ed62059414d7df563e6c163c5e16c3773678a3ee3d417865035d", size = 155659780, upload_time = "2025-07-30T19:58:51.171Z" },
1912
+ { url = "https://files.pythonhosted.org/packages/12/34/1251beb5a3cb93f3950ebe68732752014646003ef6eb11eb5f1a37ca78cd/triton-3.4.0-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98e5c1442eaeabae2e2452ae765801bd53cd4ce873cab0d1bdd59a32ab2d9397", size = 155430799, upload_time = "2025-07-30T19:58:57.664Z" },
1913
+ ]
1914
+
1915
  [[package]]
1916
  name = "typing-extensions"
1917
  version = "4.14.1"
 
1956
  { url = "https://files.pythonhosted.org/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680", size = 79070, upload_time = "2024-11-01T14:07:10.686Z" },
1957
  { url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067, upload_time = "2024-11-01T14:07:11.845Z" },
1958
  ]
1959
+
1960
+ [[package]]
1961
+ name = "zipp"
1962
+ version = "3.23.0"
1963
+ source = { registry = "https://pypi.org/simple" }
1964
+ sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50ede074e376733dca2ae7c6eb617489437771209d4180/zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166", size = 25547, upload_time = "2025-06-08T17:06:39.4Z" }
1965
+ wheels = [
1966
+ { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload_time = "2025-06-08T17:06:38.034Z" },
1967
+ ]
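
With torch and pillow now pinned as direct dependencies (the CUDA 12.8 wheel stack resolves only on x86_64 Linux), a quick post-sync sanity check can confirm the environment matches the lock. A minimal sketch, assuming the standard version attributes:

# Post-`uv sync` sanity check for the newly pinned direct dependencies.
import torch
import PIL

print("torch", torch.__version__, "| CUDA available:", torch.cuda.is_available())
print("pillow", PIL.__version__)
assert torch.__version__.startswith("2.8")  # the lock resolves torch to 2.8.0
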