|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
from PIL import Image |
|
|
import io |
|
|
import numpy as np |
|
|
import tempfile |
|
|
import os |
|
|
import torch |
|
|
import torch.nn as nn |
|
|
from torchvision import transforms, models |
|
|
import torch.nn.functional as F |
|
|
|
|
|
|
|
|
# Page-level settings. Keep this ahead of every other st.* call, as Streamlit
# documents for set_page_config.
_PAGE_SETTINGS = dict(
    page_title="Sentiment Analysis Testing Ground",
    page_icon="π§ ",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.set_page_config(**_PAGE_SETTINGS)

# CSS classes shared by the pages below: main-header, model-card, result-box
# and upload-section.
_CUSTOM_CSS = """
<style>
.main-header {
font-size: 2.5rem;
font-weight: bold;
color: #1f77b4;
text-align: center;
margin-bottom: 2rem;
}
.model-card {
background-color: #f0f2f6;
padding: 1.5rem;
border-radius: 10px;
margin: 1rem 0;
border-left: 4px solid #1f77b4;
}
.result-box {
background-color: #e8f4fd;
padding: 1rem;
border-radius: 8px;
border: 1px solid #1f77b4;
margin: 1rem 0;
}
.upload-section {
background-color: #f8f9fa;
padding: 1.5rem;
border-radius: 10px;
border: 2px dashed #dee2e6;
text-align: center;
margin: 1rem 0;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
@st.cache_resource
def load_vision_model():
    """Load the fine-tuned ResNet-50 vision sentiment model from disk.

    The number of output classes is read from the checkpoint's ``fc.weight``
    tensor when that key is present; otherwise 3 classes are assumed.

    Returns:
        tuple: ``(model, device, num_classes)`` on success, or
        ``(None, None, None)`` when the checkpoint file is missing or any
        step of loading fails. Every path returns a 3-tuple because callers
        unpack three values.
    """
    model_path = "models/resnet50_model.pth"
    try:
        if not os.path.exists(model_path):
            st.error(f"β Vision model file not found at: {model_path}")
            # Previously a bare ``None`` was returned here, which made the
            # callers' three-way unpacking raise TypeError.
            return None, None, None

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
        checkpoint = torch.load(model_path, map_location=device)

        # Infer the classifier width from the checkpoint itself when possible.
        if "fc.weight" in checkpoint:
            num_classes = checkpoint["fc.weight"].shape[0]
            st.info(f"π Model checkpoint has {num_classes} output classes")
        else:
            num_classes = 3
            st.warning(
                "β οΈ Could not determine number of classes from checkpoint, assuming 3"
            )

        # Build an uninitialised ResNet-50 and swap in a head of the right size
        # so the state dict matches.
        model = models.resnet50(weights=None)
        model.fc = nn.Linear(model.fc.in_features, num_classes)

        model.load_state_dict(checkpoint)
        model.to(device)
        model.eval()

        st.success(f"β Vision model loaded successfully with {num_classes} classes!")
        return model, device, num_classes
    except Exception as e:
        st.error(f"β Error loading vision model: {str(e)}")
        return None, None, None
|
|
|
|
|
|
|
|
@st.cache_data
def get_vision_transforms():
    """Return the image pipeline applied before vision inference.

    Steps, in order: resize to 224, center-crop to 224, convert to a tensor,
    then normalise each channel with the mean/std values listed below.
    """
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]
    steps = [
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
    return transforms.Compose(steps)
|
|
|
|
|
|
|
|
def _fallback_preprocess(image):
    """Resize a PIL image to 48x48 and return it as grayscale replicated to RGB.

    Returns None when ``image`` is not a PIL image.
    """
    if not isinstance(image, Image.Image):
        return None
    small = image.convert("RGB").resize((48, 48), Image.Resampling.LANCZOS)
    return small.convert("L").convert("RGB")


def _center_square_preprocess(image):
    """Center-crop a PIL image to a square, resize to 224x224, grayscale -> RGB."""
    rgb = image.convert("RGB")
    width, height = rgb.size
    side = min(width, height)
    left = (width - side) // 2
    top = (height - side) // 2
    square = rgb.crop((left, top, left + side, top + side))
    resized = square.resize((224, 224), Image.Resampling.LANCZOS)
    return resized.convert("L").convert("RGB")


def detect_and_preprocess_face(image, crop_tightness=0.05):
    """Crop the largest detected face and return it as a 224x224 gray-as-RGB image.

    The face is found with OpenCV's frontal-face Haar cascade. The crop is
    converted to grayscale, resized to 224x224 and expanded back to three
    identical channels.

    Args:
        image: Input image (PIL Image or numpy array).
            NOTE(review): numpy input is passed straight to a BGR->GRAY
            conversion, so it is assumed to be a BGR array -- confirm with callers.
        crop_tightness: Padding around the face as a fraction of the face
            box size (0.0 = no padding, 0.3 = 30% padding).

    Returns:
        PIL.Image.Image or None: the preprocessed image. When no face is found,
        a PIL input is center-cropped to a square and resized to 224x224
        instead; a numpy input yields None. If OpenCV is missing or any step
        raises, a PIL input is resized to 48x48 gray-as-RGB; otherwise None.
    """
    try:
        import cv2
        import numpy as np

        if isinstance(image, Image.Image):
            # Normalise the mode first: RGBA, palette or grayscale images would
            # otherwise make the colour conversions below raise and push every
            # such upload into the low-resolution fallback.
            img_array = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)
        else:
            img_array = image

        face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
        )
        gray = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(
            gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
        )

        # ``faces`` may be an empty tuple or an ndarray, so test its length.
        if len(faces) == 0:
            st.warning("β οΈ No face detected in the image. Using center crop instead.")
            if isinstance(image, Image.Image):
                return _center_square_preprocess(image)
            return None

        # Keep the face with the largest bounding-box area.
        x, y, w, h = max(faces, key=lambda rect: rect[2] * rect[3])

        padding_x = int(w * crop_tightness)
        padding_y = int(h * crop_tightness)

        # Clamp the padded box to the image bounds.
        x1 = max(0, x - padding_x)
        y1 = max(0, y - padding_y)
        x2 = min(img_array.shape[1], x + w + padding_x)
        y2 = min(img_array.shape[0], y + h + padding_y)

        # Grayscale conversion is per-pixel, so cropping the existing gray
        # image equals converting the colour crop.
        face_gray = gray[y1:y2, x1:x2]
        face_resized = cv2.resize(face_gray, (224, 224), interpolation=cv2.INTER_AREA)
        face_rgb_3channel = cv2.cvtColor(face_resized, cv2.COLOR_GRAY2RGB)
        return Image.fromarray(face_rgb_3channel)

    except ImportError:
        st.error(
            "β OpenCV not installed. Please install it with: pip install opencv-python"
        )
        st.info("Falling back to basic preprocessing...")
        return _fallback_preprocess(image)
    except Exception as e:
        st.error(f"β Error in face detection: {str(e)}")
        st.info("Falling back to basic preprocessing...")
        return _fallback_preprocess(image)
|
|
|
|
|
|
|
|
def get_sentiment_mapping(num_classes):
    """Return a ``{class_index: label}`` dict for a model with ``num_classes`` outputs.

    Named labels exist for 3, 4 and 7 classes; any other count produces
    generic ``Class_<i>`` labels.
    """
    named_label_sets = {
        3: ("Negative", "Neutral", "Positive"),
        4: ("Angry", "Sad", "Happy", "Neutral"),
        7: ("Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral"),
    }
    labels = named_label_sets.get(num_classes)
    if labels is None:
        labels = tuple(f"Class_{index}" for index in range(num_classes))
    return dict(enumerate(labels))
|
|
|
|
|
|
|
|
|
|
|
def predict_text_sentiment(text):
    """Classify text as Positive / Negative / Neutral using TextBlob polarity.

    Polarity above 0.1 is Positive, below -0.1 is Negative, anything in
    between is Neutral. The confidence is a heuristic derived from the
    polarity magnitude and rounded to two decimals.

    Args:
        text: The text to analyse.

    Returns:
        tuple: ``(label, confidence)``. For empty or whitespace-only input the
        label is ``"No text provided"``; when TextBlob is missing or raises,
        the label describes the failure. Confidence is 0.0 in those cases.
    """
    if not text or not text.strip():
        return "No text provided", 0.0

    try:
        from textblob import TextBlob

        polarity = TextBlob(text).sentiment.polarity
        strength = abs(polarity)

        if strength > 0.1:
            # Positive and Negative share one confidence formula.
            sentiment = "Positive" if polarity > 0 else "Negative"
            confidence = min(0.95, 0.6 + strength * 0.3)
        else:
            sentiment = "Neutral"
            confidence = 0.7 - strength * 0.2

        return sentiment, round(confidence, 2)

    except ImportError:
        st.error(
            "β TextBlob not installed. Please install it with: pip install textblob"
        )
        return "TextBlob not available", 0.0
    except Exception as e:
        st.error(f"β Error in text sentiment analysis: {str(e)}")
        return "Error occurred", 0.0
|
|
|
|
|
|
|
|
@st.cache_resource
def load_audio_model():
    """Load the fine-tuned Wav2Vec2 audio sentiment model and its feature extractor.

    The class count is taken from the checkpoint's ``classifier.weight``
    tensor when present, otherwise 3 is assumed. The architecture and the
    feature extractor both come from ``facebook/wav2vec2-base``; the
    checkpoint weights are then loaded on top.

    Returns:
        tuple: ``(model, device, num_classes, feature_extractor)``, or four
        ``None`` values when the file is missing or loading fails.
    """
    failure = (None, None, None, None)
    base_checkpoint = "facebook/wav2vec2-base"
    try:
        model_path = "models/wav2vec2_model.pth"
        if not os.path.exists(model_path):
            st.error(f"β Audio model file not found at: {model_path}")
            return failure

        use_cuda = torch.cuda.is_available()
        device = torch.device("cuda" if use_cuda else "cpu")
        state_dict = torch.load(model_path, map_location=device)

        head_weight = state_dict["classifier.weight"] if "classifier.weight" in state_dict else None
        if head_weight is not None:
            num_classes = head_weight.shape[0]
            st.info(f"π Audio model checkpoint has {num_classes} output classes")
        else:
            num_classes = 3
            st.warning(
                "β οΈ Could not determine number of classes from checkpoint, assuming 3"
            )

        # Imported lazily so a missing transformers install is reported through
        # the handler below rather than at module import.
        from transformers import AutoModelForAudioClassification

        model = AutoModelForAudioClassification.from_pretrained(
            base_checkpoint, num_labels=num_classes
        )
        model.load_state_dict(state_dict)
        model.to(device)
        model.eval()

        from transformers import AutoFeatureExtractor

        feature_extractor = AutoFeatureExtractor.from_pretrained(base_checkpoint)

        st.success(f"β Audio model loaded successfully with {num_classes} classes!")
        return model, device, num_classes, feature_extractor
    except Exception as e:
        st.error(f"β Error loading audio model: {str(e)}")
        return failure
|
|
|
|
|
|
|
|
def predict_audio_sentiment(audio_bytes):
    """Predict sentiment for raw audio bytes with the fine-tuned Wav2Vec2 model.

    The bytes are written to a temporary ``.wav`` file, decoded with librosa,
    resampled to 16 kHz when needed, and padded/truncated to 5 seconds by the
    feature extractor before inference. The temporary file is always removed.

    Args:
        audio_bytes: Encoded audio file content, or None.

    Returns:
        tuple: ``(label, confidence)``. Labels are Negative/Neutral/Positive
        for a 3-class model and ``Class_<i>`` otherwise. On failure the label
        describes the problem and the confidence is 0.0.
    """
    if audio_bytes is None:
        return "No audio provided", 0.0

    try:
        model, device, num_classes, feature_extractor = load_audio_model()
        if model is None:
            return "Model not loaded", 0.0

        import librosa
        import tempfile

        target_rate = 16000
        max_samples = int(5.0 * target_rate)

        # librosa reads from a path, so persist the bytes first.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
            handle.write(audio_bytes)
            wav_path = handle.name

        try:
            waveform, native_rate = librosa.load(wav_path, sr=None)
            if native_rate != target_rate:
                waveform = librosa.resample(
                    y=waveform, orig_sr=native_rate, target_sr=target_rate
                )

            features = feature_extractor(
                waveform,
                sampling_rate=target_rate,
                max_length=max_samples,
                truncation=True,
                padding="max_length",
                return_tensors="pt",
            )
            batch = features.input_values.to(device)

            with torch.no_grad():
                logits = model(batch).logits
                class_probs = torch.softmax(logits, dim=1)
                top_prob, top_index = torch.max(class_probs, 1)

            if num_classes == 3:
                labels = {0: "Negative", 1: "Neutral", 2: "Positive"}
            else:
                labels = {i: f"Class_{i}" for i in range(num_classes)}

            return labels[top_index.item()], top_prob.item()
        finally:
            # delete=False above means cleanup is our responsibility.
            os.unlink(wav_path)

    except ImportError as e:
        st.error(f"β Required library not installed: {str(e)}")
        st.info("Please install: pip install librosa transformers")
        return "Library not available", 0.0
    except Exception as e:
        st.error(f"β Error in audio sentiment prediction: {str(e)}")
        return "Error occurred", 0.0
|
|
|
|
|
|
|
|
def predict_vision_sentiment(image, crop_tightness=0.05):
    """Run the ResNet-50 vision model on an image and return its sentiment.

    The image is first passed through ``detect_and_preprocess_face``, shown in
    the UI, then transformed and classified.

    Args:
        image: Input image (PIL Image or numpy array), or None.
        crop_tightness: Accepted for interface compatibility but currently
            NOT applied: face cropping always uses 0% padding, matching the
            "0% padding" notes shown in the UI.

    Returns:
        tuple: ``(label, confidence)``. On failure the label describes the
        problem ("No image provided", "Model not loaded",
        "Image preprocessing failed", "Error occurred") and confidence is 0.0.
    """
    if image is None:
        return "No image provided", 0.0

    # Bound before the try so the error handler can reference it safely even
    # if loading the model is what failed.
    num_classes = None
    try:
        model, device, num_classes = load_vision_model()
        if model is None:
            return "Model not loaded", 0.0

        st.info(
            "π Detecting face and preprocessing image to match training data format..."
        )
        preprocessed_image = detect_and_preprocess_face(image, crop_tightness=0.0)
        if preprocessed_image is None:
            return "Image preprocessing failed", 0.0

        st.image(
            preprocessed_image,
            caption="Preprocessed Image (48x48 Grayscale β 3-channel RGB)",
            width=200,
        )

        transform = get_vision_transforms()
        # unsqueeze(0) adds the batch dimension the model expects.
        image_tensor = transform(preprocessed_image).unsqueeze(0).to(device)

        with torch.no_grad():
            outputs = model(image_tensor)
            st.info(f"π Model output shape: {outputs.shape}")
            probabilities = F.softmax(outputs, dim=1)
            confidence, predicted = torch.max(probabilities, 1)

        sentiment_map = get_sentiment_mapping(num_classes)
        return sentiment_map[predicted.item()], confidence.item()

    except Exception as e:
        st.error(f"Error in vision sentiment prediction: {str(e)}")
        # Only emit the class-count hint when the count is actually known;
        # formatting an unbound name here used to raise inside the handler.
        if num_classes is not None:
            st.error(
                f"Model output shape mismatch. Expected {num_classes} classes but got different."
            )
        return "Error occurred", 0.0
|
|
|
|
|
|
|
|
def predict_fused_sentiment(text=None, audio_bytes=None, image=None):
    """Combine the text, audio and vision predictions by majority vote.

    Placeholder fusion (TODO: replace with a real ensemble): each supplied
    modality is predicted independently, the most frequent label wins (ties
    go to the modality evaluated first: text, audio, then vision) and the
    reported confidence is the mean of the contributing confidences.

    Modalities whose predictor returned a failure placeholder (for example
    "Error occurred" or "Model not loaded") are excluded from the vote so they
    can neither win nor dilute the confidence.

    Args:
        text: Optional text input; skipped when empty.
        audio_bytes: Optional encoded audio; skipped when empty.
        image: Optional image (PIL Image or numpy array).

    Returns:
        tuple: ``(label, confidence)``. ``("No inputs provided", 0.0)`` when
        nothing was supplied; the first failure label with 0.0 when every
        supplied modality failed.
    """
    # Labels the predictors in this file return instead of a sentiment.
    failure_labels = frozenset(
        {
            "No text provided",
            "TextBlob not available",
            "No audio provided",
            "Library not available",
            "No image provided",
            "Image preprocessing failed",
            "Model not loaded",
            "Error occurred",
        }
    )

    results = []
    if text:
        results.append(predict_text_sentiment(text))
    if audio_bytes:
        results.append(predict_audio_sentiment(audio_bytes))
    # ``is not None`` rather than truthiness: numpy arrays raise on bool().
    if image is not None:
        results.append(predict_vision_sentiment(image))

    if not results:
        return "No inputs provided", 0.0

    usable = [(label, score) for label, score in results if label not in failure_labels]
    if not usable:
        return results[0][0], 0.0

    sentiment_counts = {}
    total_confidence = 0
    for label, score in usable:
        sentiment_counts[label] = sentiment_counts.get(label, 0) + 1
        total_confidence += score

    final_sentiment = max(sentiment_counts, key=sentiment_counts.get)
    return final_sentiment, total_confidence / len(usable)
|
|
|
|
|
|
|
|
|
|
|
# Sidebar navigation: the selected label drives the page dispatch below, so
# these strings must stay identical to the ones compared against ``page``.
_PAGE_CHOICES = [
    "π Home",
    "π Text Sentiment",
    "π΅ Audio Sentiment",
    "πΌοΈ Vision Sentiment",
    "π Fused Model",
]

st.sidebar.title("π§  Sentiment Analysis")
st.sidebar.markdown("---")
page = st.sidebar.selectbox("Choose a page:", _PAGE_CHOICES)
|
|
|
|
|
|
|
|
if page == "π Home":
    # Landing page: static description cards for each model.
    st.markdown(
        '<h1 class="main-header">Sentiment Analysis Testing Ground</h1>',
        unsafe_allow_html=True,
    )

    welcome_card = """
    <div class="model-card">
    <h2>Welcome to your Multi-Modal Sentiment Analysis Testing Platform!</h2>
    <p>This application provides a comprehensive testing environment for your three independent sentiment analysis models:</p>
    </div>
    """
    st.markdown(welcome_card, unsafe_allow_html=True)

    text_card = """
    <div class="model-card">
    <h3>π Text Sentiment Model</h3>
    <p>β <strong>READY TO USE</strong> - Analyze sentiment from text input using TextBlob</p>
    <ul>
    <li>Process any text input</li>
    <li>Get sentiment classification (Positive/Negative/Neutral)</li>
    <li>View confidence scores</li>
    <li>Real-time NLP analysis</li>
    </ul>
    </div>
    """

    audio_card = """
    <div class="model-card">
    <h3>π΅ Audio Sentiment Model</h3>
    <p>β <strong>READY TO USE</strong> - Analyze sentiment from audio files using fine-tuned Wav2Vec2</p>
    <ul>
    <li>Upload audio files (.wav, .mp3, .m4a, .flac)</li>
    <li>ποΈ Record audio directly with microphone (max 5s)</li>
    <li>π Automatic preprocessing: 16kHz sampling, 5s max duration (CREMA-D + RAVDESS format)</li>
    <li>Listen to uploaded/recorded audio</li>
    <li>Get sentiment predictions</li>
    <li>Real-time audio analysis</li>
    </ul>
    </div>
    """

    vision_card = """
    <div class="model-card">
    <h3>πΌοΈ Vision Sentiment Model</h3>
    <p>Analyze sentiment from images using fine-tuned ResNet-50</p>
    <ul>
    <li>Upload image files (.png, .jpg, .jpeg, .bmp, .tiff)</li>
    <li>π Automatic face detection & preprocessing</li>
    <li>π― Fixed 0% padding for tightest face crop</li>
    <li>π Convert to 224x224 grayscale β 3-channel RGB (FER2013 format)</li>
    <li>π― Transforms: Resize(224) β CenterCrop(224) β ImageNet Normalization</li>
    <li>Preview original & preprocessed images</li>
    <li>Get sentiment predictions</li>
    </ul>
    </div>
    """

    # One card per column, left to right: text, audio, vision.
    col1, col2, col3 = st.columns(3)
    for column, card_html in zip((col1, col2, col3), (text_card, audio_card, vision_card)):
        with column:
            st.markdown(card_html, unsafe_allow_html=True)

    fused_card = """
    <div class="model-card">
    <h3>π Fused Model</h3>
    <p>Combine predictions from all three models for enhanced accuracy</p>
    <ul>
    <li>Multi-modal input processing</li>
    <li>Ensemble prediction strategies</li>
    <li>Comprehensive sentiment analysis</li>
    </ul>
    </div>
    """
    st.markdown(fused_card, unsafe_allow_html=True)

    st.markdown("---")

    closing_note = """
    <div style="text-align: center; color: #666;">
    <p><strong>Note:</strong> This application now has <strong>ALL THREE MODELS</strong> fully integrated and ready to use! π</p>
    <p><strong>TextBlob</strong> (Text) + <strong>Wav2Vec2</strong> (Audio) + <strong>ResNet-50</strong> (Vision)</p>
    </div>
    """
    st.markdown(closing_note, unsafe_allow_html=True)
|
|
|
|
|
|
|
|
elif page == "π Text Sentiment": |
|
|
st.title("π Text Sentiment Analysis") |
|
|
st.markdown("Analyze the sentiment of your text using our TextBlob-based model.") |
|
|
|
|
|
|
|
|
text_input = st.text_area( |
|
|
"Enter your text here:", |
|
|
height=150, |
|
|
placeholder="Type or paste your text here to analyze its sentiment...", |
|
|
) |
|
|
|
|
|
|
|
|
if st.button("π Analyze Sentiment", type="primary", use_container_width=True): |
|
|
if text_input and text_input.strip(): |
|
|
with st.spinner("Analyzing text sentiment..."): |
|
|
sentiment, confidence = predict_text_sentiment(text_input) |
|
|
|
|
|
|
|
|
st.markdown("### Results") |
|
|
|
|
|
|
|
|
col1, col2 = st.columns(2) |
|
|
with col1: |
|
|
st.metric("Sentiment", sentiment) |
|
|
with col2: |
|
|
st.metric("Confidence", f"{confidence:.2f}") |
|
|
|
|
|
|
|
|
sentiment_colors = { |
|
|
"Positive": "π’", |
|
|
"Negative": "π΄", |
|
|
"Neutral": "π‘", |
|
|
} |
|
|
|
|
|
st.markdown( |
|
|
f""" |
|
|
<div class="result-box"> |
|
|
<h4>{sentiment_colors.get(sentiment, "β")} Sentiment: {sentiment}</h4> |
|
|
<p><strong>Confidence:</strong> {confidence:.2f}</p> |
|
|
<p><strong>Input Text:</strong> "{text_input[:100]}{'...' if len(text_input) > 100 else ''}"</p> |
|
|
<p><strong>Model:</strong> TextBlob (Natural Language Processing)</p> |
|
|
</div> |
|
|
""", |
|
|
unsafe_allow_html=True, |
|
|
) |
|
|
else: |
|
|
st.error("Please enter some text to analyze.") |
|
|
|
|
|
|
|
|
elif page == "π΅ Audio Sentiment": |
|
|
st.title("π΅ Audio Sentiment Analysis") |
|
|
st.markdown( |
|
|
"Analyze the sentiment of your audio files using our fine-tuned Wav2Vec2 model." |
|
|
) |
|
|
|
|
|
|
|
|
st.info( |
|
|
"βΉοΈ **Audio Preprocessing**: Audio will be automatically processed to match CREMA-D + RAVDESS training format: " |
|
|
"16kHz sampling rate, max 5 seconds, with automatic resampling and feature extraction." |
|
|
) |
|
|
|
|
|
|
|
|
model, device, num_classes, feature_extractor = load_audio_model() |
|
|
if model is None: |
|
|
st.error("β Audio model could not be loaded. Please check the model file.") |
|
|
st.info("Expected model file: `models/wav2vec2_model.pth`") |
|
|
else: |
|
|
st.success( |
|
|
f"β
Audio model loaded successfully on {device} with {num_classes} classes!" |
|
|
) |
|
|
|
|
|
|
|
|
st.subheader("π€ Choose Input Method") |
|
|
input_method = st.radio( |
|
|
"Select how you want to provide audio:", |
|
|
["π Upload Audio File", "ποΈ Record Audio"], |
|
|
horizontal=True, |
|
|
) |
|
|
|
|
|
if input_method == "π Upload Audio File": |
|
|
|
|
|
uploaded_audio = st.file_uploader( |
|
|
"Choose an audio file", |
|
|
type=["wav", "mp3", "m4a", "flac"], |
|
|
help="Supported formats: WAV, MP3, M4A, FLAC", |
|
|
) |
|
|
|
|
|
audio_source = "uploaded_file" |
|
|
audio_name = uploaded_audio.name if uploaded_audio else None |
|
|
|
|
|
else: |
|
|
st.markdown( |
|
|
""" |
|
|
<div class="model-card"> |
|
|
<h3>ποΈ Audio Recording</h3> |
|
|
<p>Record audio directly with your microphone (max 5 seconds).</p> |
|
|
<p><strong>Note:</strong> Make sure your microphone is accessible and you have permission to use it.</p> |
|
|
</div> |
|
|
""", |
|
|
unsafe_allow_html=True, |
|
|
) |
|
|
|
|
|
|
|
|
recorded_audio = st.audio_input( |
|
|
label="Click to start recording", |
|
|
help="Click the microphone button to start/stop recording. Maximum recording time is 5 seconds.", |
|
|
) |
|
|
|
|
|
if recorded_audio is not None: |
|
|
|
|
|
st.audio(recorded_audio, format="audio/wav") |
|
|
st.success("β
Audio recorded successfully!") |
|
|
|
|
|
|
|
|
uploaded_audio = recorded_audio |
|
|
audio_source = "recorded" |
|
|
audio_name = "Recorded Audio" |
|
|
else: |
|
|
uploaded_audio = None |
|
|
audio_source = None |
|
|
audio_name = None |
|
|
|
|
|
if uploaded_audio is not None: |
|
|
|
|
|
if audio_source == "recorded": |
|
|
st.audio(uploaded_audio, format="audio/wav") |
|
|
st.info(f"ποΈ {audio_name} | Source: Microphone Recording") |
|
|
else: |
|
|
st.audio( |
|
|
uploaded_audio, format=f'audio/{uploaded_audio.name.split(".")[-1]}' |
|
|
) |
|
|
|
|
|
file_size = len(uploaded_audio.getvalue()) / 1024 |
|
|
st.info(f"π File: {uploaded_audio.name} | Size: {file_size:.1f} KB") |
|
|
|
|
|
|
|
|
if st.button( |
|
|
"π Analyze Audio Sentiment", type="primary", use_container_width=True |
|
|
): |
|
|
if model is None: |
|
|
st.error("β Model not loaded. Cannot analyze audio.") |
|
|
else: |
|
|
with st.spinner("Analyzing audio sentiment..."): |
|
|
audio_bytes = uploaded_audio.getvalue() |
|
|
sentiment, confidence = predict_audio_sentiment(audio_bytes) |
|
|
|
|
|
|
|
|
st.markdown("### Results") |
|
|
|
|
|
col1, col2 = st.columns(2) |
|
|
with col1: |
|
|
st.metric("Sentiment", sentiment) |
|
|
with col2: |
|
|
st.metric("Confidence", f"{confidence:.2f}") |
|
|
|
|
|
|
|
|
sentiment_colors = {"Positive": "π’", "Negative": "π΄", "Neutral": "π‘"} |
|
|
|
|
|
st.markdown( |
|
|
f""" |
|
|
<div class="result-box"> |
|
|
<h4>{sentiment_colors.get(sentiment, "β")} Sentiment: {sentiment}</h4> |
|
|
<p><strong>Confidence:</strong> {confidence:.2f}</p> |
|
|
<p><strong>Audio Source:</strong> {audio_name}</p> |
|
|
<p><strong>Model:</strong> Wav2Vec2 (Fine-tuned on RAVDESS + CREMA-D)</p> |
|
|
</div> |
|
|
""", |
|
|
unsafe_allow_html=True, |
|
|
) |
|
|
else: |
|
|
if input_method == "π Upload Audio File": |
|
|
st.info("π Please upload an audio file to begin analysis.") |
|
|
else: |
|
|
st.info("ποΈ Click the microphone button above to record audio for analysis.") |
|
|
|
|
|
|
|
|
elif page == "πΌοΈ Vision Sentiment": |
|
|
st.title("πΌοΈ Vision Sentiment Analysis") |
|
|
st.markdown( |
|
|
"Analyze the sentiment of your images using our fine-tuned ResNet-50 model." |
|
|
) |
|
|
|
|
|
st.info( |
|
|
"βΉοΈ **Note**: Images will be automatically preprocessed to match FER2013 format: face detection, grayscale conversion, and 224x224 resize (converted to 3-channel RGB)." |
|
|
) |
|
|
|
|
|
|
|
|
st.info( |
|
|
"π― **Face Cropping**: Set to 0% padding for tightest crop on facial features" |
|
|
) |
|
|
|
|
|
|
|
|
model, device, num_classes = load_vision_model() |
|
|
if model is None: |
|
|
st.error("β Vision model could not be loaded. Please check the model file.") |
|
|
st.info("Expected model file: `models/resnet50_model.pth`") |
|
|
else: |
|
|
st.success( |
|
|
f"β
Vision model loaded successfully on {device} with {num_classes} classes!" |
|
|
) |
|
|
|
|
|
|
|
|
st.subheader("πΈ Choose Input Method") |
|
|
input_method = st.radio( |
|
|
"Select how you want to provide an image:", |
|
|
["π Upload Image File", "π· Take Photo with Camera"], |
|
|
horizontal=True, |
|
|
) |
|
|
|
|
|
if input_method == "π Upload Image File": |
|
|
|
|
|
uploaded_image = st.file_uploader( |
|
|
"Choose an image file", |
|
|
type=["png", "jpg", "jpeg", "bmp", "tiff"], |
|
|
help="Supported formats: PNG, JPG, JPEG, BMP, TIFF", |
|
|
) |
|
|
|
|
|
if uploaded_image is not None: |
|
|
|
|
|
image = Image.open(uploaded_image) |
|
|
st.image( |
|
|
image, |
|
|
caption=f"Uploaded Image: {uploaded_image.name}", |
|
|
use_container_width=True, |
|
|
) |
|
|
|
|
|
|
|
|
file_size = len(uploaded_image.getvalue()) / 1024 |
|
|
st.info( |
|
|
f"π File: {uploaded_image.name} | Size: {file_size:.1f} KB | Dimensions: {image.size[0]}x{image.size[1]}" |
|
|
) |
|
|
|
|
|
|
|
|
if st.button( |
|
|
"π Analyze Image Sentiment", type="primary", use_container_width=True |
|
|
): |
|
|
if model is None: |
|
|
st.error("β Model not loaded. Cannot analyze image.") |
|
|
else: |
|
|
with st.spinner("Analyzing image sentiment..."): |
|
|
sentiment, confidence = predict_vision_sentiment(image) |
|
|
|
|
|
|
|
|
st.markdown("### Results") |
|
|
|
|
|
col1, col2 = st.columns(2) |
|
|
with col1: |
|
|
st.metric("Sentiment", sentiment) |
|
|
with col2: |
|
|
st.metric("Confidence", f"{confidence:.2f}") |
|
|
|
|
|
|
|
|
sentiment_colors = { |
|
|
"Positive": "π’", |
|
|
"Negative": "π΄", |
|
|
"Neutral": "π‘", |
|
|
} |
|
|
|
|
|
st.markdown( |
|
|
f""" |
|
|
<div class="result-box"> |
|
|
<h4>{sentiment_colors.get(sentiment, "β")} Sentiment: {sentiment}</h4> |
|
|
<p><strong>Confidence:</strong> {confidence:.2f}</p> |
|
|
<p><strong>Image File:</strong> {uploaded_image.name}</p> |
|
|
<p><strong>Model:</strong> ResNet-50 (Fine-tuned on FER2013)</p> |
|
|
</div> |
|
|
""", |
|
|
unsafe_allow_html=True, |
|
|
) |
|
|
|
|
|
else: |
|
|
st.markdown( |
|
|
""" |
|
|
<div class="model-card"> |
|
|
<h3>π· Camera Capture</h3> |
|
|
<p>Take a photo directly with your camera to analyze its sentiment.</p> |
|
|
<p><strong>Note:</strong> Make sure your camera is accessible and you have permission to use it.</p> |
|
|
</div> |
|
|
""", |
|
|
unsafe_allow_html=True, |
|
|
) |
|
|
|
|
|
|
|
|
camera_photo = st.camera_input( |
|
|
"Take a photo", |
|
|
help="Click the camera button to take a photo, or use the upload button to select an existing photo", |
|
|
) |
|
|
|
|
|
if camera_photo is not None: |
|
|
|
|
|
image = Image.open(camera_photo) |
|
|
st.image( |
|
|
image, |
|
|
caption="Captured Photo", |
|
|
use_container_width=True, |
|
|
) |
|
|
|
|
|
|
|
|
st.info( |
|
|
f"π· Captured Photo | Dimensions: {image.size[0]}x{image.size[1]} | Format: {image.format}" |
|
|
) |
|
|
|
|
|
|
|
|
if st.button( |
|
|
"π Analyze Photo Sentiment", type="primary", use_container_width=True |
|
|
): |
|
|
if model is None: |
|
|
st.error("β Model not loaded. Cannot analyze image.") |
|
|
else: |
|
|
with st.spinner("Analyzing photo sentiment..."): |
|
|
sentiment, confidence = predict_vision_sentiment(image) |
|
|
|
|
|
|
|
|
st.markdown("### Results") |
|
|
|
|
|
col1, col2 = st.columns(2) |
|
|
with col1: |
|
|
st.metric("Sentiment", sentiment) |
|
|
with col2: |
|
|
st.metric("Confidence", f"{confidence:.2f}") |
|
|
|
|
|
|
|
|
sentiment_colors = { |
|
|
"Positive": "π’", |
|
|
"Negative": "π΄", |
|
|
"Neutral": "π‘", |
|
|
} |
|
|
|
|
|
st.markdown( |
|
|
f""" |
|
|
<div class="result-box"> |
|
|
<h4>{sentiment_colors.get(sentiment, "β")} Sentiment: {sentiment}</h4> |
|
|
<p><strong>Confidence:</strong> {confidence:.2f}</p> |
|
|
<p><strong>Image Source:</strong> Camera Capture</p> |
|
|
<p><strong>Model:</strong> ResNet-50 (Fine-tuned on FER2013)</p> |
|
|
</div> |
|
|
""", |
|
|
unsafe_allow_html=True, |
|
|
) |
|
|
|
|
|
|
|
|
if input_method == "π Upload Image File" and "uploaded_image" not in locals(): |
|
|
st.info("π Please upload an image file to begin analysis.") |
|
|
elif input_method == "π· Take Photo with Camera" and "camera_photo" not in locals(): |
|
|
st.info("π· Click the camera button above to take a photo for analysis.") |
|
|
|
|
|
|
|
|
elif page == "π Fused Model": |
|
|
st.title("π Fused Model Analysis") |
|
|
st.markdown( |
|
|
"Combine predictions from all three models for enhanced sentiment analysis." |
|
|
) |
|
|
|
|
|
st.markdown( |
|
|
""" |
|
|
<div class="model-card"> |
|
|
<h3>Multi-Modal Sentiment Analysis</h3> |
|
|
<p>This page allows you to input text, audio, and/or image data to get a comprehensive sentiment analysis |
|
|
using all three models combined.</p> |
|
|
</div> |
|
|
""", |
|
|
unsafe_allow_html=True, |
|
|
) |
|
|
|
|
|
|
|
|
col1, col2 = st.columns(2) |
|
|
|
|
|
with col1: |
|
|
st.subheader("π Text Input") |
|
|
text_input = st.text_area( |
|
|
"Enter text (optional):", |
|
|
height=100, |
|
|
placeholder="Type or paste your text here...", |
|
|
) |
|
|
|
|
|
st.subheader("π΅ Audio Input") |
|
|
|
|
|
|
|
|
st.info( |
|
|
"βΉοΈ **Audio Preprocessing**: Audio will be automatically processed to match CREMA-D + RAVDESS training format: " |
|
|
"16kHz sampling rate, max 5 seconds, with automatic resampling and feature extraction." |
|
|
) |
|
|
|
|
|
|
|
|
audio_input_method = st.radio( |
|
|
"Audio input method:", |
|
|
["π Upload File", "ποΈ Record Audio"], |
|
|
key="fused_audio_method", |
|
|
horizontal=True, |
|
|
) |
|
|
|
|
|
if audio_input_method == "π Upload File": |
|
|
uploaded_audio = st.file_uploader( |
|
|
"Upload audio file (optional):", |
|
|
type=["wav", "mp3", "m4a", "flac"], |
|
|
key="fused_audio", |
|
|
) |
|
|
audio_source = "uploaded_file" |
|
|
audio_name = uploaded_audio.name if uploaded_audio else None |
|
|
else: |
|
|
|
|
|
recorded_audio = st.audio_input( |
|
|
label="Record audio (optional):", |
|
|
key="fused_audio_recorder", |
|
|
help="Click to record audio for sentiment analysis", |
|
|
) |
|
|
|
|
|
if recorded_audio is not None: |
|
|
st.audio(recorded_audio, format="audio/wav") |
|
|
st.success("β
Audio recorded successfully!") |
|
|
uploaded_audio = recorded_audio |
|
|
audio_source = "recorded" |
|
|
audio_name = "Recorded Audio" |
|
|
else: |
|
|
uploaded_audio = None |
|
|
audio_source = None |
|
|
audio_name = None |
|
|
|
|
|
with col2: |
|
|
st.subheader("πΌοΈ Image Input") |
|
|
|
|
|
|
|
|
st.info( |
|
|
"π― **Face Cropping**: Set to 0% padding for tightest crop on facial features" |
|
|
) |
|
|
|
|
|
|
|
|
image_input_method = st.radio( |
|
|
"Image input method:", |
|
|
["π Upload File", "π· Take Photo"], |
|
|
key="fused_image_method", |
|
|
horizontal=True, |
|
|
) |
|
|
|
|
|
if image_input_method == "π Upload File": |
|
|
uploaded_image = st.file_uploader( |
|
|
"Upload image file (optional):", |
|
|
type=["png", "jpg", "jpeg", "bmp", "tiff"], |
|
|
key="fused_image", |
|
|
) |
|
|
|
|
|
if uploaded_image: |
|
|
image = Image.open(uploaded_image) |
|
|
st.image(image, caption="Uploaded Image", use_container_width=True) |
|
|
else: |
|
|
|
|
|
camera_photo = st.camera_input( |
|
|
"Take a photo (optional):", |
|
|
key="fused_camera", |
|
|
help="Click to take a photo for sentiment analysis", |
|
|
) |
|
|
|
|
|
if camera_photo: |
|
|
image = Image.open(camera_photo) |
|
|
st.image(image, caption="Captured Photo", use_container_width=True) |
|
|
|
|
|
uploaded_image = camera_photo |
|
|
|
|
|
if uploaded_audio: |
|
|
st.audio( |
|
|
uploaded_audio, format=f'audio/{uploaded_audio.name.split(".")[-1]}' |
|
|
) |
|
|
|
|
|
|
|
|
if st.button("π Run Fused Analysis", type="primary", use_container_width=True): |
|
|
if text_input or uploaded_audio or uploaded_image: |
|
|
with st.spinner("Running fused sentiment analysis..."): |
|
|
|
|
|
audio_bytes = uploaded_audio.getvalue() if uploaded_audio else None |
|
|
image = Image.open(uploaded_image) if uploaded_image else None |
|
|
|
|
|
|
|
|
sentiment, confidence = predict_fused_sentiment( |
|
|
text=text_input if text_input else None, |
|
|
audio_bytes=audio_bytes, |
|
|
image=image, |
|
|
) |
|
|
|
|
|
|
|
|
st.markdown("### Fused Model Results") |
|
|
|
|
|
col1, col2 = st.columns(2) |
|
|
with col1: |
|
|
st.metric("Final Sentiment", sentiment) |
|
|
with col2: |
|
|
st.metric("Overall Confidence", f"{confidence:.2f}") |
|
|
|
|
|
|
|
|
st.markdown("### Individual Model Results") |
|
|
|
|
|
results_data = [] |
|
|
|
|
|
if text_input: |
|
|
text_sentiment, text_conf = predict_text_sentiment(text_input) |
|
|
results_data.append( |
|
|
{ |
|
|
"Model": "Text (TextBlob) β
", |
|
|
"Input": f"Text: {text_input[:50]}...", |
|
|
"Sentiment": text_sentiment, |
|
|
"Confidence": f"{text_conf:.2f}", |
|
|
} |
|
|
) |
|
|
|
|
|
if uploaded_audio: |
|
|
audio_sentiment, audio_conf = predict_audio_sentiment(audio_bytes) |
|
|
results_data.append( |
|
|
{ |
|
|
"Model": "Audio (Wav2Vec2) β
", |
|
|
"Input": f"Audio: {audio_name}", |
|
|
"Sentiment": audio_sentiment, |
|
|
"Confidence": f"{audio_conf:.2f}", |
|
|
} |
|
|
) |
|
|
|
|
|
if uploaded_image: |
|
|
|
|
|
vision_sentiment, vision_conf = predict_vision_sentiment( |
|
|
image, crop_tightness=0.0 |
|
|
) |
|
|
results_data.append( |
|
|
{ |
|
|
"Model": "Vision (ResNet-50)", |
|
|
"Input": f"Image: {uploaded_image.name}", |
|
|
"Sentiment": vision_sentiment, |
|
|
"Confidence": f"{vision_conf:.2f}", |
|
|
} |
|
|
) |
|
|
|
|
|
if results_data: |
|
|
df = pd.DataFrame(results_data) |
|
|
st.dataframe(df, use_container_width=True) |
|
|
|
|
|
|
|
|
sentiment_colors = {"Positive": "π’", "Negative": "π΄", "Neutral": "π‘"} |
|
|
|
|
|
st.markdown( |
|
|
f""" |
|
|
<div class="result-box"> |
|
|
<h4>{sentiment_colors.get(sentiment, "β")} Final Fused Sentiment: {sentiment}</h4> |
|
|
<p><strong>Overall Confidence:</strong> {confidence:.2f}</p> |
|
|
<p><strong>Models Used:</strong> {len(results_data)}</p> |
|
|
</div> |
|
|
""", |
|
|
unsafe_allow_html=True, |
|
|
) |
|
|
else: |
|
|
st.warning( |
|
|
"β οΈ Please provide at least one input (text, audio, or image) for fused analysis." |
|
|
) |
|
|
|
|
|
|
|
|
# Footer shown beneath every page: a divider followed by the author credit.
_FOOTER_HTML = """
<div style="text-align: center; color: #666; padding: 1rem;">
<p>Built with β€οΈ | by <a href="https://github.com/iamfaham">iamfaham</a></p>
</div>
"""
st.markdown("---")
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)
|
|
|