TanRJ committed on
Commit
5b2ee5d
·
verified ·
1 Parent(s): 5f1dc09

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +152 -133
app.py CHANGED
@@ -1,207 +1,226 @@
1
  import streamlit as st
2
  from PIL import Image
3
- from transformers import pipeline
4
- import pandas as pd
5
- import plotly.express as px
6
 
7
- st.set_page_config(
8
- page_title="MoodSyncAI",
9
- layout="wide"
 
 
10
  )
11
 
12
- @st.cache_resource
13
- def load_models():
14
- image_model = pipeline(
15
- "image-classification",
16
- model="dima806/facial_emotions_image_detection"
17
- )
18
 
19
- text_model = pipeline(
20
- "text-classification",
21
- model="cardiffnlp/twitter-roberta-base-sentiment-latest",
22
- top_k=None
23
- )
24
 
25
- return image_model, text_model
26
 
 
 
 
27
 
28
- def normalize_text_label(label):
29
- label = label.lower()
30
 
31
- if "positive" in label:
32
- return "positive"
33
- elif "negative" in label:
34
- return "negative"
35
- else:
36
- return "neutral"
37
 
 
 
 
 
38
 
39
- def map_emotion_to_sentiment(emotion):
40
- emotion = emotion.lower()
41
 
42
- positive_emotions = ["happy", "surprise"]
43
- negative_emotions = ["sad", "angry", "fear", "disgust"]
 
 
 
44
 
45
- if emotion in positive_emotions:
46
- return "positive"
47
- elif emotion in negative_emotions:
48
- return "negative"
49
- else:
50
- return "neutral"
51
 
 
 
 
 
52
 
53
- def get_top_prediction(predictions):
54
- return max(predictions, key=lambda x: x["score"])
 
55
 
 
56
 
57
- def create_bar_chart(predictions, title):
58
- df = pd.DataFrame(predictions)
59
- df["score"] = df["score"] * 100
60
- fig = px.bar(
61
- df,
62
- x="label",
63
- y="score",
64
- title=title,
65
- text=df["score"].round(2)
66
- )
67
- fig.update_layout(yaxis_title="Confidence (%)", xaxis_title="Class")
68
- return fig
69
 
 
 
70
 
71
- def fusion_logic(image_emotion, image_score, text_sentiment, text_score):
72
- image_sentiment = map_emotion_to_sentiment(image_emotion)
73
 
74
- if image_sentiment == text_sentiment:
75
- status = "ALIGNED"
76
- badge = "🟢 Aligned"
77
- confidence = round((image_score + text_score) / 2 * 100, 2)
78
  else:
79
- status = "MISMATCH DETECTED"
80
- badge = "🟠 Mismatch Detected"
81
- confidence = round(abs(image_score - text_score) * 100, 2)
82
-
83
- return image_sentiment, status, badge, confidence
84
 
 
85
 
86
- def generate_summary(image_emotion, image_sentiment, text_sentiment, fusion_status):
87
- if fusion_status == "ALIGNED":
88
- return (
89
- f"The person's facial expression appears {image_emotion}, "
90
- f"which is generally consistent with the {text_sentiment} tone of the text. "
91
- f"Both visual and textual signals suggest an emotionally aligned state."
92
  )
93
 
94
- return (
95
- f"The person's face appears to show {image_emotion}, which suggests a "
96
- f"{image_sentiment} emotional signal. However, the text expresses a "
97
- f"{text_sentiment} sentiment. This indicates a possible emotional mismatch, "
98
- f"where the spoken words and facial cues may not fully agree."
99
- )
100
-
101
-
102
- st.title("🧠 MoodSyncAI: Multi-Modal Sentiment & Emotion Analyser")
103
-
104
- st.write(
105
- "Upload a face image and enter the sentence spoken by the person. "
106
- "The app analyses visual emotion, textual sentiment, detects mismatch, "
107
- "and generates a plain-language emotional summary."
108
- )
109
 
110
- image_model, text_model = load_models()
 
111
 
112
- col1, col2 = st.columns(2)
113
 
114
- with col1:
115
- uploaded_image = st.file_uploader(
116
- "Upload face image",
117
- type=["jpg", "jpeg", "png"]
118
- )
119
 
120
- with col2:
121
- user_text = st.text_area(
122
- "Enter the sentence spoken by the person",
123
- placeholder="Example: No, I think the project is going really well."
124
- )
125
 
126
- if st.button("Analyse Emotion"):
127
- if uploaded_image is None:
128
- st.error("Please upload a face image.")
129
- elif user_text.strip() == "":
130
- st.error("Please enter a sentence.")
131
- else:
132
- image = Image.open(uploaded_image).convert("RGB")
133
 
134
- st.subheader("Uploaded Image")
135
- st.image(image, width=300)
 
 
136
 
137
- image_predictions = image_model(image)
138
- text_predictions = text_model(user_text)[0]
 
 
139
 
140
- image_top = get_top_prediction(image_predictions)
141
- text_top = get_top_prediction(text_predictions)
 
142
 
143
- image_emotion = image_top["label"]
144
- image_score = image_top["score"]
145
 
146
- text_sentiment = normalize_text_label(text_top["label"])
147
- text_score = text_top["score"]
 
 
148
 
149
- image_sentiment, fusion_status, badge, fusion_confidence = fusion_logic(
150
- image_emotion,
151
- image_score,
152
- text_sentiment,
153
- text_score
 
 
154
  )
155
 
156
  st.divider()
157
 
158
- result_col1, result_col2, result_col3 = st.columns(3)
159
 
160
- with result_col1:
161
  st.metric(
162
  "Visual Emotion",
163
  image_emotion,
164
  f"{round(image_score * 100, 2)}%"
165
  )
166
 
167
- with result_col2:
168
  st.metric(
169
- "Textual Sentiment",
170
  text_sentiment.capitalize(),
171
  f"{round(text_score * 100, 2)}%"
172
  )
173
 
174
- with result_col3:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  st.metric(
176
  "Fusion Result",
177
- badge,
178
- f"{fusion_confidence}%"
179
  )
180
 
181
  st.divider()
182
 
183
- chart_col1, chart_col2 = st.columns(2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
185
- with chart_col1:
186
  st.plotly_chart(
187
- create_bar_chart(image_predictions, "Visual Emotion Confidence"),
 
 
 
188
  use_container_width=True
189
  )
190
 
191
- with chart_col2:
 
 
192
  st.plotly_chart(
193
- create_bar_chart(text_predictions, "Text Sentiment Confidence"),
194
  use_container_width=True
195
  )
196
 
 
 
 
 
197
  st.divider()
198
 
199
  summary = generate_summary(
200
- image_emotion,
201
- image_sentiment,
202
- text_sentiment,
203
- fusion_status
 
 
204
  )
205
 
206
  st.subheader("Generative Summary")
207
- st.success(summary)
 
1
  import streamlit as st
2
  from PIL import Image
 
 
 
3
 
4
+ from models import (
5
+ load_models,
6
+ analyse_image,
7
+ analyse_text,
8
+ transcribe_audio
9
  )
10
 
11
+ from fusion import (
12
+ fusion_logic,
13
+ generate_summary,
14
+ create_bar_chart,
15
+ create_timeline_chart
16
+ )
17
 
18
+ st.set_page_config(
19
+ page_title="MoodSyncAI",
20
+ page_icon="🧠",
21
+ layout="wide"
22
+ )
23
 
24
+ st.title("🧠 MoodSyncAI")
25
 
26
+ st.write(
27
+ "Multi-modal emotion and sentiment analyser using image, text, audio, and webcam input."
28
+ )
29
 
30
+ image_model, text_model, whisper_model = load_models()
 
31
 
32
+ if "emotion_timeline" not in st.session_state:
33
+ st.session_state.emotion_timeline = []
 
 
 
 
34
 
35
+ input_mode = st.radio(
36
+ "Choose image input mode",
37
+ ["Upload Image", "Use Webcam"]
38
+ )
39
 
40
+ uploaded_image = None
41
+ webcam_image = None
42
 
43
+ if input_mode == "Upload Image":
44
+ uploaded_image = st.file_uploader(
45
+ "Upload face image",
46
+ type=["jpg", "jpeg", "png"]
47
+ )
48
 
49
+ else:
50
+ webcam_image = st.camera_input(
51
+ "Capture face from webcam"
52
+ )
 
 
53
 
54
+ uploaded_audio = st.file_uploader(
55
+ "Upload audio clip (optional)",
56
+ type=["wav", "mp3", "m4a"]
57
+ )
58
 
59
+ user_text = st.text_area(
60
+ "Enter text"
61
+ )
62
 
63
+ if st.button("Analyse Emotion"):
64
 
65
+ image_source = uploaded_image if input_mode == "Upload Image" else webcam_image
 
 
 
 
 
 
 
 
 
 
 
66
 
67
+ if image_source is None:
68
+ st.error("Please upload an image or capture from webcam.")
69
 
70
+ elif user_text.strip() == "" and uploaded_audio is None:
71
+ st.error("Please enter text or upload audio.")
72
 
 
 
 
 
73
  else:
74
+ image = Image.open(image_source).convert("RGB")
 
 
 
 
75
 
76
+ st.image(image, width=300)
77
 
78
+ image_predictions, image_emotion, image_score = analyse_image(
79
+ image_model,
80
+ image
 
 
 
81
  )
82
 
83
+ if input_mode == "Use Webcam":
84
+ st.session_state.emotion_timeline.append(
85
+ {
86
+ "frame": len(st.session_state.emotion_timeline) + 1,
87
+ "emotion": image_emotion,
88
+ "confidence": round(image_score * 100, 2)
89
+ }
90
+ )
 
 
 
 
 
 
 
91
 
92
+ final_text = user_text.strip()
93
+ audio_transcript = ""
94
 
95
+ if uploaded_audio is not None:
96
 
97
+ st.audio(uploaded_audio)
 
 
 
 
98
 
99
+ audio_transcript = transcribe_audio(
100
+ whisper_model,
101
+ uploaded_audio
102
+ )
 
103
 
104
+ st.info(f"Audio Transcript: {audio_transcript}")
 
 
 
 
 
 
105
 
106
+ if final_text == "":
107
+ final_text = audio_transcript
108
+ else:
109
+ final_text = final_text + " " + audio_transcript
110
 
111
+ text_predictions, text_sentiment, text_score = analyse_text(
112
+ text_model,
113
+ final_text
114
+ )
115
 
116
+ audio_sentiment = "not provided"
117
+ audio_score = 0.0
118
+ audio_predictions = None
119
 
120
+ if audio_transcript.strip() != "":
 
121
 
122
+ audio_predictions, audio_sentiment, audio_score = analyse_text(
123
+ text_model,
124
+ audio_transcript
125
+ )
126
 
127
+ fusion_result = fusion_logic(
128
+ image_emotion=image_emotion,
129
+ image_score=image_score,
130
+ text_sentiment=text_sentiment,
131
+ text_score=text_score,
132
+ audio_sentiment=audio_sentiment,
133
+ audio_score=audio_score
134
  )
135
 
136
  st.divider()
137
 
138
+ col1, col2, col3, col4 = st.columns(4)
139
 
140
+ with col1:
141
  st.metric(
142
  "Visual Emotion",
143
  image_emotion,
144
  f"{round(image_score * 100, 2)}%"
145
  )
146
 
147
+ with col2:
148
  st.metric(
149
+ "Text Sentiment",
150
  text_sentiment.capitalize(),
151
  f"{round(text_score * 100, 2)}%"
152
  )
153
 
154
+ with col3:
155
+ if audio_transcript.strip() != "":
156
+ st.metric(
157
+ "Audio Sentiment",
158
+ audio_sentiment.capitalize(),
159
+ f"{round(audio_score * 100, 2)}%"
160
+ )
161
+ else:
162
+ st.metric(
163
+ "Audio Sentiment",
164
+ "Not Provided",
165
+ "Optional"
166
+ )
167
+
168
+ with col4:
169
  st.metric(
170
  "Fusion Result",
171
+ fusion_result["badge"],
172
+ f'{fusion_result["confidence"]}%'
173
  )
174
 
175
  st.divider()
176
 
177
+ st.plotly_chart(
178
+ create_bar_chart(
179
+ image_predictions,
180
+ "Visual Emotion Confidence"
181
+ ),
182
+ use_container_width=True
183
+ )
184
+
185
+ st.plotly_chart(
186
+ create_bar_chart(
187
+ text_predictions,
188
+ "Text Sentiment Confidence"
189
+ ),
190
+ use_container_width=True
191
+ )
192
 
193
+ if audio_predictions is not None:
194
  st.plotly_chart(
195
+ create_bar_chart(
196
+ audio_predictions,
197
+ "Audio Sentiment Confidence"
198
+ ),
199
  use_container_width=True
200
  )
201
 
202
+ if input_mode == "Use Webcam" and len(st.session_state.emotion_timeline) > 0:
203
+ st.subheader("Webcam Emotion Timeline")
204
+
205
  st.plotly_chart(
206
+ create_timeline_chart(st.session_state.emotion_timeline),
207
  use_container_width=True
208
  )
209
 
210
+ if st.button("Clear Webcam Timeline"):
211
+ st.session_state.emotion_timeline = []
212
+ st.rerun()
213
+
214
  st.divider()
215
 
216
  summary = generate_summary(
217
+ image_emotion=image_emotion,
218
+ image_sentiment=fusion_result["image_sentiment"],
219
+ text_sentiment=text_sentiment,
220
+ audio_sentiment=audio_sentiment,
221
+ fusion_status=fusion_result["status"],
222
+ audio_used=audio_transcript.strip() != ""
223
  )
224
 
225
  st.subheader("Generative Summary")
226
+ st.success(summary)