Spaces:

TanRJ
/

MoodSyncAI

Sleeping

App Files Community

TanRJ committed 17 days ago

Commit

5b2ee5d

verified ·

1 Parent(s): 5f1dc09

Update app.py

Browse files

Files changed (1) hide show

app.py +152 -133

app.py CHANGED Viewed

@@ -1,207 +1,226 @@
 import streamlit as st
 from PIL import Image
-from transformers import pipeline
-import pandas as pd
-import plotly.express as px
-st.set_page_config(
-    page_title="MoodSyncAI",
-    layout="wide"
 )
-@st.cache_resource
-def load_models():
-    image_model = pipeline(
-        "image-classification",
-        model="dima806/facial_emotions_image_detection"
-    )
-    text_model = pipeline(
-        "text-classification",
-        model="cardiffnlp/twitter-roberta-base-sentiment-latest",
-        top_k=None
-    )
-    return image_model, text_model
-def normalize_text_label(label):
-    label = label.lower()
-    if "positive" in label:
-        return "positive"
-    elif "negative" in label:
-        return "negative"
-    else:
-        return "neutral"
-def map_emotion_to_sentiment(emotion):
-    emotion = emotion.lower()
-    positive_emotions = ["happy", "surprise"]
-    negative_emotions = ["sad", "angry", "fear", "disgust"]
-    if emotion in positive_emotions:
-        return "positive"
-    elif emotion in negative_emotions:
-        return "negative"
-    else:
-        return "neutral"
-def get_top_prediction(predictions):
-    return max(predictions, key=lambda x: x["score"])
-def create_bar_chart(predictions, title):
-    df = pd.DataFrame(predictions)
-    df["score"] = df["score"] * 100
-    fig = px.bar(
-        df,
-        x="label",
-        y="score",
-        title=title,
-        text=df["score"].round(2)
-    )
-    fig.update_layout(yaxis_title="Confidence (%)", xaxis_title="Class")
-    return fig
-def fusion_logic(image_emotion, image_score, text_sentiment, text_score):
-    image_sentiment = map_emotion_to_sentiment(image_emotion)
-    if image_sentiment == text_sentiment:
-        status = "ALIGNED"
-        badge = "🟢 Aligned"
-        confidence = round((image_score + text_score) / 2 * 100, 2)
     else:
-        status = "MISMATCH DETECTED"
-        badge = "🟠 Mismatch Detected"
-        confidence = round(abs(image_score - text_score) * 100, 2)
-    return image_sentiment, status, badge, confidence
-def generate_summary(image_emotion, image_sentiment, text_sentiment, fusion_status):
-    if fusion_status == "ALIGNED":
-        return (
-            f"The person's facial expression appears {image_emotion}, "
-            f"which is generally consistent with the {text_sentiment} tone of the text. "
-            f"Both visual and textual signals suggest an emotionally aligned state."
         )
-    return (
-        f"The person's face appears to show {image_emotion}, which suggests a "
-        f"{image_sentiment} emotional signal. However, the text expresses a "
-        f"{text_sentiment} sentiment. This indicates a possible emotional mismatch, "
-        f"where the spoken words and facial cues may not fully agree."
-    )
-st.title("🧠 MoodSyncAI: Multi-Modal Sentiment & Emotion Analyser")
-st.write(
-    "Upload a face image and enter the sentence spoken by the person. "
-    "The app analyses visual emotion, textual sentiment, detects mismatch, "
-    "and generates a plain-language emotional summary."
-)
-image_model, text_model = load_models()
-col1, col2 = st.columns(2)
-with col1:
-    uploaded_image = st.file_uploader(
-        "Upload face image",
-        type=["jpg", "jpeg", "png"]
-    )
-with col2:
-    user_text = st.text_area(
-        "Enter the sentence spoken by the person",
-        placeholder="Example: No, I think the project is going really well."
-    )
-if st.button("Analyse Emotion"):
-    if uploaded_image is None:
-        st.error("Please upload a face image.")
-    elif user_text.strip() == "":
-        st.error("Please enter a sentence.")
-    else:
-        image = Image.open(uploaded_image).convert("RGB")
-        st.subheader("Uploaded Image")
-        st.image(image, width=300)
-        image_predictions = image_model(image)
-        text_predictions = text_model(user_text)[0]
-        image_top = get_top_prediction(image_predictions)
-        text_top = get_top_prediction(text_predictions)
-        image_emotion = image_top["label"]
-        image_score = image_top["score"]
-        text_sentiment = normalize_text_label(text_top["label"])
-        text_score = text_top["score"]
-        image_sentiment, fusion_status, badge, fusion_confidence = fusion_logic(
-            image_emotion,
-            image_score,
-            text_sentiment,
-            text_score
         )
         st.divider()
-        result_col1, result_col2, result_col3 = st.columns(3)
-        with result_col1:
             st.metric(
                 "Visual Emotion",
                 image_emotion,
                 f"{round(image_score * 100, 2)}%"
             )
-        with result_col2:
             st.metric(
-                "Textual Sentiment",
                 text_sentiment.capitalize(),
                 f"{round(text_score * 100, 2)}%"
             )
-        with result_col3:
             st.metric(
                 "Fusion Result",
-                badge,
-                f"{fusion_confidence}%"
             )
         st.divider()
-        chart_col1, chart_col2 = st.columns(2)
-        with chart_col1:
             st.plotly_chart(
-                create_bar_chart(image_predictions, "Visual Emotion Confidence"),
                 use_container_width=True
             )
-        with chart_col2:
             st.plotly_chart(
-                create_bar_chart(text_predictions, "Text Sentiment Confidence"),
                 use_container_width=True
             )
         st.divider()
         summary = generate_summary(
-            image_emotion,
-            image_sentiment,
-            text_sentiment,
-            fusion_status
         )
         st.subheader("Generative Summary")
-        st.success(summary)

 import streamlit as st
 from PIL import Image
+from models import (
+    load_models,
+    analyse_image,
+    analyse_text,
+    transcribe_audio
 )
+from fusion import (
+    fusion_logic,
+    generate_summary,
+    create_bar_chart,
+    create_timeline_chart
+)
+st.set_page_config(
+    page_title="MoodSyncAI",
+    page_icon="🧠",
+    layout="wide"
+)
+st.title("🧠 MoodSyncAI")
+st.write(
+    "Multi-modal emotion and sentiment analyser using image, text, audio, and webcam input."
+)
+image_model, text_model, whisper_model = load_models()
+if "emotion_timeline" not in st.session_state:
+    st.session_state.emotion_timeline = []
+input_mode = st.radio(
+    "Choose image input mode",
+    ["Upload Image", "Use Webcam"]
+)
+uploaded_image = None
+webcam_image = None
+if input_mode == "Upload Image":
+    uploaded_image = st.file_uploader(
+        "Upload face image",
+        type=["jpg", "jpeg", "png"]
+    )
+else:
+    webcam_image = st.camera_input(
+        "Capture face from webcam"
+    )
+uploaded_audio = st.file_uploader(
+    "Upload audio clip (optional)",
+    type=["wav", "mp3", "m4a"]
+)
+user_text = st.text_area(
+    "Enter text"
+)
+if st.button("Analyse Emotion"):
+    image_source = uploaded_image if input_mode == "Upload Image" else webcam_image
+    if image_source is None:
+        st.error("Please upload an image or capture from webcam.")
+    elif user_text.strip() == "" and uploaded_audio is None:
+        st.error("Please enter text or upload audio.")
     else:
+        image = Image.open(image_source).convert("RGB")
+        st.image(image, width=300)
+        image_predictions, image_emotion, image_score = analyse_image(
+            image_model,
+            image
         )
+        if input_mode == "Use Webcam":
+            st.session_state.emotion_timeline.append(
+                {
+                    "frame": len(st.session_state.emotion_timeline) + 1,
+                    "emotion": image_emotion,
+                    "confidence": round(image_score * 100, 2)
+                }
+            )
+        final_text = user_text.strip()
+        audio_transcript = ""
+        if uploaded_audio is not None:
+            st.audio(uploaded_audio)
+            audio_transcript = transcribe_audio(
+                whisper_model,
+                uploaded_audio
+            )
+            st.info(f"Audio Transcript: {audio_transcript}")
+            if final_text == "":
+                final_text = audio_transcript
+            else:
+                final_text = final_text + " " + audio_transcript
+        text_predictions, text_sentiment, text_score = analyse_text(
+            text_model,
+            final_text
+        )
+        audio_sentiment = "not provided"
+        audio_score = 0.0
+        audio_predictions = None
+        if audio_transcript.strip() != "":
+            audio_predictions, audio_sentiment, audio_score = analyse_text(
+                text_model,
+                audio_transcript
+            )
+        fusion_result = fusion_logic(
+            image_emotion=image_emotion,
+            image_score=image_score,
+            text_sentiment=text_sentiment,
+            text_score=text_score,
+            audio_sentiment=audio_sentiment,
+            audio_score=audio_score
         )
         st.divider()
+        col1, col2, col3, col4 = st.columns(4)
+        with col1:
             st.metric(
                 "Visual Emotion",
                 image_emotion,
                 f"{round(image_score * 100, 2)}%"
             )
+        with col2:
             st.metric(
+                "Text Sentiment",
                 text_sentiment.capitalize(),
                 f"{round(text_score * 100, 2)}%"
             )
+        with col3:
+            if audio_transcript.strip() != "":
+                st.metric(
+                    "Audio Sentiment",
+                    audio_sentiment.capitalize(),
+                    f"{round(audio_score * 100, 2)}%"
+                )
+            else:
+                st.metric(
+                    "Audio Sentiment",
+                    "Not Provided",
+                    "Optional"
+                )
+        with col4:
             st.metric(
                 "Fusion Result",
+                fusion_result["badge"],
+                f'{fusion_result["confidence"]}%'
             )
         st.divider()
+        st.plotly_chart(
+            create_bar_chart(
+                image_predictions,
+                "Visual Emotion Confidence"
+            ),
+            use_container_width=True
+        )
+        st.plotly_chart(
+            create_bar_chart(
+                text_predictions,
+                "Text Sentiment Confidence"
+            ),
+            use_container_width=True
+        )
+        if audio_predictions is not None:
             st.plotly_chart(
+                create_bar_chart(
+                    audio_predictions,
+                    "Audio Sentiment Confidence"
+                ),
                 use_container_width=True
             )
+        if input_mode == "Use Webcam" and len(st.session_state.emotion_timeline) > 0:
+            st.subheader("Webcam Emotion Timeline")
             st.plotly_chart(
+                create_timeline_chart(st.session_state.emotion_timeline),
                 use_container_width=True
             )
+            if st.button("Clear Webcam Timeline"):
+                st.session_state.emotion_timeline = []
+                st.rerun()
         st.divider()
         summary = generate_summary(
+            image_emotion=image_emotion,
+            image_sentiment=fusion_result["image_sentiment"],
+            text_sentiment=text_sentiment,
+            audio_sentiment=audio_sentiment,
+            fusion_status=fusion_result["status"],
+            audio_used=audio_transcript.strip() != ""
         )
         st.subheader("Generative Summary")
+        st.success(summary)