Spaces:

pathananas
/

multimodal-ai-engine

Sleeping

App Files Community

pathananas committed on Mar 13

Commit

6311c46

verified ·

1 Parent(s): 56a2edf

Update model.py

Browse files

Files changed (1) hide show

model.py +13 -8

model.py CHANGED Viewed

@@ -205,39 +205,44 @@ Confidence: **{audio_conf}%**
     if text_label:
         reasoning_lines.append(
             f"The text expresses a {text_label.lower()} sentiment with emotion "
             f"'{emotion_label}'. The topic appears related to {topic_label}."
         )
     # ================= IMAGE REASONING =================
     if image_label:
         reasoning_lines.append(
-            f"The object classifier predicted '{image_label}' ({image_conf}% confidence), "
             "though this may be an approximate category."
         )
         if caption:
             reasoning_lines.append(
-                f"The scene description suggests: '{caption}', which provides a more detailed "
-                "interpretation of the visual content."
             )
     if audio_label:
         reasoning_lines.append(
             f"Spoken content carries a {audio_label.lower()} tone ({audio_conf}%)."
         )
     if text_label and audio_label:
         if text_label == "POSITIVE" and audio_label == "NEGATIVE":
             reasoning_lines.append(
-                "⚠ Cross-modal inconsistency detected: the text expresses positivity "
                 "but the voice tone suggests negativity."
             )
         if text_label == "NEGATIVE" and audio_label == "POSITIVE":
-            reasoning_lines.append(
-                "⚠ Cross-modal inconsistency detected: the text expresses negativity "
-                "while the voice tone appears positive."
-            )
     if image_label and topic_label:
         if topic_label == "technology":
             reasoning_lines.append(
                 "The textual topic relates to technology, which aligns with the detected visual content."
             )

     if text_label:
         reasoning_lines.append(
+            f"<b style='color:#60a5fa;'>📝 Text Analysis:</b> "
             f"The text expresses a {text_label.lower()} sentiment with emotion "
             f"'{emotion_label}'. The topic appears related to {topic_label}."
         )
     # ================= IMAGE REASONING =================
     if image_label:
         reasoning_lines.append(
+            f"<b style='color:#22c55e;'>🖼 Image Analysis:</b> "
+            f"Classifier predicted '{image_label}' ({image_conf}% confidence), "
             "though this may be an approximate category."
         )
         if caption:
             reasoning_lines.append(
+                f"<b style='color:#a78bfa;'>📷 Scene Description:</b> "
+                f"'{caption}', providing a clearer interpretation of the image."
             )
     if audio_label:
         reasoning_lines.append(
+            f"<b style='color:#f59e0b;'>🎙 Audio Tone:</b> "
             f"Spoken content carries a {audio_label.lower()} tone ({audio_conf}%)."
         )
     if text_label and audio_label:
         if text_label == "POSITIVE" and audio_label == "NEGATIVE":
             reasoning_lines.append(
+                "<b style='color:#ef4444;'>⚠️ Cross-Modal Conflict:</b> "
+                " the text expresses positivity "
                 "but the voice tone suggests negativity."
             )
         if text_label == "NEGATIVE" and audio_label == "POSITIVE":
+           reasoning_lines.append(
+               "<b style='color:#ef4444;'>⚠ Cross-Modal Conflict:</b> "
+               "Text expresses negativity while the voice tone appears positive."
+)
     if image_label and topic_label:
         if topic_label == "technology":
             reasoning_lines.append(
+                "<b style='color:#ef4444;'>💻🤖 Technology:</b> "
                 "The textual topic relates to technology, which aligns with the detected visual content."
             )