model.py
CHANGED
@@ -5,10 +5,11 @@
 from database import save_analysis
 from datetime import datetime
 from transformers import pipeline
+from fusion import compute_fusion
 import torch
 import time
-
-from
+import base64
+from io import BytesIO
 
 device = 0 if torch.cuda.is_available() else -1
 
@@ -34,17 +35,8 @@ audio_pipeline = pipeline(
 
 print("Models loaded successfully.")
 
-analysis_history = []
-
-
-def clear_history():
-    global analysis_history
-    analysis_history.clear()
-    return analysis_history
-
 
 def multimodal_analyze(text, image, audio):
-    global analysis_history
 
     start_time = time.time()
 
@@ -60,12 +52,10 @@ def multimodal_analyze(text, image, audio):
     image_result_display = "No image provided."
     audio_result_display = "No audio provided."
 
-    #
+    # ================= TEXT =================
     if text and text.strip():
         try:
             res = text_pipeline(text)[0]
-            raw_label = res["label"]
-            text_conf = round(res["score"] * 100, 2)
 
             label_map = {
                 "LABEL_0": "NEGATIVE",
@@ -73,43 +63,54 @@ def multimodal_analyze(text, image, audio):
                 "LABEL_2": "POSITIVE"
             }
 
-            text_label = label_map.get(
+            text_label = label_map.get(res["label"], res["label"])
+            text_conf = round(res["score"] * 100, 2)
 
             text_result_display = f"""
 ## Text Sentiment
-
-
+Prediction: **{text_label}**
+
+Confidence: **{text_conf}%**
 """
+
         except Exception as e:
-            text_result_display = f"Text
+            text_result_display = f"Text error: {str(e)}"
 
-    #
+    # ================= IMAGE =================
     if image is not None:
         try:
-
+            results = image_pipeline(image)
 
             image_result_display = "## Image Classification\n\n"
 
-            for r in
+            for r in results[:3]:
                 label = r["label"]
                 conf = round(r["score"] * 100, 2)
                 image_result_display += f"- **{label}** ({conf}%)\n"
 
-            image_label =
-            image_conf = round(
+            image_label = results[0]["label"]
+            image_conf = round(results[0]["score"] * 100, 2)
+
+            # image preview
+            buffer = BytesIO()
+            image.save(buffer, format="PNG")
+            img_str = base64.b64encode(buffer.getvalue()).decode()
+
+            image_result_display += f"""
+<br>
+<img src="data:image/png;base64,{img_str}" width="200" style="border-radius:10px;">
+"""
 
         except Exception as e:
-            image_result_display = f"Image
+            image_result_display = f"Image error: {str(e)}"
 
-    #
+    # ================= AUDIO =================
    if audio is not None:
         try:
             res = audio_pipeline(audio)
             transcription = res["text"]
 
-
-            raw_audio_label = audio_sent["label"]
-            audio_conf = round(audio_sent["score"] * 100, 2)
+            sent = text_pipeline(transcription)[0]
 
             label_map = {
                 "LABEL_0": "NEGATIVE",
@@ -117,21 +118,25 @@
                 "LABEL_2": "POSITIVE"
             }
 
-            audio_label = label_map.get(
+            audio_label = label_map.get(sent["label"], sent["label"])
+            audio_conf = round(sent["score"] * 100, 2)
 
             audio_result_display = f"""
 ## Audio Intelligence
-
+
+Transcription:
 "{transcription}"
 
-
-
+Tone: **{audio_label}**
+
+Confidence: **{audio_conf}%**
 """
+
         except Exception as e:
-            audio_result_display = f"Audio
+            audio_result_display = f"Audio error: {str(e)}"
 
-    #
-    fusion_score,
+    # ================= FUSION =================
+    fusion_score, reasoning, interpretation, color = compute_fusion(
         text_label, text_conf,
         image_label, image_conf,
         audio_label, audio_conf
@@ -141,29 +146,34 @@
 
     fusion_summary = f"""
     <h2>Multimodal Intelligence Summary</h2>
-
+
+    {"<br>".join(reasoning)}
+
     <hr>
-
-    <
+
+    <h3>Fusion Score</h3>
+    <span style="color:{color}; font-size:24px; font-weight:bold;">
     {round(fusion_score,2)}
     </span>
     <hr>
-
-    <
+
+    <h3>Interpretation</h3>
+
+    {interpretation}
+
     <br>
-
+
+    Processing Time: {processing_time} sec
     """
+
+    # ================= SAVE HISTORY =================
     save_analysis({
         "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
         "text": text,
         "image": image_label,
         "audio": audio_label,
-        "
-
-
-
+        "transcription": transcription,
+        "fusion_score": round(fusion_score,2)
+    })
 
-    return fusion_summary, text_result_display, image_result_display, audio_result_display
-    #temp
-    #temp
-
+    return fusion_summary, text_result_display, image_result_display, audio_result_display
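The `fusion` module imported above is not included in this commit; only `compute_fusion`'s call signature (six label/confidence arguments) and its four return values (`fusion_score`, `reasoning`, `interpretation`, `color`) can be read off the diff. A minimal sketch of what such a helper could look like, assuming an equal-weight average of signed sentiment confidences and illustrative thresholds and colors:

```python
# Hypothetical sketch of fusion.py (not part of this commit).
# Only the call signature and the (score, reasoning, interpretation, color)
# return shape come from model.py above; the weighting, thresholds, and
# colors below are illustrative assumptions.

def compute_fusion(text_label, text_conf,
                   image_label, image_conf,
                   audio_label, audio_conf):
    """Combine per-modality results into one score plus display metadata."""
    polarity = {"POSITIVE": 1.0, "NEGATIVE": -1.0, "NEUTRAL": 0.0}

    reasoning = []
    contributions = []

    if text_label is not None:
        contributions.append(polarity.get(text_label, 0.0) * text_conf)
        reasoning.append(f"Text: {text_label} ({text_conf}%)")

    if image_label is not None:
        # Image classes carry no sentiment polarity; treat their confidence
        # as a weak neutral-positive reliability signal (assumed weighting).
        contributions.append(0.5 * image_conf)
        reasoning.append(f"Image: {image_label} ({image_conf}%)")

    if audio_label is not None:
        contributions.append(polarity.get(audio_label, 0.0) * audio_conf)
        reasoning.append(f"Audio tone: {audio_label} ({audio_conf}%)")

    fusion_score = sum(contributions) / len(contributions) if contributions else 0.0

    if fusion_score > 25:
        interpretation, color = "Overall positive signal across modalities.", "green"
    elif fusion_score < -25:
        interpretation, color = "Overall negative signal across modalities.", "red"
    else:
        interpretation, color = "Mixed or weak signal across modalities.", "orange"

    return fusion_score, reasoning, interpretation, color
```

In this sketch `reasoning` is a list of strings so that `{"<br>".join(reasoning)}` in `fusion_summary` renders one line per modality, and `color` styles the fusion-score span; `processing_time` is presumably computed from `start_time` in the unchanged lines between the hunks shown above.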