pathananas committed on
Commit
13073b4
·
verified ·
1 Parent(s): 610d85e
Files changed (1) hide show
  1. model.py +59 -49
model.py CHANGED
@@ -5,10 +5,11 @@
5
  from database import save_analysis
6
  from datetime import datetime
7
  from transformers import pipeline
 
8
  import torch
9
  import time
10
- from datetime import datetime
11
- from fusion import compute_fusion
12
 
13
  device = 0 if torch.cuda.is_available() else -1
14
 
@@ -34,17 +35,8 @@ audio_pipeline = pipeline(
34
 
35
  print("Models loaded successfully.")
36
 
37
- analysis_history = []
38
-
39
-
40
- def clear_history():
41
- global analysis_history
42
- analysis_history.clear()
43
- return analysis_history
44
-
45
 
46
  def multimodal_analyze(text, image, audio):
47
- global analysis_history
48
 
49
  start_time = time.time()
50
 
@@ -60,12 +52,10 @@ def multimodal_analyze(text, image, audio):
60
  image_result_display = "No image provided."
61
  audio_result_display = "No audio provided."
62
 
63
- # -------- TEXT --------
64
  if text and text.strip():
65
  try:
66
  res = text_pipeline(text)[0]
67
- raw_label = res["label"]
68
- text_conf = round(res["score"] * 100, 2)
69
 
70
  label_map = {
71
  "LABEL_0": "NEGATIVE",
@@ -73,43 +63,54 @@ def multimodal_analyze(text, image, audio):
73
  "LABEL_2": "POSITIVE"
74
  }
75
 
76
- text_label = label_map.get(raw_label, raw_label)
 
77
 
78
  text_result_display = f"""
79
  ## 📝 Text Sentiment
80
- **Prediction:** {text_label}
81
- **Confidence:** {text_conf}%
 
82
  """
 
83
  except Exception as e:
84
- text_result_display = f"Text processing error: {str(e)}"
85
 
86
- # -------- IMAGE --------
87
  if image is not None:
88
  try:
89
- class_res = image_pipeline(image)
90
 
91
  image_result_display = "## 🖼 Image Classification\n\n"
92
 
93
- for r in class_res[:3]:
94
  label = r["label"]
95
  conf = round(r["score"] * 100, 2)
96
  image_result_display += f"- **{label}** ({conf}%)\n"
97
 
98
- image_label = class_res[0]["label"]
99
- image_conf = round(class_res[0]["score"] * 100, 2)
 
 
 
 
 
 
 
 
 
 
100
 
101
  except Exception as e:
102
- image_result_display = f"Image processing error: {str(e)}"
103
 
104
- # -------- AUDIO --------
105
  if audio is not None:
106
  try:
107
  res = audio_pipeline(audio)
108
  transcription = res["text"]
109
 
110
- audio_sent = text_pipeline(transcription)[0]
111
- raw_audio_label = audio_sent["label"]
112
- audio_conf = round(audio_sent["score"] * 100, 2)
113
 
114
  label_map = {
115
  "LABEL_0": "NEGATIVE",
@@ -117,21 +118,25 @@ def multimodal_analyze(text, image, audio):
117
  "LABEL_2": "POSITIVE"
118
  }
119
 
120
- audio_label = label_map.get(raw_audio_label, raw_audio_label)
 
121
 
122
  audio_result_display = f"""
123
  ## 🎙 Audio Intelligence
124
- **Transcription:**
 
125
  "{transcription}"
126
 
127
- **Detected Tone:** {audio_label}
128
- ({audio_conf}%)
 
129
  """
 
130
  except Exception as e:
131
- audio_result_display = f"Audio processing error: {str(e)}"
132
 
133
- # -------- FUSION --------
134
- fusion_score, reasoning_lines, alignment_message, color = compute_fusion(
135
  text_label, text_conf,
136
  image_label, image_conf,
137
  audio_label, audio_conf
@@ -141,29 +146,34 @@ def multimodal_analyze(text, image, audio):
141
 
142
  fusion_summary = f"""
143
  <h2>🔎 Multimodal Intelligence Summary</h2>
144
- {"<br>".join(reasoning_lines)}
 
 
145
  <hr>
146
- <h3>📊 Fusion Score</h3>
147
- <span style="color:{color}; font-size:22px; font-weight:bold;">
 
148
  {round(fusion_score,2)}
149
  </span>
150
  <hr>
151
- <h3>🧠 Contextual Interpretation</h3>
152
- <p>{alignment_message}</p>
 
 
 
153
  <br>
154
- ⏱ Processing Time: {processing_time} seconds
 
155
  """
 
 
156
  save_analysis({
157
  "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
158
  "text": text,
159
  "image": image_label,
160
  "audio": audio_label,
161
- "fusion_score": round(fusion_score, 2)
162
- })
163
-
164
-
165
 
166
- return fusion_summary, text_result_display, image_result_display, audio_result_display
167
- #temp
168
- #temp
169
-
 
5
  from database import save_analysis
6
  from datetime import datetime
7
  from transformers import pipeline
8
+ from fusion import compute_fusion
9
  import torch
10
  import time
11
+ import base64
12
+ from io import BytesIO
13
 
14
  device = 0 if torch.cuda.is_available() else -1
15
 
 
35
 
36
  print("Models loaded successfully.")
37
 
 
 
 
 
 
 
 
 
38
 
39
  def multimodal_analyze(text, image, audio):
 
40
 
41
  start_time = time.time()
42
 
 
52
  image_result_display = "No image provided."
53
  audio_result_display = "No audio provided."
54
 
55
+ # ================= TEXT =================
56
  if text and text.strip():
57
  try:
58
  res = text_pipeline(text)[0]
 
 
59
 
60
  label_map = {
61
  "LABEL_0": "NEGATIVE",
 
63
  "LABEL_2": "POSITIVE"
64
  }
65
 
66
+ text_label = label_map.get(res["label"], res["label"])
67
+ text_conf = round(res["score"] * 100, 2)
68
 
69
  text_result_display = f"""
70
  ## 📝 Text Sentiment
71
+ Prediction: **{text_label}**
72
+
73
+ Confidence: **{text_conf}%**
74
  """
75
+
76
  except Exception as e:
77
+ text_result_display = f"Text error: {str(e)}"
78
 
79
+ # ================= IMAGE =================
80
  if image is not None:
81
  try:
82
+ results = image_pipeline(image)
83
 
84
  image_result_display = "## 🖼 Image Classification\n\n"
85
 
86
+ for r in results[:3]:
87
  label = r["label"]
88
  conf = round(r["score"] * 100, 2)
89
  image_result_display += f"- **{label}** ({conf}%)\n"
90
 
91
+ image_label = results[0]["label"]
92
+ image_conf = round(results[0]["score"] * 100, 2)
93
+
94
+ # image preview
95
+ buffer = BytesIO()
96
+ image.save(buffer, format="PNG")
97
+ img_str = base64.b64encode(buffer.getvalue()).decode()
98
+
99
+ image_result_display += f"""
100
+ <br>
101
+ <img src="data:image/png;base64,{img_str}" width="200" style="border-radius:10px;">
102
+ """
103
 
104
  except Exception as e:
105
+ image_result_display = f"Image error: {str(e)}"
106
 
107
+ # ================= AUDIO =================
108
  if audio is not None:
109
  try:
110
  res = audio_pipeline(audio)
111
  transcription = res["text"]
112
 
113
+ sent = text_pipeline(transcription)[0]
 
 
114
 
115
  label_map = {
116
  "LABEL_0": "NEGATIVE",
 
118
  "LABEL_2": "POSITIVE"
119
  }
120
 
121
+ audio_label = label_map.get(sent["label"], sent["label"])
122
+ audio_conf = round(sent["score"] * 100, 2)
123
 
124
  audio_result_display = f"""
125
  ## 🎙 Audio Intelligence
126
+
127
+ Transcription:
128
  "{transcription}"
129
 
130
+ Tone: **{audio_label}**
131
+
132
+ Confidence: **{audio_conf}%**
133
  """
134
+
135
  except Exception as e:
136
+ audio_result_display = f"Audio error: {str(e)}"
137
 
138
+ # ================= FUSION =================
139
+ fusion_score, reasoning, interpretation, color = compute_fusion(
140
  text_label, text_conf,
141
  image_label, image_conf,
142
  audio_label, audio_conf
 
146
 
147
  fusion_summary = f"""
148
  <h2>🔎 Multimodal Intelligence Summary</h2>
149
+
150
+ {"<br>".join(reasoning)}
151
+
152
  <hr>
153
+
154
+ <h3>Fusion Score</h3>
155
+ <span style="color:{color}; font-size:24px; font-weight:bold;">
156
  {round(fusion_score,2)}
157
  </span>
158
  <hr>
159
+
160
+ <h3>Interpretation</h3>
161
+
162
+ {interpretation}
163
+
164
  <br>
165
+
166
+ ⏱ Processing Time: {processing_time} sec
167
  """
168
+
169
+ # ================= SAVE HISTORY =================
170
  save_analysis({
171
  "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
172
  "text": text,
173
  "image": image_label,
174
  "audio": audio_label,
175
+ "transcription": transcription,
176
+ "fusion_score": round(fusion_score,2)
177
+ })
 
178
 
179
+ return fusion_summary, text_result_display, image_result_display, audio_result_display