dwmk committed on
Commit
0360059
·
verified ·
1 Parent(s): 051ef5a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +174 -90
app.py CHANGED
@@ -1,125 +1,209 @@
1
  import gradio as gr
2
  import pandas as pd
3
  import numpy as np
4
- import time
5
- import random
 
6
  from sklearn.feature_extraction.text import TfidfVectorizer
7
- from sklearn.ensemble import RandomForestClassifier
 
8
  from sklearn.svm import SVC
9
  from sklearn.naive_bayes import MultinomialNB
 
 
 
 
 
 
10
 
11
# 1. SETUP DATA AND MAPPINGS (From your Notebook)
# The emoji literals in this section were mojibake (UTF-8 bytes rendered through
# a single-byte code page). They are restored below as escape sequences so the
# keys survive any further transcoding of the file.

# Reaction emoji -> coarse sentiment class.
SENT_MAP = {
    '\u2764\ufe0f': 'Pos',  # red heart
    '\U0001F44D': 'Pos',    # thumbs up
    '\U0001F602': 'Pos',    # face with tears of joy
    '\U0001F4AF': 'Pos',    # hundred points
    '\U0001F622': 'Neg',    # crying face
    '\U0001F62D': 'Neg',    # loudly crying face
    '\U0001F62E': 'Neu',    # face with open mouth
}

# Reaction emoji -> intent class.
INTENT_MAP = {
    '\u2764\ufe0f': 'Emotion',  # red heart
    '\U0001F44D': 'Agreement',  # thumbs up
    '\U0001F602': 'Emotion',    # face with tears of joy
    '\U0001F62E': 'Surprise',   # face with open mouth
}


def get_sent(emoji):
    """Return the sentiment label for *emoji*, or 'Neutral' when it is not in SENT_MAP."""
    # NOTE(review): the fallback 'Neutral' is a different label from the mapped
    # 'Neu' - confirm that two separate classes are intended.
    return SENT_MAP.get(emoji, 'Neutral')


def get_intent(emoji):
    """Return the intent label for *emoji*, or 'Other' when it is not in INTENT_MAP."""
    return INTENT_MAP.get(emoji, 'Other')


# Mock Data for training (As the CSV isn't provided, we use a small seed set)
# In your Space, you can replace this by loading your actual CSV
data = {
    'content': ["I love this!", "That is so sad", "Wow really?", "I agree with you", "LMAO", "I am angry"],
    # red heart, crying, open mouth, thumbs up, tears of joy, crying
    'emoji': ['\u2764\ufe0f', '\U0001F622', '\U0001F62E', '\U0001F44D', '\U0001F602', '\U0001F622'],
}
df = pd.DataFrame(data)
# Derive both supervised targets from the reaction emoji.
df['sentiment'] = df['emoji'].apply(get_sent)
df['intent'] = df['emoji'].apply(get_intent)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
# 2. TRAIN MODELS
# TF-IDF features (vocabulary capped at 1000 terms) shared by every classifier below.
tfidf = TfidfVectorizer(max_features=1000)
X = tfidf.fit_transform(df['content'])

# Emoji Models: three different algorithms trained on the same emoji target.
model_rf_emoji = RandomForestClassifier()
model_rf_emoji.fit(X, df['emoji'])

model_svm_emoji = SVC(probability=True)
model_svm_emoji.fit(X, df['emoji'])

model_nb_emoji = MultinomialNB()
model_nb_emoji.fit(X, df['emoji'])

# Sentiment & Intent Models (Using Random Forest as base)
model_sent = RandomForestClassifier()
model_sent.fit(X, df['sentiment'])

model_intent = RandomForestClassifier()
model_intent.fit(X, df['intent'])
40
-
41
- # 3. HELPER FUNCTIONS
42
def get_avatar(name):
    """Return the DiceBear 'avataaars' SVG URL whose seed is *name*.

    The seed is percent-encoded so that spaces, '&', '#', parentheses and
    similar characters cannot truncate or alter the query string.
    """
    from urllib.parse import quote

    return f"https://api.dicebear.com/7.x/avataaars/svg?seed={quote(str(name), safe='')}"
44
-
45
def predict_all(text):
    """Run every trained model on *text* and collect the top prediction of each.

    Returns a dict keyed by "Random Forest", "SVM", "Naive Bayes" (emoji
    predictions), "Sentiment" and "Intent".
    """
    features = tfidf.transform([text])
    # (result key, fitted model) pairs, in the order the keys appear in the result.
    heads = (
        ("Random Forest", model_rf_emoji),
        ("SVM", model_svm_emoji),
        ("Naive Bayes", model_nb_emoji),
        ("Sentiment", model_sent),
        ("Intent", model_intent),
    )
    return {label: model.predict(features)[0] for label, model in heads}
54
-
55
- # 4. CHAT LOGIC
56
def chat_response(message, history):
    """Stream one chat turn: the user's message with emoji reactions, then bot replies.

    Generator used as a Gradio event handler. Each yield is the updated
    message list for the chatbot.

    Args:
        message: Text typed by the user.
        history: Chat history as a list of message dicts (may be None).

    Yields:
        The history list after the user's message is added, and again after
        each simulated bot reply.
    """
    import html

    history = [] if history is None else history
    if not message:
        # Nothing to classify: leave the conversation unchanged.
        yield history
        return

    preds = predict_all(message)

    # User's message with reactions.
    # The message is user-supplied and the predictions come from data, so both
    # are escaped before being embedded in HTML text and attribute values.
    rf, svm, nb = (
        html.escape(str(preds[key]), quote=True)
        for key in ("Random Forest", "SVM", "Naive Bayes")
    )
    # We format reactions as a small row at the bottom
    reactions_html = f"""
    <div style='display: flex; gap: 5px; margin-top: 8px; font-size: 0.8em;'>
    <span title='RF: {rf} | SVM: {svm} | NB: {nb}'
    style='background: #f0f2f5; padding: 2px 8px; border-radius: 12px; cursor: help;'>
    {rf} {svm} {nb}
    </span>
    </div>
    """
    history.append({"role": "user", "content": html.escape(message) + reactions_html})
    yield history

    # Simulate group chat replies
    bots = [
        {"name": "The Forester (RF)", "avatar": get_avatar("Forester")},
        {"name": "The Maverick (SVM)", "avatar": get_avatar("Maverick")},
        {"name": "The Sage (NB)", "avatar": get_avatar("Sage")}
    ]

    # Random reply order so the "group chat" does not look scripted.
    random.shuffle(bots)

    for bot in bots:
        # Random delay for typing simulation
        time.sleep(random.uniform(0.5, 1.5))

        bot_msg = f"**Sentiment:** {preds['Sentiment']} \n**Intent:** {preds['Intent']}"
        # NOTE(review): "avatar" is not one of the documented keys of a Gradio
        # message dict - confirm whether it has any effect.
        history.append({
            "role": "assistant",
            "content": bot_msg,
            "metadata": {"title": bot["name"]},
            "avatar": bot["avatar"]
        })
        yield history
93
 
94
# 5. UI CONSTRUCTION
custom_css = """
.gradio-container { background-color: #e5ddd5; } /* WhatsApp Background */
.message-row { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; }
"""

with gr.Blocks(css=custom_css) as demo:
    # The title emoji (mobile phone, U+1F4F1) was mojibake; restored as an escape.
    gr.Markdown("# \U0001F4F1 Multi-Model Emoji Group Chat")
    gr.Markdown("Type a message to see how different models react and respond based on sentiment and intent.")

    chatbot = gr.Chatbot(
        bubble_full_width=False,
        show_label=False,
        type="messages",
        avatar_images=(None, "https://api.dicebear.com/7.x/avataaars/svg?seed=User")
    )

    with gr.Row():
        txt = gr.Textbox(
            show_label=False,
            placeholder="Type a message...",
            scale=4
        )
        submit_btn = gr.Button("Send", variant="primary")

    # Hooking up logic: Enter and the Send button both run the chat handler...
    txt.submit(chat_response, [txt, chatbot], [chatbot])
    submit_btn.click(chat_response, [txt, chatbot], [chatbot])
    # ...and, as a separate listener on the same event, clear the textbox.
    txt.submit(lambda: "", None, [txt])
    submit_btn.click(lambda: "", None, [txt])

# Only start the server when run as a script, so importing this module
# (e.g. from a test) does not launch the app.
if __name__ == "__main__":
    demo.launch()
 
1
  import gradio as gr
2
  import pandas as pd
3
  import numpy as np
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.nn.functional as F
7
  from sklearn.feature_extraction.text import TfidfVectorizer
8
+ from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
9
+ from sklearn.linear_model import LogisticRegression
10
  from sklearn.svm import SVC
11
  from sklearn.naive_bayes import MultinomialNB
12
+ from sklearn.preprocessing import LabelEncoder
13
+ from sklearn.utils import Bunch
14
+ import kagglehub
15
+ import time
16
+ import random
17
+ import threading
18
 
19
+ # --- 1. CORE MODEL LOGIC (Ported from your script) ---
 
 
20
 
21
class EpisodicMemory:
    """Bounded first-in-first-out store of (feature, label) tensor pairs.

    Features and labels are kept in two parallel lists on the CPU;
    ``memory_x[i]`` is the feature tensor whose label is ``memory_y[i]``.
    When more than ``capacity`` pairs are held, the oldest are dropped.
    """

    def __init__(self, capacity=2000):
        self.memory_x, self.memory_y = [], []
        self.capacity = capacity

    def store(self, x, y):
        """Append every row of *x* with the matching entry of *y*.

        Both tensors are detached and moved to the CPU first. Rows are paired
        up before anything is stored, so an index error on a too-short *y*
        leaves the memory untouched.
        """
        curr_x, curr_y = x.detach().cpu(), y.detach().cpu()
        pairs = [(curr_x[i], curr_y[i]) for i in range(curr_x.size(0))]
        self.memory_x.extend(feat for feat, _ in pairs)
        self.memory_y.extend(label for _, label in pairs)
        # Evict the oldest entries in one slice delete instead of one
        # O(n) pop(0) per stored row.
        overflow = len(self.memory_x) - self.capacity
        if overflow > 0:
            del self.memory_x[:overflow]
            del self.memory_y[:overflow]

    def retrieve(self, query_x, k=5):
        """Return the labels of the *k* stored features nearest to each query row.

        Distances come from ``torch.cdist``. Returns ``None`` when fewer than
        *k* items are stored; otherwise a tensor on ``query_x.device`` whose
        first two dimensions are (number of query rows, k), nearest first.
        """
        if len(self.memory_x) < k:
            return None
        device = query_x.device
        mem_tensor = torch.stack(self.memory_x).to(device)
        mem_labels = torch.stack(self.memory_y).to(device)
        distances = torch.cdist(query_x, mem_tensor)
        top_k_indices = torch.topk(distances, k, largest=False).indices
        # Index-tensor lookup gathers all neighbours' labels in one operation.
        return mem_labels[top_k_indices]
38
 
39
class ExecutiveCore(nn.Module):
    """Two-layer feed-forward encoder mapping input features to a hidden vector."""

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        # Layer order is kept as-is so parameter names (net.0, net.1, net.4) do not change.
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim, hidden_dim),
            nn.GELU(),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Encode *x* with the sequential stack and return the hidden representation."""
        hidden = self.net(x)
        return hidden
44
+
45
class MotorPolicy(nn.Module):
    """Single linear output head turning a hidden vector into per-class logits."""

    def __init__(self, hidden_dim, output_dim):
        super().__init__()
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Apply the linear head to *x* and return the resulting logits."""
        logits = self.fc(x)
        return logits
50
+
51
class H3MOS(nn.Module):
    """Classifier combining an encoder, a linear head and an episodic memory.

    ``executive`` encodes the input, ``motor`` produces class logits, and
    ``hippocampus`` stores past (input, label) pairs whose nearest neighbours
    vote on the prediction once at least 10 items have been stored.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.executive = ExecutiveCore(input_dim, hidden_dim)
        self.motor = MotorPolicy(hidden_dim, output_dim)
        self.hippocampus = EpisodicMemory()

    def forward(self, x, training_mode=False):
        """Return class logits for *x*.

        With ``training_mode=True``, or while the memory holds fewer than 10
        items, the plain head logits are returned. Otherwise the result is
        ``0.8 * logits + 0.2 * softmax(neighbour vote counts) * 5.0`` using the
        5 nearest stored items per input row.
        """
        hidden = self.executive(x)
        logits = self.motor(hidden)

        memory_ready = len(self.hippocampus.memory_x) >= 10
        if training_mode or not memory_ready:
            return logits

        neighbour_labels = self.hippocampus.retrieve(x, k=5)
        n_classes = logits.size(1)
        vote_counts = torch.zeros_like(logits)
        # One histogram of neighbour labels per input row.
        for row in range(x.size(0)):
            vote_counts[row] = torch.bincount(neighbour_labels[row], minlength=n_classes).float()

        memory_prior = F.softmax(vote_counts, dim=1)
        return (0.8 * logits) + (0.2 * memory_prior * 5.0)
67
 
68
# --- 2. DATA & TRAINING SETUP ---

# Imported here so this section is self-contained: clone() returns a fresh,
# unfitted estimator with the same hyper-parameters as its argument.
from sklearn.base import clone

print("Downloading dataset and training models...")
path = kagglehub.dataset_download('dewanmukto/social-messages-and-emoji-reactions')
# Rows missing either the text or the reaction are dropped: a NaN label mixed
# with string labels cannot be encoded or learned.
df = pd.read_csv(path+"/messages_emojis.csv").dropna(subset=['content', 'emoji'])

# The emoji keys were mojibake (UTF-8 bytes rendered through a single-byte code
# page) and could never equal an emoji read from the CSV. They are restored as
# escape sequences so they survive any further transcoding of the file.
sent_map = {
    '\u2764\ufe0f': 'Pos',  # red heart
    '\U0001F44D': 'Pos',    # thumbs up
    '\U0001F602': 'Pos',    # face with tears of joy
    '\U0001F4AF': 'Pos',    # hundred points
    '\U0001F622': 'Neg',    # crying face
    '\U0001F62D': 'Neg',    # loudly crying face
    '\U0001F62E': 'Neu',    # face with open mouth
}
intent_map = {
    '\u2764\ufe0f': 'Emotion',  # red heart
    '\U0001F44D': 'Agreement',  # thumbs up
    '\U0001F602': 'Emotion',    # face with tears of joy
    '\U0001F62E': 'Surprise',   # face with open mouth
}
tfidf = TfidfVectorizer(max_features=1000, stop_words='english')
X = tfidf.fit_transform(df['content'])

# One label array per prediction task, all derived from the reaction emoji.
# NOTE(review): unmapped emoji get 'Neutral', which is a different label from
# the mapped 'Neu' - confirm that two separate classes are intended.
targets = {
    'emoji': df['emoji'].values,
    'sentiment': df['emoji'].apply(lambda x: sent_map.get(x, 'Neutral')).values,
    'intent': df['emoji'].apply(lambda x: intent_map.get(x, 'Other')).values
}

model_zoo = {}  # model_zoo[algorithm name][task] -> fitted model
encoders = {}   # encoders[task] -> LabelEncoder used for the torch model's class indices
# Prototype estimators; each task trains its own clone of these.
algs = {
    "RandomForest": RandomForestClassifier(n_estimators=50),
    "SVM": SVC(kernel='linear', probability=True),
    "NaiveBayes": MultinomialNB(),
    "LogReg": LogisticRegression(max_iter=500),
    "DISTIL-H3MOS": "torch"
}

# Dense copy of the features for the torch model, built once for all tasks.
X_t = torch.FloatTensor(X.toarray())

for task, y in targets.items():
    le = LabelEncoder()
    y_enc = le.fit_transform(y)
    encoders[task] = le
    for name, clf in algs.items():
        model_zoo.setdefault(name, {})
        if name == "DISTIL-H3MOS":
            model = H3MOS(X.shape[1], 64, len(le.classes_))
            # (Simplified training for demo speed)
            optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
            y_t = torch.LongTensor(y_enc)
            model.train()
            for _ in range(20):
                optimizer.zero_grad()
                loss = F.cross_entropy(model(X_t, True), y_t)
                loss.backward()
                optimizer.step()
            # Leave the model in eval mode: torch.no_grad() alone does not
            # disable dropout, so predictions would otherwise be random.
            model.eval()
            model_zoo[name][task] = model
        else:
            # Fit a fresh copy per task. Refitting the shared prototype would
            # make every task entry point at the same object, fitted on
            # whichever task happened to run last.
            task_clf = clone(clf)
            task_clf.fit(X, y)
            model_zoo[name][task] = task_clf
113
+
114
+ # --- 3. GRADIO UI & CHAT LOGIC ---
115
+
116
+ # Dicebear Avatars
117
def get_avatar(seed):
    """Return the DiceBear 'adventurer' SVG URL for *seed*.

    The seed is percent-encoded so that spaces, '&', '#' and similar
    characters cannot truncate or alter the query string.
    """
    from urllib.parse import quote

    return f"https://api.dicebear.com/7.x/adventurer/svg?seed={quote(str(seed), safe='')}"
119
+
120
# Custom stylesheet passed to gr.Blocks(css=...).
# The chatbot is declared with elem_id="chat-window", so it has to be targeted
# with an id selector (#chat-window); the previous class selector
# (.chat-window) never matched anything.
CSS = """
.reaction-pill {
    background: rgba(255, 255, 255, 0.8);
    border-radius: 12px;
    padding: 2px 8px;
    font-size: 14px;
    margin-top: 5px;
    display: inline-block;
    border: 1px solid #ddd;
    cursor: help;
}
#chat-window { border-radius: 15px; }
"""
133
+
134
def _predict_label(model_name, task, sparse_vec, dense_vec):
    """Return the label *model_name* predicts for *task* on one vectorised message."""
    model = model_zoo[model_name][task]
    if model_name == "DISTIL-H3MOS":
        # Dropout must be off at inference; eval() is idempotent, so calling it
        # on every request is safe.
        model.eval()
        with torch.no_grad():
            class_idx = torch.argmax(model(dense_vec)).item()
        return encoders[task].inverse_transform([class_idx])[0]
    return model.predict(sparse_vec)[0]


def chat_interface(message, history):
    """Stream one chat turn: the user's message with a reaction pill, then bot replies.

    Generator used as a Gradio event handler wired to two outputs (textbox,
    chatbot), so every yield is a 2-tuple. A bare list would be read by Gradio
    as the list of output values.

    Args:
        message: Text typed by the user.
        history: Chat history as a list of message dicts (may be None).

    Yields:
        ``(textbox_value, history)``. The first yield clears the textbox;
        later ones leave it alone so text typed while the bots are still
        "replying" is not wiped.
    """
    import html
    from collections import Counter

    history = [] if history is None else history
    if not message:
        # This function is a generator, so a plain `return "", history` would
        # emit nothing; yield the unchanged state explicitly.
        yield "", history
        return

    # 1. Process Reactions for User Message
    vec = tfidf.transform([message])
    vec_t = torch.FloatTensor(vec.toarray())

    reactions = [(name, _predict_label(name, 'emoji', vec, vec_t)) for name in model_zoo]
    reaction_details = "Models reacted: " + "".join(
        f"{name} ({emoji}) " for name, emoji in reactions
    )

    # Most frequent emoji as primary reaction. Counter breaks ties by first
    # occurrence, unlike max() over a set, whose order varies between runs.
    primary_emoji = Counter(emoji for _, emoji in reactions).most_common(1)[0][0]

    # The message is user-supplied and the labels come from the dataset, so
    # everything interpolated into HTML is escaped.
    reaction_html = (
        f"<div class='reaction-pill' title='{html.escape(reaction_details, quote=True)}'>"
        f"{html.escape(str(primary_emoji))} {len(reactions)}</div>"
    )

    user_msg_formatted = f"{html.escape(message)}<br>{reaction_html}"
    history.append({"role": "user", "content": user_msg_formatted})
    yield "", history

    # 2. Simulate Bot Responses
    active_models = ["DISTIL-H3MOS", "RandomForest", "LogReg", "SVM"]
    random.shuffle(active_models)

    for bot in active_models:
        # Simulate typing delay
        time.sleep(random.uniform(0.5, 1.5))

        # Predict Sentiment/Intent
        sent = _predict_label(bot, 'sentiment', vec, vec_t)
        intent = _predict_label(bot, 'intent', vec, vec_t)

        bot_content = f"**Sentiment:** {sent} | **Intent:** {intent}"
        # NOTE(review): "avatar" is not a documented Gradio metadata key -
        # confirm whether it has any effect.
        history.append({
            "role": "assistant",
            "content": bot_content,
            "metadata": {"title": f"{bot}", "avatar": get_avatar(bot)}
        })
        # gr.update() with no arguments leaves the textbox as it is.
        yield gr.update(), history
187
 
188
with gr.Blocks(css=CSS) as demo:
    # The title emoji (robot face, U+1F916) was mojibake; restored as an escape.
    gr.Markdown("# \U0001F916 Multitask Model Group Chat\n*A benchmark-turned-chat app featuring H3MOS, RF, and SVM.*")

    chatbot = gr.Chatbot(
        elem_id="chat-window",
        type="messages",
        avatar_images=(None, "https://api.dicebear.com/7.x/bottts/svg?seed=User"),
        bubble_full_width=False
    )

    with gr.Row():
        txt = gr.Textbox(
            show_label=False,
            placeholder="Type a message to see how the models react...",
            scale=4
        )
        submit_btn = gr.Button("Send", variant="primary", scale=1)

    # Enter in the textbox and the Send button run the same handler; its two
    # outputs are the textbox value and the chat history.
    txt.submit(chat_interface, [txt, chatbot], [txt, chatbot])
    submit_btn.click(chat_interface, [txt, chatbot], [txt, chatbot])

# Only start the server when run as a script, so importing this module
# (e.g. from a test) does not launch the app.
if __name__ == "__main__":
    demo.launch()