File size: 10,825 Bytes
6a232ac
 
 
0360059
 
 
6a232ac
1b8b8df
 
 
 
0360059
 
1b8b8df
6a232ac
1b8b8df
 
 
 
6a232ac
0360059
1b8b8df
0360059
1b8b8df
 
0360059
1b8b8df
56f231a
1b8b8df
 
 
56f231a
0360059
1b8b8df
 
 
 
 
0360059
1b8b8df
56f231a
0360059
 
56f231a
1b8b8df
 
 
56f231a
 
0360059
 
 
 
1b8b8df
5d76435
1b8b8df
 
 
 
 
5d76435
 
1b8b8df
ab45c90
1b8b8df
 
 
0360059
 
 
1b8b8df
 
 
56f231a
1b8b8df
 
56f231a
1b8b8df
56f231a
1b8b8df
0360059
 
 
 
70daf96
1b8b8df
 
 
56f231a
1b8b8df
 
ab45c90
f04c010
1b8b8df
0360059
1b8b8df
 
 
 
 
 
 
56f231a
0360059
1b8b8df
235ad1c
0360059
f04c010
1b8b8df
 
f04c010
 
70daf96
1b8b8df
 
 
 
0360059
70daf96
56f231a
1b8b8df
 
 
 
 
 
 
 
 
 
0360059
1b8b8df
0360059
f04c010
1b8b8df
 
 
 
 
 
f04c010
1b8b8df
56f231a
1b8b8df
 
56f231a
1b8b8df
 
 
56f231a
1b8b8df
 
56f231a
 
1b8b8df
 
 
f04c010
1b8b8df
 
 
 
 
 
0360059
1b8b8df
0360059
56f231a
ab45c90
1b8b8df
 
f04c010
 
0360059
1b8b8df
6a232ac
56f231a
4a2e309
56f231a
1b8b8df
 
 
56f231a
1b8b8df
 
 
 
 
 
 
 
 
 
 
 
56f231a
 
 
1b8b8df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56f231a
1b8b8df
 
 
 
 
 
 
 
 
56f231a
1b8b8df
 
56f231a
 
1b8b8df
56f231a
1b8b8df
 
 
 
56f231a
 
1b8b8df
 
 
56f231a
 
 
 
 
1b8b8df
 
56f231a
 
 
1b8b8df
 
56f231a
 
 
 
 
1b8b8df
 
 
 
 
 
 
 
 
 
 
56f231a
 
 
70daf96
1b8b8df
 
 
56f231a
1b8b8df
 
 
0360059
56f231a
1b8b8df
ab45c90
1b8b8df
 
6a232ac
56f231a
1b8b8df
 
 
 
 
 
 
 
 
 
ab45c90
 
1b8b8df
 
 
 
 
70daf96
1b8b8df
 
235ad1c
 
24be874
1b8b8df
 
 
 
 
 
 
70daf96
 
1b8b8df
24be874
1b8b8df
 
 
 
24be874
6a232ac
1b8b8df
 
 
6a232ac
1b8b8df
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
import gradio as gr
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
import kagglehub
import warnings

# Suppress all Python warnings (not only sklearn's) for cleaner logs
warnings.filterwarnings("ignore")

# --- 1. ARCHITECTURE: H3MOS (Hippocampal Memory & Executive Core) ---

class EpisodicMemory:
    """Fixed-capacity FIFO store of (feature, label) pairs with k-NN label lookup.

    Mimics hippocampal retention and retrieval of recent experiences: the most
    recent ``capacity`` samples are kept, and ``retrieve`` returns the labels of
    the stored samples closest (by ``torch.cdist`` distance) to each query row.

    Attributes:
        memory_x: list of stored feature tensors (one per sample, on CPU).
        memory_y: list of stored label tensors, index-aligned with ``memory_x``.
        capacity: maximum number of samples retained.
    """

    def __init__(self, capacity=2000):
        self.memory_x = []
        self.memory_y = []
        self.capacity = capacity

    def store(self, x, y):
        """Append every row of ``x`` with its label from ``y``; evict the oldest.

        Args:
            x: tensor whose first dimension indexes samples.
            y: tensor of labels with at least as many rows as ``x``.
        """
        # Store on CPU to save GPU VRAM
        curr_x = x.detach().cpu()
        curr_y = y.detach().cpu()
        count = curr_x.size(0)
        # Materialise both lists first so a too-short ``y`` fails before any mutation.
        new_x = [curr_x[i] for i in range(count)]
        new_y = [curr_y[i] for i in range(count)]
        self.memory_x.extend(new_x)
        self.memory_y.extend(new_y)
        # Trim once from the front instead of one O(n) pop(0) per stored row.
        overflow = len(self.memory_x) - self.capacity
        if overflow > 0:
            del self.memory_x[:overflow]
            del self.memory_y[:overflow]

    def retrieve(self, query_x, k=5):
        """Return labels of the ``k`` nearest stored samples for each query row.

        Args:
            query_x: 2-D float tensor of queries, one per row.
            k: number of neighbours wanted; clamped to the number of stored
                samples so a sparsely filled memory does not raise.

        Returns:
            ``None`` when the memory is empty, otherwise a tensor on
            ``query_x.device`` whose first two dimensions are
            (number of queries, effective k), nearest neighbour first.
        """
        if not self.memory_x:
            return None
        # torch.topk raises when k exceeds the size of the searched dimension.
        k = min(k, len(self.memory_x))
        mem_tensor = torch.stack(self.memory_x).to(query_x.device)
        mem_labels = torch.stack(self.memory_y).to(query_x.device)
        distances = torch.cdist(query_x, mem_tensor)
        top_k_indices = torch.topk(distances, k, largest=False).indices

        # Gather labels for all queries with a single indexed lookup.
        return mem_labels[top_k_indices]

class H3MOS(nn.Module):
    """MLP classifier whose inference logits are blended with k-NN memory votes.

    The "executive" MLP encodes the input, the "motor" linear layer maps the
    encoding to class logits, and the "hippocampus" is an ``EpisodicMemory``
    consulted at inference time once it holds at least 10 samples.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Executive Core (layers are created in the order they are applied)
        encoder_layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim, hidden_dim),
            nn.GELU(),
        ]
        self.executive = nn.Sequential(*encoder_layers)
        # Motor Policy
        self.motor = nn.Linear(hidden_dim, output_dim)
        # Hippocampus
        self.hippocampus = EpisodicMemory(capacity=2000)

    def forward(self, x, training_mode=False):
        """Return class logits for ``x``.

        With ``training_mode=True``, or while fewer than 10 samples are stored
        in memory, the plain network logits are returned. Otherwise the labels
        of the 5 nearest stored samples are counted per class, soft-maxed, and
        mixed into the logits.
        """
        logits = self.motor(self.executive(x))

        # Fast Path (Training or Empty Memory)
        memory = self.hippocampus
        if training_mode or len(memory.memory_x) < 10:
            return logits

        # Memory Retrieval & Integration
        neighbour_labels = memory.retrieve(x, k=5)
        if neighbour_labels is None:
            return logits

        num_classes = logits.size(1)
        vote_counts = torch.zeros_like(logits)
        for row in range(x.size(0)):
            # One histogram of neighbour labels per query row.
            vote_counts[row] = torch.bincount(
                neighbour_labels[row], minlength=num_classes
            ).float()

        vote_probs = F.softmax(vote_counts, dim=1)

        # Dynamic Gating: 0.8 weight on the logits, 0.2 on the (x5 scaled) memory vote
        return (0.8 * logits) + (0.2 * vote_probs * 5.0)

# --- 2. DATA SETUP & TRAINING PIPELINE ---

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"🚀 Initializing System on {device}...")

# Load Data
try:
    path = kagglehub.dataset_download('dewanmukto/social-messages-and-emoji-reactions')
    # A row is only usable when both the text and its emoji label are present.
    df = pd.read_csv(path+"/messages_emojis.csv").dropna(subset=['content', 'emoji'])
    if df.empty:
        raise ValueError("dataset has no rows with both 'content' and 'emoji'")
except Exception as e:
    # Deliberate best-effort boundary: any download/parse failure falls back to toy data.
    print("Error loading data:", e)
    # Fallback dummy data if kaggle fails (for testing). Several distinct labels
    # are required: SVC and LogisticRegression refuse to fit on a single class.
    df = pd.DataFrame({
        'content': [
            'love this so much',
            'sounds good, agreed',
            'that joke was hilarious',
            'this makes me sad',
            'wow, no way',
        ],
        'emoji': ['\u2764\ufe0f', '👍', '😂', '😢', '😮'],
    })

# Mappings (emoji label -> coarse class). Emojis absent from a map fall back to
# 'Neutral' (sentiment) or 'Other' (intent) when the labels are built below.
sent_map = {
    '\u2764\ufe0f': 'Positive',  # red heart followed by variation selector-16
    '👍': 'Positive',
    '😂': 'Positive',
    '💯': 'Positive',
    '😢': 'Negative',
    '😭': 'Negative',
    '😮': 'Neutral',
}
intent_map = {
    '\u2764\ufe0f': 'Emotion',  # red heart followed by variation selector-16
    '👍': 'Agreement',
    '😂': 'Emotion',
    '😮': 'Surprise',
}

# Vectorization
tfidf = TfidfVectorizer(max_features=600, stop_words='english')
X_sparse = tfidf.fit_transform(df['content'])
# Dense copy for the PyTorch model; the sklearn models use the sparse matrix.
X_dense = torch.FloatTensor(X_sparse.toarray()).to(device)

# Model Zoo Containers
tasks = ['emoji', 'sentiment', 'intent']
model_names = ['DISTIL', 'RandomForest', 'SVM', 'NaiveBayes', 'LogReg', 'GradBoost']
zoo = {task: {} for task in tasks}   # zoo[task][model_name] -> fitted model
encoders = {}                        # encoders[task] -> fitted LabelEncoder

print("🧠 Training Models... (This may take a moment)")

for task in tasks:
    # Prepare Labels
    if task == 'emoji':
        raw_y = df['emoji'].values
    elif task == 'sentiment':
        raw_y = df['emoji'].apply(lambda x: sent_map.get(x, 'Neutral')).values
    else:
        raw_y = df['emoji'].apply(lambda x: intent_map.get(x, 'Other')).values

    le = LabelEncoder()
    y_nums = le.fit_transform(raw_y)
    encoders[task] = le

    # 1. Train DISTIL-H3MOS (PyTorch)
    y_tensor = torch.LongTensor(y_nums).to(device)
    output_dim = len(le.classes_)

    model = H3MOS(X_dense.shape[1], 64, output_dim).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.01)

    model.train()
    # Short full-batch training loop for demo speed
    for epoch in range(25):
        optimizer.zero_grad()
        # training_mode=True bypasses the memory lookup inside forward().
        out = model(X_dense, training_mode=True)
        loss = F.cross_entropy(out, y_tensor)
        loss.backward()
        optimizer.step()
        # Populate memory occasionally: up to 50 random samples every 5th epoch
        if epoch % 5 == 0:
            with torch.no_grad():
                idx = torch.randperm(X_dense.size(0))[:50]
                model.hippocampus.store(X_dense[idx], y_tensor[idx])

    model.eval()
    zoo[task]['DISTIL'] = model

    # 2. Train Sklearn Models
    zoo[task]['RandomForest'] = RandomForestClassifier(n_estimators=50).fit(X_sparse, y_nums)
    zoo[task]['SVM'] = SVC(kernel='linear').fit(X_sparse, y_nums)
    zoo[task]['NaiveBayes'] = MultinomialNB().fit(X_sparse, y_nums)
    zoo[task]['LogReg'] = LogisticRegression(max_iter=500).fit(X_sparse, y_nums)
    zoo[task]['GradBoost'] = GradientBoostingClassifier(n_estimators=30).fit(X_sparse, y_nums)

print("✅ Training Complete.")

# --- 3. INFERENCE LOGIC ---

def get_predictions(text):
    """Run every model in the zoo on ``text`` for every task.

    Returns a dict shaped ``{model_name: {task: decoded_label}}`` where each
    label has been mapped back through that task's ``LabelEncoder``.
    """
    sparse_vec = tfidf.transform([text])
    dense_vec = torch.FloatTensor(sparse_vec.toarray()).to(device)

    results = {}
    for name in model_names:
        results[name] = {}

    for task in tasks:
        decoder = encoders[task]
        task_models = zoo[task]

        for name in model_names:
            predictor = task_models[name]
            if name != 'DISTIL':
                # sklearn estimators consume the sparse TF-IDF row directly.
                class_idx = predictor.predict(sparse_vec)[0]
            else:
                # The PyTorch model takes the dense tensor; no gradients at inference.
                with torch.no_grad():
                    scores = predictor(dense_vec)
                    class_idx = torch.argmax(scores, dim=1).item()

            results[name][task] = decoder.inverse_transform([class_idx])[0]

    return results

# --- 4. UI STYLING & INTERFACE ---

def get_avatar_url(seed):
    """Return the DiceBear "bottts" SVG avatar URL for ``seed``.

    The seed is converted to ``str`` and percent-encoded so that characters
    such as spaces, ``&`` or ``#`` cannot corrupt the query string.
    """
    from urllib.parse import quote

    safe_seed = quote(str(seed), safe="")
    return f"https://api.dicebear.com/7.x/bottts/svg?seed={safe_seed}&backgroundColor=transparent&size=128"

# Stylesheet passed to gr.Blocks. The chat component is created with
# elem_id="chat-window", so it must be targeted by id (#chat-window); the class
# selector is kept as well in case the component is given that class instead.
CSS = """
#chat-window, .chat-window { font-family: 'Segoe UI', sans-serif; }

/* User Message Styling */
.user-reactions {
    margin-top: 8px;
    padding-top: 6px;
    border-top: 1px solid rgba(255,255,255,0.3);
    font-size: 1.2em;
    letter-spacing: 4px;
    text-align: right;
    opacity: 0.9;
}

/* Bot Reply Container */
.model-scroll-container {
    display: flex;
    gap: 12px;
    overflow-x: auto;
    padding: 10px 4px;
    scrollbar-width: thin;
}

.model-card {
    background: white;
    min-width: 140px;
    border-radius: 12px;
    padding: 12px;
    box-shadow: 0 4px 12px rgba(0,0,0,0.08);
    display: flex;
    flex-direction: column;
    align-items: center;
    border: 1px solid #eee;
    transition: transform 0.2s;
}
.model-card:hover { transform: translateY(-3px); }

.card-name {
    font-size: 11px;
    font-weight: 700;
    text-transform: uppercase;
    color: #888;
    margin-bottom: 4px;
}

.card-emoji {
    font-size: 28px;
    margin: 4px 0;
}

.card-badge {
    font-size: 10px;
    padding: 2px 8px;
    border-radius: 10px;
    margin-top: 4px;
    font-weight: 600;
}

.bg-Pos { background-color: #e6fffa; color: #2c7a7b; }
.bg-Neg { background-color: #fff5f5; color: #c53030; }
.bg-Neu { background-color: #f7fafc; color: #4a5568; }

.intent-row {
    font-size: 10px;
    color: #666;
    margin-top: 6px;
    border-top: 1px dashed #eee;
    padding-top: 4px;
    width: 100%;
    text-align: center;
}
"""

def chat_logic(message, history):
    """Handle one chat turn: analyse ``message`` and append two HTML entries.

    Args:
        message: text typed by the user; empty or whitespace-only input is ignored.
        history: list of ``{"role", "content"}`` dicts shown in the chatbot
            (``None`` is treated as an empty history). It is extended in place.

    Returns:
        ``("", history)``: an empty string to clear the textbox and the
        updated history.
    """
    from html import escape

    if history is None:
        history = []
    if not message or not message.strip():
        return "", history

    preds = get_predictions(message)

    # 1. Create User Message HTML (with Emoji Reaction Bar)
    # Order: DISTIL, RF, SVM, NB, LR, GB
    # Everything interpolated into markup is escaped: the message is untrusted
    # user input and the labels come straight from the training data.
    reaction_string = "".join(escape(str(preds[m]['emoji'])) for m in model_names)

    user_html = f"""
    <div>
        {escape(message)}
        <div class="user-reactions" title="Consensus: {reaction_string}">{reaction_string}</div>
    </div>
    """
    history.append({"role": "user", "content": user_html})

    # 2. Create Single Bot Reply HTML (Horizontal Scroll Cards)
    cards = []
    for name in model_names:
        p = preds[name]
        sentiment = str(p['sentiment'])

        # Color coding for sentiment
        if "Pos" in sentiment:
            sent_cls = "bg-Pos"
        elif "Neg" in sentiment:
            sent_cls = "bg-Neg"
        else:
            sent_cls = "bg-Neu"

        cards.append(f"""
        <div class="model-card">
            <div class="card-name">{escape(str(name))}</div>
            <div class="card-emoji">{escape(str(p['emoji']))}</div>
            <div class="card-badge {sent_cls}">{escape(sentiment)}</div>
            <div class="intent-row">{escape(str(p['intent']))}</div>
        </div>
        """)
    cards_html = '<div class="model-scroll-container">' + "".join(cards) + "</div>"

    history.append({"role": "assistant", "content": cards_html})

    return "", history

# --- 5. LAUNCH APP ---

# Build the UI: a chatbot pane plus a textbox/button row, both wired to chat_logic.
with gr.Blocks(css=CSS, title="SentiChat") as demo:
    gr.Markdown("### 🤖 Message Analysis")
    gr.Markdown("Type a message to see how different AI/ML architectures interpret it. They were trained on [this dataset](https://www.kaggle.com/datasets/dewanmukto/social-messages-and-emoji-reactions).")

    chatbot = gr.Chatbot(
        elem_id="chat-window",
        avatar_images=(None, "https://api.dicebear.com/7.x/bottts/svg?seed=Admin"),
        height=600,
        render_markdown=False # Important to render our custom HTML
    )

    with gr.Row():
        txt = gr.Textbox(
            placeholder="Type a message (e.g., 'I cant believe you did that!')",
            scale=4,
            show_label=False,
            container=False
        )
        btn = gr.Button("Send ▶ Analyze", variant="primary", scale=1)

    # Event bindings: pressing Enter and clicking the button run the same handler,
    # which clears the textbox and returns the updated chat history.
    txt.submit(chat_logic, [txt, chatbot], [txt, chatbot])
    btn.click(chat_logic, [txt, chatbot], [txt, chatbot])

if __name__ == "__main__":
    demo.launch()