Spaces:

asenturisk
/

SentiChat

Sleeping

App Files Community

dwmk committed on Jan 26

Commit

56f231a

verified ·

1 Parent(s): 4a2e309

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -157

app.py CHANGED Viewed

@@ -12,7 +12,6 @@ from sklearn.linear_model import LogisticRegression
 from sklearn.preprocessing import LabelEncoder
 import kagglehub
 import warnings
-import random
 # Suppress sklearn warnings for cleaner logs
 warnings.filterwarnings("ignore")
@@ -24,53 +23,30 @@ class EpisodicMemory:
     def __init__(self, capacity=2000):
         self.memory_x = []
         self.memory_y = []
-        self.memory_text = [] # New: Store raw text for replies
         self.capacity = capacity
-    def store(self, x, y, text_content):
         # Store on CPU to save GPU VRAM
         curr_x = x.detach().cpu()
         curr_y = y.detach().cpu()
-        # Handle batch or single item
-        if len(curr_x.shape) > 1:
-            batch_size = curr_x.size(0)
-        else:
-            batch_size = 1
-            curr_x = curr_x.unsqueeze(0)
-            curr_y = curr_y.unsqueeze(0)
-            text_content = [text_content]
-        for i in range(batch_size):
             if len(self.memory_x) >= self.capacity:
                 self.memory_x.pop(0)
                 self.memory_y.pop(0)
-                self.memory_text.pop(0)
             self.memory_x.append(curr_x[i])
             self.memory_y.append(curr_y[i])
-            # Store corresponding text (handle potential index mismatch in loops)
-            txt = text_content[i] if isinstance(text_content, list) else text_content
-            self.memory_text.append(txt)
     def retrieve(self, query_x, k=5):
         if not self.memory_x:
-            return None, None
         mem_tensor = torch.stack(self.memory_x).to(query_x.device)
         distances = torch.cdist(query_x, mem_tensor)
-        top_k = torch.topk(distances, k, largest=False)
-        indices = top_k.indices
         # Gather labels
         retrieved_y = [torch.stack([self.memory_y[idx] for idx in sample_indices])
-                       for sample_indices in indices]
-        # Gather text for the "Best Match" (closest neighbor)
-        # We take the nearest neighbor (index 0 of top k) for the reply
-        closest_indices = indices[:, 0].cpu().tolist()
-        retrieved_text = [self.memory_text[idx] for idx in closest_indices]
-        return torch.stack(retrieved_y).to(query_x.device), retrieved_text
 class H3MOS(nn.Module):
     def __init__(self, input_dim, hidden_dim, output_dim):
@@ -95,12 +71,12 @@ class H3MOS(nn.Module):
         # Fast Path (Training or Empty Memory)
         if training_mode or len(self.hippocampus.memory_x) < 10:
-            return raw_logits, None
         # Memory Retrieval & Integration
-        past_labels, retrieved_texts = self.hippocampus.retrieve(x, k=5)
         if past_labels is None:
-             return raw_logits, None
         mem_votes = torch.zeros_like(raw_logits)
         for i in range(x.size(0)):
@@ -110,9 +86,7 @@ class H3MOS(nn.Module):
         mem_probs = F.softmax(mem_votes, dim=1)
         # Dynamic Gating: 80% Neural, 20% Memory
-        final_logits = (0.8 * raw_logits) + (0.2 * mem_probs * 5.0)
-        return final_logits, retrieved_texts
 # --- 2. DATA SETUP & TRAINING PIPELINE ---
@@ -126,7 +100,7 @@ try:
 except Exception as e:
     print("Error loading data:", e)
     # Fallback dummy data if kaggle fails (for testing)
-    df = pd.DataFrame({'content': ['test', 'good job', 'bad day'], 'emoji': ['👍', '❤️', '😭']})
 # Mappings
 sent_map = {'❤️':'Positive', '👍':'Positive', '😂':'Positive', '💯':'Positive', '😢':'Negative', '😭':'Negative', '😮':'Neutral'}
@@ -137,24 +111,13 @@ tfidf = TfidfVectorizer(max_features=600, stop_words='english')
 X_sparse = tfidf.fit_transform(df['content'])
 X_dense = torch.FloatTensor(X_sparse.toarray()).to(device)
-# Reply Bank Construction (For non-neural models)
-# We organize valid "utterances" by their emoji label to simulate responses
-reply_bank = {}
-unique_emojis = df['emoji'].unique()
-for emo in unique_emojis:
-    # Filter messages that resulted in this emoji
-    msgs = df[df['emoji'] == emo]['content'].tolist()
-    # Keep short, punchy replies
-    msgs = [m for m in msgs if len(m.split()) < 15]
-    reply_bank[emo] = msgs if msgs else ["Interesting."]
 # Model Zoo Containers
 tasks = ['emoji', 'sentiment', 'intent']
 model_names = ['DISTIL', 'RandomForest', 'SVM', 'NaiveBayes', 'LogReg', 'GradBoost']
 zoo = {task: {} for task in tasks}
 encoders = {}
-print("🧠 Training Models & Encoding Memories... (This may take a moment)")
 for task in tasks:
     # Prepare Labels
@@ -177,23 +140,18 @@ for task in tasks:
     optimizer = torch.optim.AdamW(model.parameters(), lr=0.01)
     model.train()
-    # Short training loop
     for epoch in range(25):
         optimizer.zero_grad()
-        out, _ = model(X_dense, training_mode=True)
         loss = F.cross_entropy(out, y_tensor)
         loss.backward()
         optimizer.step()
-        # Populate memory: DISTIL learns by storing training examples
-        # We store 10% of data per epoch to build the "brain"
         if epoch % 5 == 0:
             with torch.no_grad():
-                # Random sample indices
-                idx = torch.randperm(X_dense.size(0))[:100]
-                # Store Vector + Label + Actual Text Content
-                batch_text = df.iloc[idx.cpu().numpy()]['content'].tolist()
-                model.hippocampus.store(X_dense[idx], y_tensor[idx], batch_text)
     model.eval()
     zoo[task]['DISTIL'] = model
@@ -207,24 +165,7 @@ for task in tasks:
 print("✅ Training Complete.")
-# --- 3. INFERENCE & GENERATION LOGIC ---
-def generate_reply(model_name, predicted_emoji, distil_retrieved_text=None):
-    """
-    Generates a text reply.
-    - DISTIL uses Associative Recall (nearest neighbor text).
-    - Others use Random Sampling from the Reply Bank based on their prediction.
-    """
-    try:
-        if model_name == 'DISTIL' and distil_retrieved_text:
-            # H3MOS echoes a memory that feels "associatively related"
-            return f"\"{distil_retrieved_text}\""
-        # Standard models pick a vibe-matched message from the dataset
-        candidates = reply_bank.get(predicted_emoji, ["I don't know what to say."])
-        return f"\"{random.choice(candidates)}\""
-    except:
-        return "..."
 def get_predictions(text):
     """Runs all models on the text."""
@@ -233,41 +174,13 @@ def get_predictions(text):
     results = {name: {} for name in model_names}
-    # 1. First, get Emoji predictions (Primary task for replies)
-    emoji_preds = {}
-    distil_text_memory = None
-    # Run Emoji Task first to determine the reply "Vibe"
-    task = 'emoji'
-    le = encoders[task]
-    for name in model_names:
-        if name == 'DISTIL':
-            with torch.no_grad():
-                logits, mem_texts = zoo[task][name](vec_t)
-                pred_idx = torch.argmax(logits, dim=1).item()
-                pred_label = le.inverse_transform([pred_idx])[0]
-                # Capture the memory text for DISTIL
-                if mem_texts: distil_text_memory = mem_texts[0]
-        else:
-            pred_idx = zoo[task][name].predict(vec_s)[0]
-            pred_label = le.inverse_transform([pred_idx])[0]
-        emoji_preds[name] = pred_label
-        results[name]['emoji'] = pred_label
-        # GENERATE TEXT REPLY
-        # We pass the memory text if it's DISTIL, otherwise None
-        mem_txt = distil_text_memory if name == 'DISTIL' else None
-        results[name]['reply'] = generate_reply(name, pred_label, mem_txt)
-    # 2. Run other tasks (Sentiment/Intent) just for labels
-    for task in ['sentiment', 'intent']:
         le = encoders[task]
         for name in model_names:
             if name == 'DISTIL':
                 with torch.no_grad():
-                    logits, _ = zoo[task][name](vec_t)
                     pred_idx = torch.argmax(logits, dim=1).item()
                     pred_label = le.inverse_transform([pred_idx])[0]
             else:
@@ -280,6 +193,9 @@ def get_predictions(text):
 # --- 4. UI STYLING & INTERFACE ---
 CSS = """
 .chat-window { font-family: 'Segoe UI', sans-serif; }
@@ -305,8 +221,7 @@ CSS = """
 .model-card {
     background: white;
-    min-width: 160px; /* Wider to fit text */
-    max-width: 160px;
     border-radius: 12px;
     padding: 12px;
     box-shadow: 0 4px 12px rgba(0,0,0,0.08);
@@ -315,60 +230,40 @@ CSS = """
     align-items: center;
     border: 1px solid #eee;
     transition: transform 0.2s;
-    position: relative;
 }
-.model-card:hover { transform: translateY(-3px); border-color: #cbd5e0; }
 .card-name {
-    font-size: 10px;
-    font-weight: 800;
     text-transform: uppercase;
-    color: #a0aec0;
     margin-bottom: 4px;
-    letter-spacing: 1px;
 }
 .card-emoji {
-    font-size: 32px;
-    margin: 2px 0;
-    line-height: 1;
-}
-/* The generated reply bubble */
-.card-reply {
-    font-size: 11px;
-    color: #2d3748;
-    background: #edf2f7;
-    padding: 6px 8px;
-    border-radius: 8px;
-    margin: 8px 0;
-    text-align: center;
-    font-style: italic;
-    min-height: 40px;
-    display: flex;
-    align-items: center;
-    justify-content: center;
-    line-height: 1.2;
-    width: 100%;
 }
 .card-badge {
-    font-size: 9px;
-    padding: 2px 6px;
-    border-radius: 4px;
-    margin-top: auto; /* Push to bottom */
-    font-weight: 700;
-    text-transform: uppercase;
 }
-.bg-Pos { background-color: #c6f6d5; color: #22543d; }
-.bg-Neg { background-color: #fed7d7; color: #742a2a; }
-.bg-Neu { background-color: #e2e8f0; color: #4a5568; }
 .intent-row {
-    font-size: 9px;
-    color: #718096;
-    margin-top: 4px;
     width: 100%;
     text-align: center;
 }
@@ -380,26 +275,25 @@ def chat_logic(message, history):
     preds = get_predictions(message)
-    # 1. Create User Message HTML (with Emoji Consensus)
-    # Simple majority voting for the "Consensus" bar
-    emojis = [preds[m]['emoji'] for m in model_names]
-    reaction_string = "".join(emojis)
     user_html = f"""
     <div>
         {message}
-        <div class="user-reactions" title="Consensus">{reaction_string}</div>
     </div>
     """
     history.append({"role": "user", "content": user_html})
-    # 2. Create Scrollable Bot Reply HTML
     cards_html = '<div class="model-scroll-container">'
     for name in model_names:
         p = preds[name]
-        # Color coding
         sent_cls = "bg-Neu"
         if "Pos" in p['sentiment']: sent_cls = "bg-Pos"
         elif "Neg" in p['sentiment']: sent_cls = "bg-Neg"
@@ -408,7 +302,6 @@ def chat_logic(message, history):
         <div class="model-card">
             <div class="card-name">{name}</div>
             <div class="card-emoji">{p['emoji']}</div>
-            <div class="card-reply">{p['reply']}</div>
             <div class="card-badge {sent_cls}">{p['sentiment']}</div>
             <div class="intent-row">{p['intent']}</div>
         </div>

 from sklearn.preprocessing import LabelEncoder
 import kagglehub
 import warnings
 # Suppress sklearn warnings for cleaner logs
 warnings.filterwarnings("ignore")
     def __init__(self, capacity=2000):
         self.memory_x = []
         self.memory_y = []
         self.capacity = capacity
+    def store(self, x, y):
         # Store on CPU to save GPU VRAM
         curr_x = x.detach().cpu()
         curr_y = y.detach().cpu()
+        for i in range(curr_x.size(0)):
             if len(self.memory_x) >= self.capacity:
                 self.memory_x.pop(0)
                 self.memory_y.pop(0)
             self.memory_x.append(curr_x[i])
             self.memory_y.append(curr_y[i])
     def retrieve(self, query_x, k=5):
         if not self.memory_x:
+            return None
         mem_tensor = torch.stack(self.memory_x).to(query_x.device)
         distances = torch.cdist(query_x, mem_tensor)
+        top_k_indices = torch.topk(distances, k, largest=False).indices
         # Gather labels
         retrieved_y = [torch.stack([self.memory_y[idx] for idx in sample_indices])
+                       for sample_indices in top_k_indices]
+        return torch.stack(retrieved_y).to(query_x.device)
 class H3MOS(nn.Module):
     def __init__(self, input_dim, hidden_dim, output_dim):
         # Fast Path (Training or Empty Memory)
         if training_mode or len(self.hippocampus.memory_x) < 10:
+            return raw_logits
         # Memory Retrieval & Integration
+        past_labels = self.hippocampus.retrieve(x, k=5)
         if past_labels is None:
+             return raw_logits
         mem_votes = torch.zeros_like(raw_logits)
         for i in range(x.size(0)):
         mem_probs = F.softmax(mem_votes, dim=1)
         # Dynamic Gating: 80% Neural, 20% Memory
+        return (0.8 * raw_logits) + (0.2 * mem_probs * 5.0)
 # --- 2. DATA SETUP & TRAINING PIPELINE ---
 except Exception as e:
     print("Error loading data:", e)
     # Fallback dummy data if kaggle fails (for testing)
+    df = pd.DataFrame({'content': ['test'], 'emoji': ['👍']})
 # Mappings
 sent_map = {'❤️':'Positive', '👍':'Positive', '😂':'Positive', '💯':'Positive', '😢':'Negative', '😭':'Negative', '😮':'Neutral'}
 X_sparse = tfidf.fit_transform(df['content'])
 X_dense = torch.FloatTensor(X_sparse.toarray()).to(device)
 # Model Zoo Containers
 tasks = ['emoji', 'sentiment', 'intent']
 model_names = ['DISTIL', 'RandomForest', 'SVM', 'NaiveBayes', 'LogReg', 'GradBoost']
 zoo = {task: {} for task in tasks}
 encoders = {}
+print("🧠 Training Models... (This may take a moment)")
 for task in tasks:
     # Prepare Labels
     optimizer = torch.optim.AdamW(model.parameters(), lr=0.01)
     model.train()
+    # Short training loop for demo speed
     for epoch in range(25):
         optimizer.zero_grad()
+        out = model(X_dense, training_mode=True)
         loss = F.cross_entropy(out, y_tensor)
         loss.backward()
         optimizer.step()
+        # Populate memory occasionally
         if epoch % 5 == 0:
             with torch.no_grad():
+                idx = torch.randperm(X_dense.size(0))[:50]
+                model.hippocampus.store(X_dense[idx], y_tensor[idx])
     model.eval()
     zoo[task]['DISTIL'] = model
 print("✅ Training Complete.")
+# --- 3. INFERENCE LOGIC ---
 def get_predictions(text):
     """Runs all models on the text."""
     results = {name: {} for name in model_names}
+    for task in tasks:
         le = encoders[task]
         for name in model_names:
             if name == 'DISTIL':
                 with torch.no_grad():
+                    logits = zoo[task][name](vec_t)
                     pred_idx = torch.argmax(logits, dim=1).item()
                     pred_label = le.inverse_transform([pred_idx])[0]
             else:
 # --- 4. UI STYLING & INTERFACE ---
+def get_avatar_url(seed):
+    return f"https://api.dicebear.com/7.x/bottts/svg?seed={seed}&backgroundColor=transparent&size=128"
 CSS = """
 .chat-window { font-family: 'Segoe UI', sans-serif; }
 .model-card {
     background: white;
+    min-width: 140px;
     border-radius: 12px;
     padding: 12px;
     box-shadow: 0 4px 12px rgba(0,0,0,0.08);
     align-items: center;
     border: 1px solid #eee;
     transition: transform 0.2s;
 }
+.model-card:hover { transform: translateY(-3px); }
 .card-name {
+    font-size: 11px;
+    font-weight: 700;
     text-transform: uppercase;
+    color: #888;
     margin-bottom: 4px;
 }
 .card-emoji {
+    font-size: 28px;
+    margin: 4px 0;
 }
 .card-badge {
+    font-size: 10px;
+    padding: 2px 8px;
+    border-radius: 10px;
+    margin-top: 4px;
+    font-weight: 600;
 }
+.bg-Pos { background-color: #e6fffa; color: #2c7a7b; }
+.bg-Neg { background-color: #fff5f5; color: #c53030; }
+.bg-Neu { background-color: #f7fafc; color: #4a5568; }
 .intent-row {
+    font-size: 10px;
+    color: #666;
+    margin-top: 6px;
+    border-top: 1px dashed #eee;
+    padding-top: 4px;
     width: 100%;
     text-align: center;
 }
     preds = get_predictions(message)
+    # 1. Create User Message HTML (with Emoji Reaction Bar)
+    # Order: DISTIL, RF, SVM, NB, LR, GB
+    reaction_string = "".join([preds[m]['emoji'] for m in model_names])
     user_html = f"""
     <div>
         {message}
+        <div class="user-reactions" title="Consensus: {reaction_string}">{reaction_string}</div>
     </div>
     """
     history.append({"role": "user", "content": user_html})
+    # 2. Create Single Bot Reply HTML (Horizontal Scroll Cards)
     cards_html = '<div class="model-scroll-container">'
     for name in model_names:
         p = preds[name]
+        # Color coding for sentiment
         sent_cls = "bg-Neu"
         if "Pos" in p['sentiment']: sent_cls = "bg-Pos"
         elif "Neg" in p['sentiment']: sent_cls = "bg-Neg"
         <div class="model-card">
             <div class="card-name">{name}</div>
             <div class="card-emoji">{p['emoji']}</div>
             <div class="card-badge {sent_cls}">{p['sentiment']}</div>
             <div class="intent-row">{p['intent']}</div>
         </div>