PR3DAT0R3 committed on
Commit
8dd094e
·
verified ·
1 Parent(s): 5d470eb

Upload 3 files

Browse files
Files changed (3) hide show
  1. Peter_Project.ipynb +0 -0
  2. app.py +357 -0
  3. requirements.txt +10 -0
Peter_Project.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,357 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ import random
5
+ from textblob import TextBlob
6
+ import pandas as pd
7
+ import requests
8
+ from io import StringIO
9
+ import gradio as gr
10
+ import speech_recognition as sr
11
+ import json
12
+ from sklearn.model_selection import train_test_split
13
+ from sklearn.preprocessing import LabelEncoder
14
+ from collections import Counter
15
+ import matplotlib.pyplot as plt
16
+ import seaborn as sns
17
+ from sklearn.feature_extraction.text import CountVectorizer
18
+ import numpy as np
19
+ import re
20
+ from torch.utils.data import Dataset, DataLoader
21
+
22
+ # --- Data Cleaning and Preprocessing ---
23
def clean_text(text):
    """Normalize a raw sentence into lowercase, letters-only, single-spaced text.

    Null values (as judged by ``pd.isnull``) become the empty string. Otherwise
    the text is lowercased and passed through the regex substitutions below,
    in order, and finally stripped of leading/trailing whitespace.
    """
    if pd.isnull(text):
        return ""

    # (pattern, replacement) pairs; order matters because later patterns
    # operate on the output of earlier ones.
    substitutions = (
        (r"http\S+|www\S+|https\S+", ''),  # Remove URLs
        (r'\@\w+|\#', ''),                 # Remove @mentions and '#' characters
        (r'[^a-z\s]', ''),                 # Remove non-alphabetic characters
        (r'\s+', ' '),                     # Normalize whitespace runs
    )

    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()
32
+
33
+ # --- Load datasets ---
34
# Both CSVs are semicolon-delimited, header-less, and read with the same two
# column names, so the parsing options are defined once.
_csv_options = dict(delimiter=';', header=None, names=['sentence', 'label'])

df = pd.read_csv(
    "https://drive.google.com/uc?export=download&id=14D_HcvTFL63-KffCQLNFxGH-oY_knwmo",
    **_csv_options
)
ts_df = pd.read_csv(
    "https://drive.google.com/uc?export=download&id=1Vmr1Rfv4pLSlAUrlOCxAcszvlxJOSHrm",
    **_csv_options
)

# Stack the two frames, drop exact duplicate rows, then add the cleaned text
# column that the vocabulary is built from.
df = pd.concat([df, ts_df], ignore_index=True)
df.drop_duplicates(inplace=True)
df['clean_sentence'] = df['sentence'].apply(clean_text)
45
+
46
+ # --- Build Vocabulary ---
47
# Whitespace-tokenize every cleaned sentence.
tokenized = df['clean_sentence'].apply(str.split)

# Count tokens over the whole corpus, then hand out ids from 2 upward in
# most-common-first order; ids 0 and 1 are reserved for the special tokens.
token_counts = Counter(token for sentence in tokenized for token in sentence)
vocab = {
    word: index
    for index, (word, _) in enumerate(token_counts.most_common(), start=2)
}
vocab['<PAD>'] = 0
vocab['<UNK>'] = 1
52
+
53
def encode(text):
    """Map an iterable of tokens to vocabulary ids.

    Despite the parameter name, ``text`` is iterated item by item, so it is
    expected to be an already-tokenized sequence. Tokens missing from
    ``vocab`` are mapped to the ``<UNK>`` id.
    """
    unknown_id = vocab['<UNK>']
    ids = []
    for word in text:
        ids.append(vocab.get(word, unknown_id))
    return ids


encoded_texts = tokenized.apply(encode)
57
+
58
+ # --- Pad Sequences ---
59
MAX_LEN = 32


def pad_sequence(seq):
    """Return ``seq`` clipped to ``MAX_LEN`` ids and right-filled with ``<PAD>``.

    Sequences longer than ``MAX_LEN`` are truncated; shorter ones get
    ``<PAD>`` ids appended until they are exactly ``MAX_LEN`` long.
    """
    filler = [vocab['<PAD>']] * max(0, MAX_LEN - len(seq))
    return seq[:MAX_LEN] + filler


padded = encoded_texts.apply(pad_sequence).tolist()

# --- Encode Labels ---
# Fit the encoder on the label column and keep it around: its ``classes_``
# are used later to size the classifier and to decode predictions.
le = LabelEncoder()
labels = le.fit_transform(df['label'])
67
+
68
+ # --- Dataset + DataLoader ---
69
class EmotionDataset(Dataset):
    """Tensor-backed dataset pairing encoded sequences with label ids.

    Both ``X`` and ``y`` are converted to ``torch.long`` tensors once, at
    construction time; indexing returns an ``(X[idx], y[idx])`` pair.
    """

    def __init__(self, X, y):
        as_long = dict(dtype=torch.long)
        self.X = torch.tensor(X, **as_long)
        self.y = torch.tensor(y, **as_long)

    def __len__(self):
        # Number of samples is taken from the feature tensor.
        return len(self.X)

    def __getitem__(self, idx):
        sample, target = self.X[idx], self.y[idx]
        return sample, target
79
+
80
# Stratified 80/20 split with a fixed seed so the validation set is
# reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(
    padded,
    labels,
    test_size=0.2,
    stratify=labels,
    random_state=42,
)

# Only the training loader shuffles; validation keeps the split order.
train_dataset = EmotionDataset(X_train, y_train)
val_dataset = EmotionDataset(X_val, y_val)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)
83
+
84
+ # --- Positional Encoding ---
85
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position signals to a batch of embeddings.

    A ``(1, max_len, d_model)`` table is precomputed: even feature columns
    hold ``sin(position * div_term)`` and odd columns hold the matching
    ``cos`` values.

    Args:
        d_model: Embedding width (size of the last input dimension).
        max_len: Number of positions to precompute. Inputs whose second
            dimension exceeds this cannot be broadcast against the table.
    """

    def __init__(self, d_model, max_len=MAX_LEN):
        super().__init__()
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-np.log(10000.0) / d_model))
        angles = position * div_term

        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even column, so
        # trim the cosine block to fit instead of failing on a shape mismatch.
        pe[:, 1::2] = torch.cos(angles)[:, : d_model // 2]

        # Register as a buffer so the table follows ``model.to(device)``
        # instead of being copied host->device on every forward pass.
        # ``persistent=False`` keeps it out of ``state_dict`` so checkpoints
        # saved before this change still load.
        self.register_buffer("pe", pe.unsqueeze(0), persistent=False)

    def forward(self, x):
        """Return ``x`` plus the encodings for its first ``x.size(1)`` positions."""
        # ``.to(x.device)`` is a no-op once the module lives on x's device; it
        # is kept so a stand-alone instance left on CPU keeps working.
        return x + self.pe[:, : x.size(1)].to(x.device)
97
+
98
+ # --- Transformer Model with Masking + Dropout for Bayesian Inference ---
99
class EmotionTransformer(nn.Module):
    """Transformer-encoder text classifier with dropout before the output layer.

    Pipeline: token embedding -> positional encoding -> 2-layer
    ``nn.TransformerEncoder`` (batch-first) -> mean pooling over real tokens
    -> dropout -> linear classifier.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Embedding / model width.
        num_heads: Attention heads per encoder layer.
        num_classes: Number of output logits.
    """

    def __init__(self, vocab_size, embed_dim, num_heads, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=vocab['<PAD>'])
        self.pos_encoder = PositionalEncoding(embed_dim)
        encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads, batch_first=True)
        # Nested-tensor conversion is disabled so the encoder output keeps the
        # same (batch, seq, dim) layout as the padding mask in both train and
        # eval mode, which the masked pooling in ``forward`` relies on.
        self.transformer = nn.TransformerEncoder(
            encoder_layer, num_layers=2, enable_nested_tensor=False
        )
        self.dropout = nn.Dropout(0.3)
        self.fc = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        """Return class logits for a batch of padded token-id sequences."""
        pad_mask = (x == vocab['<PAD>'])
        x = self.embedding(x)
        x = self.pos_encoder(x)
        x = self.transformer(x, src_key_padding_mask=pad_mask)

        # Mean-pool over real tokens only. Averaging over <PAD> positions as
        # well would make the pooled vector depend on how much padding a
        # sentence happens to carry.
        x = x.masked_fill(pad_mask.unsqueeze(-1), 0.0)
        # clamp avoids a division by zero for a row made entirely of <PAD>.
        token_count = (~pad_mask).sum(dim=1, keepdim=True).clamp(min=1)
        pooled = x.sum(dim=1) / token_count

        return self.fc(self.dropout(pooled))
116
+
117
+ # --- Train the Model ---
118
# Pick the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = EmotionTransformer(
    len(vocab),
    embed_dim=64,
    num_heads=4,
    num_classes=len(le.classes_),
)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

for epoch in range(5):
    # ---- training pass ----
    model.train()
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        optimizer.zero_grad()
        loss = criterion(model(X_batch), y_batch)
        loss.backward()
        optimizer.step()

        # Reported "Train Loss" is the sum of per-batch losses for the epoch.
        total_loss += loss.item()

    # ---- validation pass ----
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            preds = model(X_batch).argmax(dim=1)
            correct += (preds == y_batch).sum().item()
            total += y_batch.size(0)

    val_accuracy = correct / total
    print(f"Epoch {epoch+1} | Train Loss: {total_loss:.4f} | Val Accuracy: {val_accuracy:.4f}")

# Save model
torch.save(model.state_dict(), "emotion_transformer_model.pth")
150
+
151
+ # --- Load Solutions CSV ---
152
file_id = "1yVJh_NVL4Y4YqEXGym47UCK5ZNZgVZYv"
url = f"https://drive.google.com/uc?export=download&id={file_id}"

# A timeout keeps application start-up from hanging indefinitely if the host
# is unreachable (the LLM call elsewhere in this file already uses one).
response = requests.get(url, timeout=30)
if not response.ok:
    raise RuntimeError(
        f"ERROR: solutions CSV download failed with HTTP {response.status_code}."
    )
csv_text = response.text

# An HTML body (starts with '<') means the server sent a web page instead of
# the raw CSV.
if csv_text.strip().startswith('<'):
    raise RuntimeError("ERROR: Google Drive link is not returning CSV! Check your sharing settings.")

# Malformed rows are skipped rather than aborting the load.
solutions_df = pd.read_csv(StringIO(csv_text), header=0, on_bad_lines='skip')

# Per-emotion set of solutions already handed out, used to avoid repeats.
used_solutions = {emotion: set() for emotion in solutions_df['emotion'].unique()}
163
# Words that make is_negative_input() flag a message as negative.
negative_words = [
    "not", "bad", "sad", "anxious", "anxiety", "depressed",
    "upset", "shit", "stress",
    "worried", "unwell", "struggling", "low", "down",
    "terrible", "awful",
    "nervous", "panic", "afraid", "scared", "tense",
    "overwhelmed", "fear", "uneasy",
]

# Canned supportive replies, keyed by emotion name; one is picked at random.
responses = {
    "sadness": [
        "It’s okay to feel down sometimes. I’m here to support you.",
        "I'm really sorry you're going through this. Want to talk more about it?",
        "You're not alone — I’m here for you.",
    ],
    "anger": [
        "That must have been frustrating. Want to vent about it?",
        "It's okay to feel this way. I'm listening.",
        "Would it help to talk through it?",
    ],
    "love": [
        "That’s beautiful to hear! What made you feel that way?",
        "It’s amazing to experience moments like that.",
        "Sounds like something truly meaningful.",
    ],
    "happiness": [
        "That's awesome! What’s bringing you joy today?",
        "I love hearing good news. 😊",
        "Yay! Want to share more about it?",
    ],
    "neutral": [
        "Got it. I’m here if you want to dive deeper.",
        "Thanks for sharing that. Tell me more if you’d like.",
        "I’m listening. How else can I support you?",
    ],
}

# Static relaxation aids: a grounding exercise and a video link.
relaxation_resources = {
    "exercise": "\n".join([
        "Try this 5-4-3-2-1 grounding method:",
        "- 5 things you see",
        "- 4 you can touch",
        "- 3 you hear",
        "- 2 you smell",
        "- 1 you taste",
    ]),
    "video": "Here’s a short calming video that might help: https://youtu.be/O-6f5wQXSu8",
}

# Keyword / phrase lists for recognising help requests, thanks and goodbyes.
help_keywords = [
    "suggest", "help", "calm", "exercise", "relax",
    "how can i", "any tips", "can u", "can you",
]
thank_you_inputs = ["thank", "thanks", "thank you"]
bye_inputs = [
    "bye", "goodbye", "see you", "take care", "ok bye", "exit", "quit",
]
205
+
206
def correct_spelling(text):
    """Return ``text`` after TextBlob's spelling correction, as a plain ``str``."""
    corrected = TextBlob(text).correct()
    return str(corrected)
208
+
209
def get_sentiment(text):
    """Return the TextBlob sentiment polarity score for ``text``."""
    return TextBlob(text).sentiment.polarity
212
+
213
def is_negative_input(text, words=None):
    """Return True if ``text`` contains any negative word as a whole word.

    Matching is case-insensitive and anchored on word boundaries, so "low"
    matches "feeling low." but not "flowers", and "not" does not match
    "nothing". Inflected forms (e.g. "stressed") only match if they are
    listed themselves.

    Args:
        text: The user message. Empty or ``None`` input is never negative.
        words: Optional iterable of words/phrases to look for; defaults to
            the module-level ``negative_words`` list.
    """
    if words is None:
        words = negative_words
    if not text:
        return False

    lowered = text.lower()
    # Look-arounds instead of ``\b`` so entries that start or end with a
    # non-word character still behave sensibly.
    return any(
        re.search(rf"(?<!\w){re.escape(word.lower())}(?!\w)", lowered) is not None
        for word in words
    )
216
+
217
def get_unique_solution(emotion):
    """Pick a random solution for ``emotion`` that has not been used yet.

    Rows of ``solutions_df`` whose ``emotion`` column equals ``emotion`` are
    the candidates. Solutions recorded in ``used_solutions[emotion]`` are
    excluded; once every candidate has been used, the record is cleared and
    all candidates become eligible again. The chosen solution is recorded as
    used before it is returned.
    """
    candidates = solutions_df.loc[solutions_df['emotion'] == emotion]
    already_used = used_solutions[emotion]
    fresh = candidates.loc[~candidates['solution'].isin(already_used)]

    if fresh.empty:
        # Everything has been offered once: start a new round.
        used_solutions[emotion] = set()
        fresh = candidates

    chosen = fresh.sample(1).iloc[0]['solution']
    used_solutions[emotion].add(chosen)
    return chosen
226
+
227
def preprocess_input(text):
    """Turn a raw user message into a ``(1, MAX_LEN)`` tensor of token ids.

    The message goes through the same ``clean_text`` -> split -> ``encode``
    -> ``pad_sequence`` steps as the training sentences, so tokens such as
    "happy!" map to the id of "happy" instead of ``<UNK>``. The tensor is
    placed on the device that holds the model parameters.
    """
    tokens = clean_text(text).split()
    # If cleaning removed everything (e.g. the input was only digits or
    # punctuation), feed a single <UNK> so the sequence is not entirely
    # <PAD>: every position would then be masked out of attention.
    token_ids = encode(tokens) or [vocab['<UNK>']]
    batch = [pad_sequence(token_ids)]
    target_device = next(model.parameters()).device
    return torch.tensor(batch, dtype=torch.long).to(target_device)
232
+
233
def get_emotion(user_input):
    """Classify ``user_input`` into an emotion label.

    Decision order:
      1. If ``is_negative_input`` flags the text, return "sadness" without
         consulting the model.
      2. Otherwise average the softmax outputs of five stochastic forward
         passes (dropout left active) and take the most probable class.
      3. If that averaged probability is below 0.6, return "neutral".
      4. If the TextBlob polarity contradicts the prediction (below -0.25
         for "happiness", above 0.25 for "sadness"), flip to the other.
    """
    if is_negative_input(user_input):
        return "sadness"

    sentiment = get_sentiment(user_input)
    x = preprocess_input(user_input)

    # Dropout must be active for the repeated passes to differ, so the model
    # is put in train mode -- but only for the duration of the sampling; the
    # previous mode is restored so this call does not leak a global change.
    was_training = model.training
    model.train()
    try:
        with torch.no_grad():
            probs = torch.stack([F.softmax(model(x), dim=1) for _ in range(5)])
    finally:
        model.train(was_training)

    avg_probs = probs.mean(dim=0)
    prob, idx = torch.max(avg_probs, dim=1)
    pred_emotion = le.classes_[idx.item()]

    if prob.item() < 0.6:
        return "neutral"
    if sentiment < -0.25 and pred_emotion == "happiness":
        return "sadness"
    if sentiment > 0.25 and pred_emotion == "sadness":
        return "happiness"
    return pred_emotion
251
+
252
def audio_to_text(audio_file):
    """Transcribe an audio file to text, returning "" on any failure.

    Args:
        audio_file: Path of the recording, or ``None`` when no audio was
            supplied.

    Returns:
        The transcript from ``Recognizer.recognize_google``, or the empty
        string when there is no file, the file cannot be read, or
        recognition fails.
    """
    if audio_file is None:
        return ""

    recog = sr.Recognizer()
    try:
        # Opening/reading the file is inside the try as well: an unreadable
        # or unsupported file should yield "" like a failed recognition does,
        # not crash the chat handler.
        with sr.AudioFile(audio_file) as source:
            audio = recog.record(source)
        return recog.recognize_google(audio)
    except Exception:
        # Deliberately best-effort: the caller treats "" as "nothing heard".
        return ""
263
+
264
+ # LLM API function
265
def call_llm_api(user_text):
    """Send ``user_text`` to the Hugging Face Inference API and return a string.

    The access token is read from the ``HF_TOKEN`` environment variable
    instead of being hard-coded; when it is unset the request is sent without
    an ``Authorization`` header.

    Returns:
        ``str(output)`` of the decoded JSON response, "API error: ..." when
        the response is a dict carrying an ``error`` key, or
        "API call failed: ..." when the request or JSON decoding fails.
    """
    import os  # local import so this function does not depend on file-level changes

    # NOTE(review): distilbert-base-uncased looks like a fill-mask model rather
    # than a text-generation one -- confirm this endpoint can answer prompts.
    api_url = "https://api-inference.huggingface.co/models/distilbert-base-uncased"

    token = os.environ.get("HF_TOKEN", "").strip()
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    payload = {"inputs": user_text}

    try:
        resp = requests.post(api_url, headers=headers, json=payload, timeout=15)
        output = resp.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException: network/timeout problems; ValueError: non-JSON body.
        return f"API call failed: {e}"

    if isinstance(output, dict) and 'error' in output:
        return "API error: " + str(output['error'])
    return str(output)
279
+
280
# Conversation turns recorded so far (one dict per normal user message).
GLOBAL_CONVO_HISTORY = []
# Per-user feedback state: {"emotion": <label or None>, "pending": <bool>}.
USER_FEEDBACK_STATE = {}


def _recent_history_json():
    """Serialize the five most recent conversation turns as indented JSON."""
    return json.dumps(GLOBAL_CONVO_HISTORY[-5:], indent=2)


def _suggest_with_fallback(emotion):
    """Return a solution for ``emotion``, or a "neutral" one if that lookup fails."""
    try:
        return get_unique_solution(emotion)
    except Exception:
        return get_unique_solution("neutral")


def emoti_chat(audio, text, history_json=""):
    """Handle one chat turn and return ``(reply, history_json, extra)``.

    Args:
        audio: Path to a recorded message, or ``None``. Used only when
            ``text`` is empty or whitespace.
        text: Typed message; takes precedence over ``audio``.
        history_json: Accepted for interface compatibility; not read.

    Returns:
        A 3-tuple: the bot reply, the last five history entries as JSON, and
        a third value that is ``""`` except on exit, where it is
        ``gr.update(visible=False)``.

    Flow: empty input -> prompt; exit phrase -> goodbye; "fun fact" /
    "more about" / the word "api" -> LLM API reply; pending feedback -> record
    it and either offer another suggestion ("no") or reset; otherwise detect
    the emotion, reply with support plus a suggestion, and mark feedback as
    pending.
    """
    # --- Get user input from voice or text ---
    if text and text.strip():
        user_input = text
    elif audio is not None:
        user_input = audio_to_text(audio)
    else:
        user_input = ""
    if not user_input.strip():
        return "Please say something or type your message.", _recent_history_json(), ""

    user_input = correct_spelling(user_input)
    lowered = user_input.lower()
    normalized = lowered.strip()

    # --- Exit logic ---
    exit_phrases = ["exit", "quit", "goodbye", "bye", "close"]
    if normalized in exit_phrases:
        return "Take care! I’m here whenever you want to talk. 👋", _recent_history_json(), gr.update(visible=False)

    # --- HuggingFace LLM API call for "fun fact" or "more about" ---
    # "api" must match as a whole word: a plain substring test also fires on
    # words such as "therapist" or "rapid".
    wants_api = re.search(r"\bapi\b", lowered) is not None
    if "fun fact" in lowered or "more about" in lowered or wants_api:
        api_reply = call_llm_api("Tell me a fun fact about AI.")
        return f"(LLM API response)\n{api_reply}", _recent_history_json(), ""

    # Feedback logic
    user_id = "default_user"
    state = USER_FEEDBACK_STATE.get(user_id, {"emotion": None, "pending": False})

    if state["pending"]:
        feedback = normalized
        GLOBAL_CONVO_HISTORY[-1]["feedback"] = feedback
        if feedback == "no":
            # Same fallback as the first suggestion: the stored emotion may
            # have no solutions of its own, which previously crashed here.
            suggestion = _suggest_with_fallback(state["emotion"])
            reply = f"Here's another suggestion for you: {suggestion}\nDid this help? (yes/no/skip)"
            USER_FEEDBACK_STATE[user_id] = {"emotion": state["emotion"], "pending": True}
            return reply, _recent_history_json(), ""
        # Any other answer closes the feedback loop.
        USER_FEEDBACK_STATE[user_id] = {"emotion": None, "pending": False}
        return "How can I help you further?", _recent_history_json(), ""

    # Normal user message: get emotion, give suggestion
    pred_emotion = get_emotion(user_input)
    support = random.choice(responses.get(pred_emotion, responses["neutral"]))
    suggestion = _suggest_with_fallback(pred_emotion)

    reply = f"{support}\n\nHere's a suggestion for you: {suggestion}\nDid this help? (yes/no/skip)"
    GLOBAL_CONVO_HISTORY.append({
        "user_input": user_input,
        "emotion": pred_emotion,
        "bot_support": support,
        "bot_suggestion": suggestion,
        "feedback": ""
    })
    USER_FEEDBACK_STATE[user_id] = {"emotion": pred_emotion, "pending": True}
    return reply, _recent_history_json(), ""
340
+
341
+ # ---- Gradio Web Interface ----
342
# The handler returns three values (reply, history JSON, extra), so three
# output components are declared to line up with them one-to-one. The last
# two are hidden and exist only to receive the non-display values.
iface = gr.Interface(
    fn=emoti_chat,
    inputs=[
        gr.Audio(type="filepath", label="🎤 Speak your message"),
        gr.Textbox(lines=2, placeholder="Or type your message here...", label="💬 Type message"),
        gr.Textbox(lines=1, value="", visible=False),  # Hidden, passes history state
    ],
    outputs=[
        gr.Textbox(label="EmotiBot Reply"),
        gr.Textbox(label="Hidden", visible=False),
        gr.Textbox(visible=False),  # Receives the handler's third return value
    ],
    title="EmotiBot Connect",
    description="Talk to EmotiBot using your voice or by typing. Detects your emotion, gives dynamic suggestions, remembers your feedback, and keeps a conversation history! Type 'fun fact' or 'api' for an AI-generated fact."
)

iface.launch()
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ torch
3
+ pandas
4
+ requests
5
+ scikit-learn
6
+ textblob
7
+ speechrecognition
8
+ matplotlib
9
+ seaborn
10
+ numpy