ak0601 committed on
Commit
d9b7ed5
·
verified ·
1 Parent(s): 507de1c

Update src/extract_features.py

Browse files
Files changed (1) hide show
  1. src/extract_features.py +72 -72
src/extract_features.py CHANGED
@@ -1,72 +1,72 @@
1
- import re
2
- import random
3
- from collections import Counter, defaultdict
4
-
5
def parse_chat(file_path):
    """Parse a plain-text chat export into a list of message dicts.

    A message starts on a line shaped like
    ``<d/m/y>, <h:mm> - <sender>: <text>`` (the time may carry an AM/PM
    suffix).  Lines that do not start with a timestamp are treated as the
    continuation of the previous message and appended to its text, separated
    by a newline.  Timestamped lines that have no ``sender: `` part are
    skipped, and any untimestamped lines that follow them are discarded.

    NOTE(review): the line format looks like a WhatsApp text export -- confirm
    against the real input files.

    Args:
        file_path: Path of the UTF-8 encoded export file.

    Returns:
        A list of dicts with the keys "date", "time", "sender" and "text",
        in file order.  The senders "ak" and "Sarah con H" are renamed to
        "Aman" and "Sarah".
    """
    # Timestamp prefix shared by real messages and sender-less notice lines.
    stamp = r"(\d{1,2}/\d{1,2}/\d{2,4}), (\d{1,2}:\d{2}(?:\s?[AaPp][Mm])?) - "
    message_re = re.compile(stamp + r"([^:]+): (.*)")
    stamp_re = re.compile(stamp)

    # Normalize names
    aliases = {"ak": "Aman", "Sarah con H": "Sarah"}

    messages = []
    current = None  # message that continuation lines should be appended to

    # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading BOM, which
    # would otherwise stop the first line from matching.
    with open(file_path, "r", encoding="utf-8-sig") as f:
        for line in f:
            match = message_re.match(line)
            if match:
                date, time, sender, text = match.groups()
                current = {
                    "date": date,
                    "time": time,
                    "sender": aliases.get(sender, sender),
                    "text": text.strip(),
                }
                messages.append(current)
            elif stamp_re.match(line):
                # Timestamped line without "sender: " -- not a user message.
                current = None
            elif current is not None:
                extra = line.strip()
                if extra:
                    current["text"] += "\n" + extra
    return messages
28
-
29
-
30
def extract_inside_jokes(messages, top_n=40):
    """Sort chat messages into funny, cute and memorable candidates.

    Keyword checks are substring tests against the lowercased message text;
    the lists returned contain the original (non-lowercased) texts.

    Args:
        messages: Iterable of dicts that each carry a "text" string.
        top_n: How many of the most frequent whitespace-separated tokens to
            report in "top_words".  Defaults to 40.

    Returns:
        A dict with the keys "funny", "cute", "memories" (texts of more than
        four words) and "top_words" (most frequent lowercased tokens, most
        common first).
    """
    # Substring matching means "haha" already covers "hahaha", so the longer
    # variant is not listed.
    funny_keywords = ("lol", "😂", "🤣", "lmao", "funny", "haha", "hehe")
    cute_keywords = ("miss", "thank", "sweet", "cute", "proud", "happy", "aww", "glad")

    funny_candidates = []
    cute_candidates = []
    memory_candidates = []
    phrase_counter = Counter()

    for msg in messages:
        original = msg["text"]
        lowered = original.lower()

        # Funny moments
        if any(k in lowered for k in funny_keywords):
            funny_candidates.append(original)

        # Cute/emotional moments
        if any(k in lowered for k in cute_keywords):
            cute_candidates.append(original)

        # Memorable random moments: skip messages of four words or fewer
        if len(original.split()) > 4:
            memory_candidates.append(original)

        # Count repeated words
        phrase_counter.update(lowered.split())

    top_words = [word for word, _ in phrase_counter.most_common(top_n)]

    return {
        "funny": funny_candidates,
        "cute": cute_candidates,
        "memories": memory_candidates,
        "top_words": top_words,
    }
65
-
66
-
67
def random_memory(messages, min_length=10):
    """Return the text of one randomly chosen, sufficiently long message.

    Args:
        messages: Iterable of dicts that each carry a "text" string.
        min_length: A message qualifies only if its text is longer than this
            many characters.  Defaults to 10.

    Returns:
        The text of a random qualifying message, or a fixed placeholder
        sentence when no message qualifies.
    """
    long_messages = [m["text"] for m in messages if len(m["text"]) > min_length]
    if not long_messages:
        return "One of your old conversations ❤️"
    return random.choice(long_messages)
 
1
+ import re
2
+ import random
3
+ from collections import Counter, defaultdict
4
+
5
def parse_chat(file_path):
    """Parse a plain-text chat export into a list of message dicts.

    A message starts on a line shaped like
    ``<d/m/y>, <h:mm> - <sender>: <text>`` (the time may carry an AM/PM
    suffix).  Lines that do not start with a timestamp are treated as the
    continuation of the previous message and appended to its text, separated
    by a newline.  Timestamped lines that have no ``sender: `` part are
    skipped, and any untimestamped lines that follow them are discarded.

    NOTE(review): the line format looks like a WhatsApp text export -- confirm
    against the real input files.

    Args:
        file_path: Path of the UTF-8 encoded export file.

    Returns:
        A list of dicts with the keys "date", "time", "sender" and "text",
        in file order.  The senders "ak" and "Sarah con H" are renamed to
        "Aman" and "Sarah".
    """
    # Timestamp prefix shared by real messages and sender-less notice lines.
    stamp = r"(\d{1,2}/\d{1,2}/\d{2,4}), (\d{1,2}:\d{2}(?:\s?[AaPp][Mm])?) - "
    message_re = re.compile(stamp + r"([^:]+): (.*)")
    stamp_re = re.compile(stamp)

    # Normalize names
    aliases = {"ak": "Aman", "Sarah con H": "Sarah"}

    messages = []
    current = None  # message that continuation lines should be appended to

    # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading BOM, which
    # would otherwise stop the first line from matching.
    with open(file_path, "r", encoding="utf-8-sig") as f:
        for line in f:
            match = message_re.match(line)
            if match:
                date, time, sender, text = match.groups()
                current = {
                    "date": date,
                    "time": time,
                    "sender": aliases.get(sender, sender),
                    "text": text.strip(),
                }
                messages.append(current)
            elif stamp_re.match(line):
                # Timestamped line without "sender: " -- not a user message.
                current = None
            elif current is not None:
                extra = line.strip()
                if extra:
                    current["text"] += "\n" + extra
    return messages
28
+
29
+
30
def extract_inside_jokes(messages, top_n=40):
    """Sort chat messages into funny, cute and memorable candidates.

    Keyword checks are substring tests against the lowercased message text;
    the lists returned contain the original (non-lowercased) texts.

    Args:
        messages: Iterable of dicts that each carry a "text" string.
        top_n: How many of the most frequent whitespace-separated tokens to
            report in "top_words".  Defaults to 40.

    Returns:
        A dict with the keys "funny", "cute", "memories" (texts of more than
        four words) and "top_words" (most frequent lowercased tokens, most
        common first).
    """
    # Substring matching means "haha" already covers "hahaha" and "hahahaha",
    # so the longer variants are not listed.
    funny_keywords = ("lol", "😂", "🤣", "lmao", "funny", "haha", "hehe", "😅", "😁")
    cute_keywords = (
        "miss", "thank", "sweet", "cute", "proud", "happy",
        "aww", "glad", "nice", "kind", "brave",
    )

    funny_candidates = []
    cute_candidates = []
    memory_candidates = []
    phrase_counter = Counter()

    for msg in messages:
        original = msg["text"]
        lowered = original.lower()

        # Funny moments
        if any(k in lowered for k in funny_keywords):
            funny_candidates.append(original)

        # Cute/emotional moments
        if any(k in lowered for k in cute_keywords):
            cute_candidates.append(original)

        # Memorable random moments: skip messages of four words or fewer
        if len(original.split()) > 4:
            memory_candidates.append(original)

        # Count repeated words
        phrase_counter.update(lowered.split())

    top_words = [word for word, _ in phrase_counter.most_common(top_n)]

    return {
        "funny": funny_candidates,
        "cute": cute_candidates,
        "memories": memory_candidates,
        "top_words": top_words,
    }
65
+
66
+
67
def random_memory(messages, min_length=10):
    """Return the text of one randomly chosen, sufficiently long message.

    Args:
        messages: Iterable of dicts that each carry a "text" string.
        min_length: A message qualifies only if its text is longer than this
            many characters.  Defaults to 10.

    Returns:
        The text of a random qualifying message, or a fixed placeholder
        sentence when no message qualifies.
    """
    long_messages = [m["text"] for m in messages if len(m["text"]) > min_length]
    if not long_messages:
        return "One of your old conversations ❤️"
    return random.choice(long_messages)