rottg committed on
Commit
b535b97
·
verified ·
1 Parent(s): 03f1ed6

Update code

Browse files
Files changed (1) hide show
  1. stylometry.py +20 -16
stylometry.py CHANGED
@@ -76,7 +76,7 @@ LEET_PATTERN = re.compile(r'\b\w*\d+\w*\b')
76
  class AdvancedStyleFeatures:
77
  """Enhanced features extracted from a user's messages."""
78
 
79
- def __init__(self, user_id: int, user_name: str):
80
  self.user_id = user_id
81
  self.user_name = user_name
82
  self.message_count = 0
@@ -198,51 +198,55 @@ class AdvancedStylometryAnalyzer:
198
  self._embedding_model = False # Mark as failed
199
  return self._embedding_model if self._embedding_model else None
200
 
201
- def get_active_users(self, min_messages: int = 300, days: int = 365) -> List[Tuple[int, str, int]]:
202
  """Get users active in the last N days with at least min_messages."""
203
  cutoff_date = datetime.now() - timedelta(days=days)
204
- cutoff_str = cutoff_date.strftime('%Y-%m-%d')
205
 
206
  conn = sqlite3.connect(self.db_path)
207
  cursor = conn.cursor()
208
 
 
209
  query = """
210
- SELECT u.id, u.name, COUNT(m.id) as msg_count
211
- FROM users u
212
- JOIN messages m ON u.id = m.sender_id
213
- WHERE m.date >= ?
214
- GROUP BY u.id
 
 
215
  HAVING msg_count >= ?
216
  ORDER BY msg_count DESC
217
  """
218
 
219
- cursor.execute(query, (cutoff_str, min_messages))
220
  users = cursor.fetchall()
221
  conn.close()
222
 
223
  return users
224
 
225
- def get_user_messages(self, user_id: int, days: int = 365) -> List[Tuple[str, str]]:
226
  """Get messages for a user (text, date)."""
227
  cutoff_date = datetime.now() - timedelta(days=days)
228
- cutoff_str = cutoff_date.strftime('%Y-%m-%d')
229
 
230
  conn = sqlite3.connect(self.db_path)
231
  cursor = conn.cursor()
232
 
233
  query = """
234
- SELECT text, date FROM messages
235
- WHERE sender_id = ? AND date >= ? AND text IS NOT NULL AND text != ''
236
- ORDER BY date
 
237
  """
238
 
239
- cursor.execute(query, (user_id, cutoff_str))
240
  messages = cursor.fetchall()
241
  conn.close()
242
 
243
  return messages
244
 
245
- def extract_features(self, user_id: int, user_name: str,
246
  messages: List[Tuple[str, str]]) -> AdvancedStyleFeatures:
247
  """Extract comprehensive stylometric features from user messages."""
248
  features = AdvancedStyleFeatures(user_id, user_name)
 
76
  class AdvancedStyleFeatures:
77
  """Enhanced features extracted from a user's messages."""
78
 
79
+ def __init__(self, user_id: str, user_name: str):
80
  self.user_id = user_id
81
  self.user_name = user_name
82
  self.message_count = 0
 
198
  self._embedding_model = False # Mark as failed
199
  return self._embedding_model if self._embedding_model else None
200
 
201
+ def get_active_users(self, min_messages: int = 300, days: int = 365) -> List[Tuple[str, str, int]]:
202
  """Get users active in the last N days with at least min_messages."""
203
  cutoff_date = datetime.now() - timedelta(days=days)
204
+ cutoff_timestamp = int(cutoff_date.timestamp())
205
 
206
  conn = sqlite3.connect(self.db_path)
207
  cursor = conn.cursor()
208
 
209
+ # Use from_id and from_name directly from messages table
210
  query = """
211
+ SELECT from_id, MAX(from_name) as name, COUNT(*) as msg_count
212
+ FROM messages
213
+ WHERE date_unixtime >= ?
214
+ AND from_id IS NOT NULL
215
+ AND text_plain IS NOT NULL
216
+ AND text_plain != ''
217
+ GROUP BY from_id
218
  HAVING msg_count >= ?
219
  ORDER BY msg_count DESC
220
  """
221
 
222
+ cursor.execute(query, (cutoff_timestamp, min_messages))
223
  users = cursor.fetchall()
224
  conn.close()
225
 
226
  return users
227
 
228
+ def get_user_messages(self, user_id: str, days: int = 365) -> List[Tuple[str, str]]:
229
  """Get messages for a user (text, date)."""
230
  cutoff_date = datetime.now() - timedelta(days=days)
231
+ cutoff_timestamp = int(cutoff_date.timestamp())
232
 
233
  conn = sqlite3.connect(self.db_path)
234
  cursor = conn.cursor()
235
 
236
  query = """
237
+ SELECT text_plain, date FROM messages
238
+ WHERE from_id = ? AND date_unixtime >= ?
239
+ AND text_plain IS NOT NULL AND text_plain != ''
240
+ ORDER BY date_unixtime
241
  """
242
 
243
+ cursor.execute(query, (user_id, cutoff_timestamp))
244
  messages = cursor.fetchall()
245
  conn.close()
246
 
247
  return messages
248
 
249
+ def extract_features(self, user_id: str, user_name: str,
250
  messages: List[Tuple[str, str]]) -> AdvancedStyleFeatures:
251
  """Extract comprehensive stylometric features from user messages."""
252
  features = AdvancedStyleFeatures(user_id, user_name)