rottg committed on
Commit
c4decf6
·
verified ·
1 Parent(s): 4295eaa

Update code

Browse files
Files changed (4) hide show
  1. Dockerfile +1 -0
  2. dashboard.py +87 -0
  3. stylometry.py +487 -0
  4. templates/maintenance.html +750 -0
Dockerfile CHANGED
@@ -16,6 +16,7 @@ COPY search.py .
16
  COPY semantic_search.py .
17
  COPY hybrid_search.py .
18
  COPY gemini_client.py .
 
19
  COPY schema.sql .
20
  COPY static/ static/
21
  COPY templates/ templates/
 
16
  COPY semantic_search.py .
17
  COPY hybrid_search.py .
18
  COPY gemini_client.py .
19
+ COPY stylometry.py .
20
  COPY schema.sql .
21
  COPY static/ static/
22
  COPY templates/ templates/
dashboard.py CHANGED
@@ -299,6 +299,12 @@ def ai_search_page():
299
  return render_template('ai_search.html')
300
 
301
 
 
 
 
 
 
 
302
  # ==========================================
303
  # API ENDPOINTS - OVERVIEW STATS
304
  # ==========================================
@@ -2040,6 +2046,87 @@ def api_hybrid_status():
2040
  })
2041
 
2042
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2043
  def fallback_ai_search(query: str):
2044
  """Fallback search when AI is not available."""
2045
  conn = get_db()
 
299
  return render_template('ai_search.html')
300
 
301
 
302
@app.route('/maintenance')
def maintenance_page():
    """Maintenance page - password protected.

    Renders the ``maintenance.html`` template and returns it.

    NOTE(review): this handler performs no authentication itself; the
    "password protected" claim is not enforced here - confirm where (and
    whether) the check happens server-side.
    """
    return render_template('maintenance.html')
306
+
307
+
308
  # ==========================================
309
  # API ENDPOINTS - OVERVIEW STATS
310
  # ==========================================
 
2046
  })
2047
 
2048
 
2049
+ # ==========================================
2050
+ # API ENDPOINTS - STYLOMETRY (Duplicate Detection)
2051
+ # ==========================================
2052
+
2053
# Global stylometry state. Replaced/mutated by api_stylometry_analyze() and
# its background thread, and returned as-is by the status endpoint.
_stylometry_status = {'status': 'idle', 'progress': 0, 'message': '', 'results': None}


@app.route('/api/stylometry/analyze', methods=['POST'])
def api_stylometry_analyze():
    """Start stylometry analysis to detect duplicate accounts.

    Optional JSON body fields:
        min_messages: minimum number of messages per user (default 300).
        days: look-back window in days (default 365).
        threshold: similarity threshold handed to the analyzer (default 0.85).

    Returns:
        ``{'status': 'started'}`` once the background thread has been
        launched; HTTP 400 when the body is not a JSON object or a field is
        not numeric; HTTP 409 while a previous run is still in progress.
    """
    import threading

    global _stylometry_status

    # silent=True: a missing or unparsable body falls back to the defaults
    # instead of aborting the request.
    data = request.get_json(silent=True) or {}
    if not isinstance(data, dict):
        return jsonify({'status': 'error', 'message': 'Expected a JSON object'}), 400

    # Coerce up front so bad input is reported to the caller rather than
    # surfacing later as an exception inside the worker thread.
    try:
        min_messages = int(data.get('min_messages', 300))
        days = int(data.get('days', 365))
        threshold = float(data.get('threshold', 0.85))
    except (TypeError, ValueError):
        return jsonify({
            'status': 'error',
            'message': 'min_messages and days must be integers, threshold must be a number'
        }), 400

    # The analyzer is a shared singleton; a second concurrent run would reset
    # its state mid-analysis and both threads would overwrite the same status.
    if _stylometry_status.get('status') == 'running':
        return jsonify({'status': 'running', 'message': 'Analysis already in progress'}), 409

    _stylometry_status = {'status': 'running', 'progress': 0, 'message': 'מתחיל ניתוח...', 'results': None}

    def run_analysis():
        """Worker: run the analysis and publish progress/results in the status dict."""
        global _stylometry_status
        try:
            from stylometry import get_stylometry_analyzer

            analyzer = get_stylometry_analyzer()
            analyzer.similarity_threshold = threshold

            def progress_callback(event, *args):
                # Mutates the current status dict in place; progress is mapped
                # to 0-5% (user lookup), 5-75% (features), 75-99% (comparison).
                if event == 'users_found':
                    _stylometry_status['message'] = f'נמצאו {args[0]} משתמשים לניתוח'
                    _stylometry_status['progress'] = 5
                elif event == 'user_processed':
                    current, total, name = args
                    pct = 5 + int(70 * current / max(1, total))
                    _stylometry_status['progress'] = pct
                    _stylometry_status['message'] = f'מעבד {current}/{total}: {name}'
                elif event == 'comparing':
                    current = args[0]
                    # The first 'comparing' event carries no total.
                    total = args[1] if len(args) > 1 else 1
                    pct = 75 + int(25 * current / max(1, total))
                    _stylometry_status['progress'] = min(99, pct)
                    _stylometry_status['message'] = 'משווה דפוסי כתיבה...'

            results = analyzer.analyze_all_users(
                min_messages=min_messages,
                days=days,
                progress_callback=progress_callback
            )

            _stylometry_status = {
                'status': 'completed',
                'progress': 100,
                'message': 'הניתוח הושלם',
                'results': results
            }

        except Exception as e:
            import traceback
            # NOTE(review): the full traceback is exposed to API clients via
            # the status endpoint - consider logging it server-side instead.
            _stylometry_status = {
                'status': 'error',
                'progress': 0,
                'message': str(e),
                'error': traceback.format_exc(),
                'results': None
            }

    # Run in background thread
    thread = threading.Thread(target=run_analysis)
    thread.start()

    return jsonify({'status': 'started'})
2122
+
2123
+
2124
@app.route('/api/stylometry/status')
def api_stylometry_status():
    """Get stylometry analysis status.

    Returns the module-level ``_stylometry_status`` dict serialized as JSON.
    """
    return jsonify(_stylometry_status)
2128
+
2129
+
2130
  def fallback_ai_search(query: str):
2131
  """Fallback search when AI is not available."""
2132
  conn = get_db()
stylometry.py ADDED
@@ -0,0 +1,487 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Stylometry Analysis Module for Hebrew Text
3
+ Detects potential duplicate accounts based on writing style patterns.
4
+ """
5
+
6
+ import re
7
+ import sqlite3
8
+ import math
9
+ from collections import Counter, defaultdict
10
+ from datetime import datetime, timedelta
11
+ from typing import Dict, List, Tuple, Optional
12
+ import json
13
+
14
# Hebrew character range (the Unicode Hebrew block U+0590-U+05FF)
HEBREW_PATTERN = re.compile(r'[\u0590-\u05FF]')
# ASCII Latin letters only
ENGLISH_PATTERN = re.compile(r'[a-zA-Z]')
# Matches a run of one or more consecutive emoji code points.
# The former catch-all range U+24C2-U+1F251 also covered CJK, Hangul and many
# other scripts, so ordinary text was counted as emoji; it is replaced here by
# the individual emoji-bearing ranges.
EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F900-\U0001F9FF"  # supplemental symbols & pictographs
    "\U0001F1E0-\U0001F1FF"  # flags
    "\U0001F170-\U0001F251"  # enclosed alphanumerics / ideographs
    "\U0001F004\U0001F0CF"   # mahjong red dragon, joker card
    "\U00002600-\U000026FF"  # miscellaneous symbols
    "\U00002702-\U000027B0"  # dingbats
    "\U00002B00-\U00002BFF"  # stars, arrows and shapes
    "\U000024C2"             # circled M
    "\U00003030\U0000303D\U00003297\U00003299"  # CJK-block emoji
    "\U0000FE0F"             # emoji variation selector
    "]+",
    flags=re.UNICODE
)

# Common Hebrew slang and expressions
# NOTE(review): these literals were restored from mis-encoded text; verify
# them against the upstream file.
HEBREW_SLANG = ['אחלה', 'סבבה', 'יאללה', 'וואלה', 'באסה', 'חבל', 'מגניב', 'אשכרה', 'חחחח', 'חחח', 'הההה', 'ממממ']
# NOTE(review): the second entry was ambiguous after decoding - confirm.
HEBREW_ACRONYMS = ['בעזהש', 'אכא', 'לול', 'בטח', 'נלענד', 'תנצבה', 'זאת']
32
+
33
+
34
class StyleFeatures:
    """Stylometric measurements collected for one user.

    Every metric starts at a neutral value (zero / empty) and is filled in
    by the analyzer after the user's messages have been processed.
    """

    def __init__(self, user_id: int, user_name: str):
        # Identity
        self.user_id = user_id
        self.user_name = user_name
        self.message_count = 0

        # Message / word length statistics
        self.avg_message_length = self.avg_word_length = self.std_message_length = 0.0

        # Share of each character class
        self.hebrew_ratio = self.english_ratio = 0.0
        self.digit_ratio = self.emoji_ratio = 0.0

        # Punctuation usage
        self.comma_rate = self.period_rate = 0.0
        self.question_rate = self.exclamation_rate = 0.0
        self.ellipsis_rate = 0.0  # occurrences of "..."

        # Distinctive habits
        self.caps_ratio = 0.0
        self.repeated_chars_rate = 0.0  # same character repeated many times in a row
        self.slang_rate = 0.0

        # Activity timing: one slot per hour of the day
        self.hour_distribution = [0.0 for _ in range(24)]
        self.weekend_ratio = 0.0

        # Vocabulary
        self.unique_word_ratio = 0.0
        self.short_message_ratio = 0.0  # messages with fewer than 5 words

        # Most frequent character bigrams, as normalized frequencies
        self.char_bigrams: Dict[str, float] = {}

        # Flat numeric vector consumed by the similarity calculation
        self.feature_vector: List[float] = []

    def to_dict(self) -> dict:
        """Return a JSON-friendly summary with rounded metric values."""
        summary = {
            'user_id': self.user_id,
            'user_name': self.user_name,
            'message_count': self.message_count,
        }
        # (attribute name, decimal places), in output order
        rounded_fields = (
            ('avg_message_length', 2),
            ('avg_word_length', 2),
            ('hebrew_ratio', 3),
            ('english_ratio', 3),
            ('emoji_ratio', 3),
            ('question_rate', 3),
            ('exclamation_rate', 3),
            ('ellipsis_rate', 3),
            ('repeated_chars_rate', 3),
            ('weekend_ratio', 3),
            ('unique_word_ratio', 3),
        )
        for attr, digits in rounded_fields:
            summary[attr] = round(getattr(self, attr), digits)
        return summary
96
+
97
+
98
class StylometryAnalyzer:
    """Analyzes writing styles to detect potential duplicate accounts.

    Reads users and messages from a SQLite database, extracts a StyleFeatures
    record per user and compares every pair of users; pairs whose similarity
    reaches ``similarity_threshold`` are reported as potential duplicates.
    """

    # Same character four or more times in a row.
    _REPEATED_CHAR_RE = re.compile(r'(.)\1{3,}')
    _WHITESPACE_RE = re.compile(r'\s+')

    def __init__(self, db_path: str = 'telegram_data.db'):
        self.db_path = db_path
        self.user_features: Dict[int, "StyleFeatures"] = {}
        self.similarity_threshold = 0.85  # Adjustable threshold

    def _fetch_all(self, query: str, params: tuple) -> list:
        """Run a query on a fresh connection and return all rows.

        The connection is closed even when the query raises.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            return conn.execute(query, params).fetchall()
        finally:
            conn.close()

    @staticmethod
    def _cutoff_date(days: int) -> str:
        """Return the date ``days`` days before now as 'YYYY-MM-DD'."""
        return (datetime.now() - timedelta(days=days)).strftime('%Y-%m-%d')

    @staticmethod
    def _parse_timestamp(date_str) -> Optional[datetime]:
        """Parse a message timestamp; return None when it cannot be parsed.

        Accepts ISO strings containing 'T' (a trailing 'Z' is read as UTC)
        and 'YYYY-MM-DD HH:MM:SS' strings (anything after the seconds is
        ignored).
        """
        try:
            if 'T' in date_str:
                return datetime.fromisoformat(date_str.replace('Z', '+00:00'))
            return datetime.strptime(date_str[:19], '%Y-%m-%d %H:%M:%S')
        except (TypeError, ValueError, AttributeError):
            # Missing or malformed timestamp: the message still counts for the
            # text features, it is only left out of the time statistics.
            return None

    def get_active_users(self, min_messages: int = 300, days: int = 365) -> List[Tuple[int, str, int]]:
        """Get users active in the last N days with at least min_messages.

        Returns:
            (user id, user name, message count) rows, most active first.
        """
        query = """
            SELECT u.id, u.name, COUNT(m.id) as msg_count
            FROM users u
            JOIN messages m ON u.id = m.sender_id
            WHERE m.date >= ?
            GROUP BY u.id
            HAVING msg_count >= ?
            ORDER BY msg_count DESC
        """
        return self._fetch_all(query, (self._cutoff_date(days), min_messages))

    def get_user_messages(self, user_id: int, days: int = 365) -> List[Tuple[str, str]]:
        """Get the non-empty messages of a user as (text, date), oldest first."""
        query = """
            SELECT text, date FROM messages
            WHERE sender_id = ? AND date >= ? AND text IS NOT NULL AND text != ''
            ORDER BY date
        """
        return self._fetch_all(query, (user_id, self._cutoff_date(days)))

    def extract_features(self, user_id: int, user_name: str, messages: List[Tuple[str, str]]) -> "StyleFeatures":
        """Extract stylometric features from user messages.

        Args:
            user_id: id stored on the returned features.
            user_name: display name stored on the returned features.
            messages: (text, date string) pairs; entries with empty text are
                skipped and do not count towards the per-message rates.

        Returns:
            A populated StyleFeatures. With no messages at all, the features
            keep their defaults and ``feature_vector`` stays empty.
        """
        features = StyleFeatures(user_id, user_name)
        features.message_count = len(messages)

        if not messages:
            return features

        # Collect statistics
        message_lengths = []
        word_lengths = []
        unique_words = set()
        short_messages = 0

        hebrew_chars = 0
        english_chars = 0
        digit_chars = 0
        total_chars = 0
        caps_chars = 0

        commas = 0
        periods = 0
        questions = 0
        exclamations = 0
        ellipsis = 0

        repeated_char_msgs = 0
        slang_count = 0
        emoji_count = 0

        hour_counts = [0] * 24
        weekend_msgs = 0

        char_bigram_counter = Counter()

        for text, date_str in messages:
            if not text:
                continue

            # Message length
            msg_len = len(text)
            message_lengths.append(msg_len)
            total_chars += msg_len

            # Word analysis
            words = text.split()
            if len(words) < 5:
                short_messages += 1
            word_lengths.extend(len(word) for word in words)
            unique_words.update(word.lower() for word in words)

            # Character analysis
            hebrew_chars += len(HEBREW_PATTERN.findall(text))
            english_chars += len(ENGLISH_PATTERN.findall(text))
            digit_chars += sum(1 for c in text if c.isdigit())
            caps_chars += sum(1 for c in text if c.isupper())

            # Emoji analysis (each run of adjacent emoji counts once)
            emoji_count += len(EMOJI_PATTERN.findall(text))

            # Punctuation
            commas += text.count(',')
            periods += text.count('.')
            questions += text.count('?')
            exclamations += text.count('!')
            ellipsis += text.count('...')

            # Repeated characters pattern (a letter stretched 4+ times)
            if self._REPEATED_CHAR_RE.search(text):
                repeated_char_msgs += 1

            # Slang detection: count the message once, however many hits
            if any(slang in text for slang in HEBREW_SLANG):
                slang_count += 1

            # Time analysis
            dt = self._parse_timestamp(date_str)
            if dt is not None:
                hour_counts[dt.hour] += 1
                # weekday(): Monday=0, so Saturday=5, Sunday=6.
                # NOTE(review): confirm Sat/Sun is the intended weekend for
                # this user base.
                if dt.weekday() >= 5:
                    weekend_msgs += 1

            # Character bigrams over lower-cased, whitespace-collapsed text
            clean_text = self._WHITESPACE_RE.sub(' ', text.lower())
            char_bigram_counter.update(
                pair
                for pair in (clean_text[i:i + 2] for i in range(len(clean_text) - 1))
                if pair.strip()
            )

        # Rates are per processed message; skipped (empty) entries must not
        # dilute them. "or 1" keeps the all-empty case at zero rates.
        n_msgs = len(message_lengths) or 1

        # Calculate averages
        if message_lengths:
            mean_len = sum(message_lengths) / len(message_lengths)
            features.avg_message_length = mean_len
            variance = sum((x - mean_len) ** 2 for x in message_lengths) / len(message_lengths)
            features.std_message_length = math.sqrt(variance)

        if word_lengths:
            features.avg_word_length = sum(word_lengths) / len(word_lengths)

        # Character ratios
        if total_chars > 0:
            features.hebrew_ratio = hebrew_chars / total_chars
            features.english_ratio = english_chars / total_chars
            features.digit_ratio = digit_chars / total_chars
            features.emoji_ratio = emoji_count / total_chars
            features.caps_ratio = caps_chars / max(1, english_chars)

        # Punctuation rates (per message)
        features.comma_rate = commas / n_msgs
        features.period_rate = periods / n_msgs
        features.question_rate = questions / n_msgs
        features.exclamation_rate = exclamations / n_msgs
        features.ellipsis_rate = ellipsis / n_msgs

        # Special patterns
        features.repeated_chars_rate = repeated_char_msgs / n_msgs
        features.slang_rate = slang_count / n_msgs

        # Time patterns
        total_hour_msgs = sum(hour_counts)
        if total_hour_msgs > 0:
            features.hour_distribution = [h / total_hour_msgs for h in hour_counts]
        features.weekend_ratio = weekend_msgs / n_msgs

        # Word patterns
        if word_lengths:
            features.unique_word_ratio = len(unique_words) / len(word_lengths)
        features.short_message_ratio = short_messages / n_msgs

        # Top character bigrams (normalized by the count of all bigrams)
        total_bigrams = sum(char_bigram_counter.values())
        if total_bigrams > 0:
            top_bigrams = char_bigram_counter.most_common(50)
            features.char_bigrams = {bg: count / total_bigrams for bg, count in top_bigrams}

        # Build feature vector for similarity calculation
        features.feature_vector = self._build_feature_vector(features)

        return features

    def _build_feature_vector(self, f: "StyleFeatures") -> List[float]:
        """Build the scaled feature vector used for similarity comparison.

        16 scalar features followed by the 24-slot hour distribution.
        """
        vector = [
            f.avg_message_length / 100,  # Normalize to ~1
            f.avg_word_length / 10,
            f.hebrew_ratio,
            f.english_ratio,
            f.emoji_ratio * 10,  # Scale up small values
            f.question_rate,
            f.exclamation_rate,
            f.ellipsis_rate * 5,
            f.repeated_chars_rate * 10,
            f.weekend_ratio,
            f.unique_word_ratio,
            f.short_message_ratio,
            f.caps_ratio,
            f.slang_rate,
            f.comma_rate,
            f.period_rate,
        ]

        # Add hour distribution (24 values)
        vector.extend(f.hour_distribution)

        return vector

    def calculate_similarity(self, f1: "StyleFeatures", f2: "StyleFeatures") -> float:
        """Return the combined similarity of two users.

        0.7 * cosine similarity of the feature vectors plus 0.3 * bigram
        overlap. Returns 0.0 when either vector is empty, all zeros, or the
        vectors differ in length.
        """
        v1 = f1.feature_vector
        v2 = f2.feature_vector

        if not v1 or not v2 or len(v1) != len(v2):
            return 0.0

        # Cosine similarity
        dot_product = sum(a * b for a, b in zip(v1, v2))
        norm1 = math.sqrt(sum(a * a for a in v1))
        norm2 = math.sqrt(sum(b * b for b in v2))

        if norm1 == 0 or norm2 == 0:
            return 0.0

        cosine_sim = dot_product / (norm1 * norm2)

        # Also compare character bigrams (Jaccard-like)
        bigram_sim = self._compare_bigrams(f1.char_bigrams, f2.char_bigrams)

        # Weighted combination
        return 0.7 * cosine_sim + 0.3 * bigram_sim

    def _compare_bigrams(self, bg1: Dict[str, float], bg2: Dict[str, float]) -> float:
        """Compare two bigram frequency maps.

        Returns sum(min) / sum(max) over the union of bigrams, or 0.0 when
        either map is empty.
        """
        if not bg1 or not bg2:
            return 0.0

        intersection = 0.0
        union = 0.0
        for bg in set(bg1) | set(bg2):
            freq1 = bg1.get(bg, 0)
            freq2 = bg2.get(bg, 0)
            intersection += min(freq1, freq2)
            union += max(freq1, freq2)

        if union == 0:
            return 0.0

        return intersection / union

    def analyze_all_users(self, min_messages: int = 300, days: int = 365,
                          progress_callback=None) -> Dict:
        """Analyze all active users and find potential duplicates.

        Args:
            min_messages: minimum messages in the window for a user to count.
            days: look-back window in days.
            progress_callback: optional callable invoked as
                ('users_found', total), ('user_processed', index, total, name),
                ('comparing', 0) and ('comparing', done, total).

        Returns:
            Dict with the number of users analyzed, the threshold in percent,
            the number of matching pairs, the pairs sorted by similarity
            (highest first) and the summary of every analyzed user.
        """
        # Get active users
        users = self.get_active_users(min_messages, days)
        total_users = len(users)

        if progress_callback:
            progress_callback('users_found', total_users)

        # Extract features for each user
        self.user_features = {}
        for position, (user_id, user_name, _msg_count) in enumerate(users, start=1):
            # Users without a name get a stable placeholder; report the same
            # name to the progress callback instead of a raw None.
            display_name = user_name or f"User_{user_id}"
            messages = self.get_user_messages(user_id, days)
            self.user_features[user_id] = self.extract_features(user_id, display_name, messages)

            if progress_callback:
                progress_callback('user_processed', position, total_users, display_name)

        # Find similar pairs
        if progress_callback:
            progress_callback('comparing', 0)

        similar_pairs = []
        all_features = list(self.user_features.values())
        total_comparisons = len(all_features) * (len(all_features) - 1) // 2
        comparison_count = 0

        for index, f1 in enumerate(all_features):
            for f2 in all_features[index + 1:]:
                similarity = self.calculate_similarity(f1, f2)

                if similarity >= self.similarity_threshold:
                    similar_pairs.append({
                        'user1': f1.to_dict(),
                        'user2': f2.to_dict(),
                        'similarity': round(similarity * 100, 1),
                        'details': self._get_similarity_details(f1, f2)
                    })

                comparison_count += 1
                # Throttle progress reports to one per 100 comparisons.
                if progress_callback and comparison_count % 100 == 0:
                    progress_callback('comparing', comparison_count, total_comparisons)

        # Sort by similarity (highest first)
        similar_pairs.sort(key=lambda pair: pair['similarity'], reverse=True)

        return {
            'total_users_analyzed': total_users,
            'threshold': self.similarity_threshold * 100,
            'potential_duplicates': len(similar_pairs),
            'pairs': similar_pairs,
            'all_users': [f.to_dict() for f in self.user_features.values()]
        }

    def _get_similarity_details(self, f1: "StyleFeatures", f2: "StyleFeatures") -> List[str]:
        """Get human-readable (Hebrew) notes on which features are close."""
        details = []

        # Message length similarity
        len_diff = abs(f1.avg_message_length - f2.avg_message_length)
        if len_diff < 10:
            details.append(f"אורך הודעה דומה ({f1.avg_message_length:.0f} vs {f2.avg_message_length:.0f})")

        # Hebrew/English ratio
        heb_diff = abs(f1.hebrew_ratio - f2.hebrew_ratio)
        if heb_diff < 0.1:
            details.append(f"יחס עברית/אנגלית דומה ({f1.hebrew_ratio:.0%} vs {f2.hebrew_ratio:.0%})")

        # Emoji usage
        emoji_diff = abs(f1.emoji_ratio - f2.emoji_ratio)
        if emoji_diff < 0.01:
            details.append("שימוש דומה באימוג'י")

        # Question marks
        q_diff = abs(f1.question_rate - f2.question_rate)
        if q_diff < 0.1:
            details.append("שימוש דומה בסימני שאלה")

        # Weekend activity
        weekend_diff = abs(f1.weekend_ratio - f2.weekend_ratio)
        if weekend_diff < 0.1:
            details.append("פעילות דומה בסופ\"ש")

        # Repeated characters: only worth mentioning when the habit is present
        if abs(f1.repeated_chars_rate - f2.repeated_chars_rate) < 0.05:
            if f1.repeated_chars_rate > 0.1:
                details.append("שניהם משתמשים בתווים חוזרים (כמו כןןןןן)")

        # Time patterns: overlap of the two hour-of-day distributions
        hour_sim = sum(min(h1, h2) for h1, h2 in zip(f1.hour_distribution, f2.hour_distribution))
        if hour_sim > 0.7:
            details.append("דפוס שעות פעילות דומה")

        return details
477
+
478
+
479
# Process-wide analyzer, created lazily on first request
_analyzer_instance: Optional[StylometryAnalyzer] = None


def get_stylometry_analyzer() -> StylometryAnalyzer:
    """Return the shared StylometryAnalyzer, building it on the first call."""
    global _analyzer_instance
    if _analyzer_instance is not None:
        return _analyzer_instance
    _analyzer_instance = StylometryAnalyzer()
    return _analyzer_instance
templates/maintenance.html ADDED
@@ -0,0 +1,750 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="he" dir="rtl">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>ืชื—ื–ื•ืงื” - Telegram Analytics</title>
7
+ <style>
8
+ * {
9
+ margin: 0;
10
+ padding: 0;
11
+ box-sizing: border-box;
12
+ }
13
+
14
+ body {
15
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
16
+ background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
17
+ min-height: 100vh;
18
+ color: #e4e4e4;
19
+ }
20
+
21
+ .header {
22
+ background: rgba(0, 0, 0, 0.3);
23
+ padding: 15px 30px;
24
+ display: flex;
25
+ justify-content: space-between;
26
+ align-items: center;
27
+ border-bottom: 1px solid rgba(255, 255, 255, 0.1);
28
+ }
29
+
30
+ .header h1 {
31
+ font-size: 1.5rem;
32
+ color: #ff6b6b;
33
+ }
34
+
35
+ .nav-links {
36
+ display: flex;
37
+ gap: 20px;
38
+ }
39
+
40
+ .nav-links a {
41
+ color: #a0a0a0;
42
+ text-decoration: none;
43
+ padding: 8px 16px;
44
+ border-radius: 6px;
45
+ transition: all 0.3s ease;
46
+ }
47
+
48
+ .nav-links a:hover {
49
+ background: rgba(255, 255, 255, 0.1);
50
+ color: #fff;
51
+ }
52
+
53
+ .nav-links a.active {
54
+ background: rgba(255, 107, 107, 0.2);
55
+ color: #ff6b6b;
56
+ }
57
+
58
+ /* Password Modal */
59
+ .modal-overlay {
60
+ position: fixed;
61
+ top: 0;
62
+ left: 0;
63
+ right: 0;
64
+ bottom: 0;
65
+ background: rgba(0, 0, 0, 0.8);
66
+ display: flex;
67
+ align-items: center;
68
+ justify-content: center;
69
+ z-index: 1000;
70
+ }
71
+
72
+ .modal-overlay.hidden {
73
+ display: none;
74
+ }
75
+
76
+ .modal {
77
+ background: #1e2a3a;
78
+ padding: 40px;
79
+ border-radius: 12px;
80
+ text-align: center;
81
+ border: 1px solid rgba(255, 107, 107, 0.3);
82
+ box-shadow: 0 10px 40px rgba(0, 0, 0, 0.5);
83
+ }
84
+
85
+ .modal h2 {
86
+ margin-bottom: 20px;
87
+ color: #ff6b6b;
88
+ }
89
+
90
+ .modal input {
91
+ padding: 12px 20px;
92
+ font-size: 1.2rem;
93
+ border: 2px solid #3a4a5a;
94
+ border-radius: 8px;
95
+ background: #0d1520;
96
+ color: #fff;
97
+ text-align: center;
98
+ letter-spacing: 4px;
99
+ width: 200px;
100
+ }
101
+
102
+ .modal input:focus {
103
+ outline: none;
104
+ border-color: #ff6b6b;
105
+ }
106
+
107
+ .modal button {
108
+ display: block;
109
+ margin: 20px auto 0;
110
+ padding: 12px 40px;
111
+ font-size: 1rem;
112
+ background: #ff6b6b;
113
+ color: #fff;
114
+ border: none;
115
+ border-radius: 8px;
116
+ cursor: pointer;
117
+ transition: background 0.3s ease;
118
+ }
119
+
120
+ .modal button:hover {
121
+ background: #ff5252;
122
+ }
123
+
124
+ .modal .error {
125
+ color: #ff6b6b;
126
+ margin-top: 15px;
127
+ font-size: 0.9rem;
128
+ }
129
+
130
+ /* Main Content */
131
+ .main-content {
132
+ padding: 30px;
133
+ max-width: 1400px;
134
+ margin: 0 auto;
135
+ }
136
+
137
+ .main-content.locked {
138
+ filter: blur(10px);
139
+ pointer-events: none;
140
+ }
141
+
142
+ .section {
143
+ background: rgba(255, 255, 255, 0.05);
144
+ border-radius: 12px;
145
+ padding: 25px;
146
+ margin-bottom: 25px;
147
+ border: 1px solid rgba(255, 255, 255, 0.1);
148
+ }
149
+
150
+ .section h2 {
151
+ color: #ff6b6b;
152
+ margin-bottom: 15px;
153
+ font-size: 1.3rem;
154
+ }
155
+
156
+ .section p {
157
+ color: #a0a0a0;
158
+ margin-bottom: 15px;
159
+ line-height: 1.6;
160
+ }
161
+
162
+ /* Analysis Controls */
163
+ .controls {
164
+ display: flex;
165
+ gap: 20px;
166
+ align-items: center;
167
+ flex-wrap: wrap;
168
+ }
169
+
170
+ .control-group {
171
+ display: flex;
172
+ flex-direction: column;
173
+ gap: 5px;
174
+ }
175
+
176
+ .control-group label {
177
+ font-size: 0.85rem;
178
+ color: #a0a0a0;
179
+ }
180
+
181
+ .control-group input, .control-group select {
182
+ padding: 10px 15px;
183
+ border: 1px solid #3a4a5a;
184
+ border-radius: 6px;
185
+ background: #0d1520;
186
+ color: #fff;
187
+ font-size: 1rem;
188
+ }
189
+
190
+ .btn-primary {
191
+ padding: 12px 30px;
192
+ background: linear-gradient(135deg, #ff6b6b, #ff5252);
193
+ color: #fff;
194
+ border: none;
195
+ border-radius: 8px;
196
+ font-size: 1rem;
197
+ cursor: pointer;
198
+ transition: all 0.3s ease;
199
+ margin-top: 20px;
200
+ }
201
+
202
+ .btn-primary:hover {
203
+ transform: translateY(-2px);
204
+ box-shadow: 0 5px 20px rgba(255, 107, 107, 0.3);
205
+ }
206
+
207
+ .btn-primary:disabled {
208
+ opacity: 0.5;
209
+ cursor: not-allowed;
210
+ transform: none;
211
+ }
212
+
213
+ /* Progress */
214
+ .progress-container {
215
+ margin-top: 20px;
216
+ display: none;
217
+ }
218
+
219
+ .progress-container.active {
220
+ display: block;
221
+ }
222
+
223
+ .progress-bar {
224
+ height: 8px;
225
+ background: #1a2535;
226
+ border-radius: 4px;
227
+ overflow: hidden;
228
+ margin-bottom: 10px;
229
+ }
230
+
231
+ .progress-fill {
232
+ height: 100%;
233
+ background: linear-gradient(90deg, #ff6b6b, #ff5252);
234
+ width: 0%;
235
+ transition: width 0.3s ease;
236
+ }
237
+
238
+ .progress-text {
239
+ color: #a0a0a0;
240
+ font-size: 0.9rem;
241
+ }
242
+
243
+ /* Results */
244
+ .results-container {
245
+ display: none;
246
+ }
247
+
248
+ .results-container.active {
249
+ display: block;
250
+ }
251
+
252
+ .stats-grid {
253
+ display: grid;
254
+ grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
255
+ gap: 20px;
256
+ margin-bottom: 25px;
257
+ }
258
+
259
+ .stat-card {
260
+ background: rgba(0, 0, 0, 0.3);
261
+ padding: 20px;
262
+ border-radius: 10px;
263
+ text-align: center;
264
+ }
265
+
266
+ .stat-card .value {
267
+ font-size: 2rem;
268
+ font-weight: bold;
269
+ color: #ff6b6b;
270
+ }
271
+
272
+ .stat-card .label {
273
+ color: #a0a0a0;
274
+ font-size: 0.9rem;
275
+ margin-top: 5px;
276
+ }
277
+
278
+ /* Pairs List */
279
+ .pairs-list {
280
+ display: flex;
281
+ flex-direction: column;
282
+ gap: 15px;
283
+ }
284
+
285
+ .pair-card {
286
+ background: rgba(0, 0, 0, 0.3);
287
+ border-radius: 10px;
288
+ padding: 20px;
289
+ border-right: 4px solid;
290
+ }
291
+
292
+ .pair-card.high {
293
+ border-color: #ff5252;
294
+ }
295
+
296
+ .pair-card.medium {
297
+ border-color: #ffa726;
298
+ }
299
+
300
+ .pair-card.low {
301
+ border-color: #66bb6a;
302
+ }
303
+
304
+ .pair-header {
305
+ display: flex;
306
+ justify-content: space-between;
307
+ align-items: center;
308
+ margin-bottom: 15px;
309
+ }
310
+
311
+ .pair-users {
312
+ display: flex;
313
+ gap: 20px;
314
+ align-items: center;
315
+ }
316
+
317
+ .user-info {
318
+ display: flex;
319
+ flex-direction: column;
320
+ }
321
+
322
+ .user-name {
323
+ font-weight: bold;
324
+ color: #fff;
325
+ }
326
+
327
+ .user-stats {
328
+ font-size: 0.85rem;
329
+ color: #a0a0a0;
330
+ }
331
+
332
+ .vs-badge {
333
+ background: #3a4a5a;
334
+ padding: 5px 12px;
335
+ border-radius: 15px;
336
+ font-size: 0.8rem;
337
+ color: #fff;
338
+ }
339
+
340
+ .similarity-badge {
341
+ font-size: 1.5rem;
342
+ font-weight: bold;
343
+ }
344
+
345
+ .pair-card.high .similarity-badge {
346
+ color: #ff5252;
347
+ }
348
+
349
+ .pair-card.medium .similarity-badge {
350
+ color: #ffa726;
351
+ }
352
+
353
+ .pair-card.low .similarity-badge {
354
+ color: #66bb6a;
355
+ }
356
+
357
+ .pair-details {
358
+ display: flex;
359
+ flex-wrap: wrap;
360
+ gap: 10px;
361
+ }
362
+
363
+ .detail-tag {
364
+ background: rgba(255, 107, 107, 0.1);
365
+ color: #ff6b6b;
366
+ padding: 5px 12px;
367
+ border-radius: 15px;
368
+ font-size: 0.85rem;
369
+ }
370
+
371
+ .no-duplicates {
372
+ text-align: center;
373
+ padding: 40px;
374
+ color: #66bb6a;
375
+ }
376
+
377
+ .no-duplicates svg {
378
+ width: 60px;
379
+ height: 60px;
380
+ margin-bottom: 15px;
381
+ }
382
+
383
+ /* Feature comparison table */
384
+ .comparison-toggle {
385
+ background: none;
386
+ border: 1px solid #3a4a5a;
387
+ color: #a0a0a0;
388
+ padding: 5px 15px;
389
+ border-radius: 5px;
390
+ cursor: pointer;
391
+ font-size: 0.85rem;
392
+ margin-top: 10px;
393
+ }
394
+
395
+ .comparison-toggle:hover {
396
+ border-color: #ff6b6b;
397
+ color: #ff6b6b;
398
+ }
399
+
400
+ .comparison-table {
401
+ display: none;
402
+ margin-top: 15px;
403
+ width: 100%;
404
+ font-size: 0.85rem;
405
+ }
406
+
407
+ .comparison-table.active {
408
+ display: table;
409
+ }
410
+
411
+ .comparison-table th, .comparison-table td {
412
+ padding: 8px 12px;
413
+ text-align: right;
414
+ border-bottom: 1px solid #2a3a4a;
415
+ }
416
+
417
+ .comparison-table th {
418
+ color: #a0a0a0;
419
+ font-weight: normal;
420
+ }
421
+
422
+ .comparison-table td {
423
+ color: #fff;
424
+ }
425
+
426
+ /* Responsive */
427
+ @media (max-width: 768px) {
428
+ .header {
429
+ flex-direction: column;
430
+ gap: 15px;
431
+ }
432
+
433
+ .nav-links {
434
+ flex-wrap: wrap;
435
+ justify-content: center;
436
+ }
437
+
438
+ .controls {
439
+ flex-direction: column;
440
+ align-items: stretch;
441
+ }
442
+
443
+ .pair-users {
444
+ flex-direction: column;
445
+ text-align: center;
446
+ }
447
+ }
448
+ </style>
449
+ </head>
450
+ <body>
451
+ <!-- Password Modal -->
452
+ <div class="modal-overlay" id="password-modal">
453
+ <div class="modal">
454
+ <h2>אזור מוגן</h2>
455
+ <p style="color: #a0a0a0; margin-bottom: 20px;">הזן סיסמה כדי להיכנס לאזור התחזוקה</p>
456
+ <input type="password" id="password-input" placeholder="******" maxlength="10" autofocus>
457
+ <button onclick="checkPassword()">כניסה</button>
458
+ <p class="error hidden" id="password-error">סיסמה שגויה</p>
459
+ </div>
460
+ </div>
461
+
462
+ <header class="header">
463
+ <h1>תחזוקה</h1>
464
+ <nav class="nav-links">
465
+ <a href="/">סטטיסטיקות</a>
466
+ <a href="/chat">צ'אט</a>
467
+ <a href="/ai-search">AI Search</a>
468
+ <a href="/maintenance" class="active">תחזוקה</a>
469
+ </nav>
470
+ </header>
471
+
472
+ <main class="main-content locked" id="main-content">
473
+ <!-- Stylometry Analysis Section -->
474
+ <section class="section">
475
+ <h2>זיהוי משתמשים כפולים (Stylometry)</h2>
476
+ <p>
477
+ המערכת מנתחת את סגנון הכתיבה של כל משתמש כדי לזהות חשבונות שייתכן ושייכים לאותו אדם.
478
+ הניתוח כולל: אורך הודעות, שימוש באימוג'י, סימני פיסוק, יחס עברית/אנגלית, שעות פעילות ועוד.
479
+ </p>
480
+
481
+ <div class="controls">
482
+ <div class="control-group">
483
+ <label>מינימום הודעות</label>
484
+ <input type="number" id="min-messages" value="300" min="50" max="10000">
485
+ </div>
486
+ <div class="control-group">
487
+ <label>תקופה (ימים)</label>
488
+ <input type="number" id="days" value="365" min="30" max="730">
489
+ </div>
490
+ <div class="control-group">
491
+ <label>סף דמיון (%)</label>
492
+ <input type="number" id="threshold" value="85" min="50" max="99">
493
+ </div>
494
+ </div>
495
+
496
+ <button class="btn-primary" id="analyze-btn" onclick="startAnalysis()">
497
+ התחל ניתוח
498
+ </button>
499
+
500
+ <!-- Progress -->
501
+ <div class="progress-container" id="progress-container">
502
+ <div class="progress-bar">
503
+ <div class="progress-fill" id="progress-fill"></div>
504
+ </div>
505
+ <p class="progress-text" id="progress-text">מתחיל ניתוח...</p>
506
+ </div>
507
+ </section>
508
+
509
+ <!-- Results Section -->
510
+ <section class="section results-container" id="results-container">
511
+ <h2>תוצאות הניתוח</h2>
512
+
513
+ <div class="stats-grid" id="stats-grid">
514
+ <!-- Filled by JS -->
515
+ </div>
516
+
517
+ <div id="pairs-container">
518
+ <!-- Filled by JS -->
519
+ </div>
520
+ </section>
521
+ </main>
522
+
523
+ <script>
524
// SECURITY NOTE(review): this password is delivered to every visitor as part of
// the page source, so the check below is only a cosmetic gate. Anything that
// must really be protected needs to be enforced by the server — TODO confirm
// whether the /maintenance route and the stylometry API have their own auth.
const CORRECT_PASSWORD = '8716156';

// Results of the most recent completed analysis (assigned by startAnalysis).
let analysisResults = null;

// Skip the password prompt when this tab has already been unlocked.
if (sessionStorage.getItem('maintenance_auth') === 'true') {
    unlockContent();
}

// Submit the password on Enter. 'keydown' is used because 'keypress' is
// deprecated; Enter presses that only confirm an IME composition are ignored.
document.getElementById('password-input').addEventListener('keydown', (e) => {
    if (e.key === 'Enter' && !e.isComposing) checkPassword();
});
536
+
537
/**
 * Compare the typed password with CORRECT_PASSWORD.
 *
 * On a match the unlocked state is remembered in sessionStorage and the page
 * content is revealed; otherwise the error message is shown and the field is
 * cleared and refocused for another attempt.
 */
function checkPassword() {
    const passwordField = document.getElementById('password-input');
    const errorLabel = document.getElementById('password-error');

    // Wrong password: report it and let the user retry.
    if (passwordField.value !== CORRECT_PASSWORD) {
        errorLabel.classList.remove('hidden');
        passwordField.value = '';
        passwordField.focus();
        return;
    }

    sessionStorage.setItem('maintenance_auth', 'true');
    unlockContent();
}
550
+
551
/**
 * Reveal the maintenance page: hide the password modal and remove the
 * "locked" class from the main content area.
 */
function unlockContent() {
    // [element id, classList operation, class name], applied in order.
    const classChanges = [
        ['password-modal', 'add', 'hidden'],
        ['main-content', 'remove', 'locked'],
    ];

    for (const [elementId, operation, className] of classChanges) {
        document.getElementById(elementId).classList[operation](className);
    }
}
555
+
556
/**
 * Start a stylometry analysis on the server and follow it until it finishes.
 *
 * Reads the three numeric controls (min-messages, days, threshold) and POSTs
 * them as JSON to /api/stylometry/analyze; the threshold is entered as a
 * percentage and sent as a 0-1 fraction. It then polls /api/stylometry/status
 * every 500 ms and mirrors the reported progress and message into the
 * progress bar. When the status becomes "completed" the results are stored in
 * analysisResults and passed to displayResults(); on any failure the error
 * text is shown in the progress label. The analyze button is disabled while
 * the request is in flight and is always re-enabled afterwards.
 */
async function startAnalysis() {
    const POLL_INTERVAL_MS = 500;
    const readInt = (id) => Number.parseInt(document.getElementById(id).value, 10);

    const minMessages = readInt('min-messages');
    const days = readInt('days');
    const threshold = readInt('threshold');

    const btn = document.getElementById('analyze-btn');
    const progress = document.getElementById('progress-container');
    const progressFill = document.getElementById('progress-fill');
    const progressText = document.getElementById('progress-text');

    btn.disabled = true;
    progress.classList.add('active');
    progressFill.style.width = '0%';
    progressText.textContent = 'מתחיל ניתוח...';

    try {
        // An empty or non-numeric field would otherwise be serialized as null.
        if ([minMessages, days, threshold].some((value) => Number.isNaN(value))) {
            throw new Error('Invalid analysis parameters');
        }

        // Start the analysis
        const response = await fetch('/api/stylometry/analyze', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                min_messages: minMessages,
                days: days,
                threshold: threshold / 100
            })
        });

        if (!response.ok) {
            throw new Error('Analysis failed');
        }

        // Poll for progress until the server reports a terminal state.
        let completed = false;
        while (!completed) {
            await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));

            const statusRes = await fetch('/api/stylometry/status');
            if (!statusRes.ok) {
                // Avoid parsing an error page as JSON or polling a broken endpoint forever.
                throw new Error('Status request failed (HTTP ' + statusRes.status + ')');
            }
            const status = await statusRes.json();

            if (status.status === 'completed') {
                if (!status.results) {
                    throw new Error('Analysis returned no results');
                }
                completed = true;
                analysisResults = status.results;
                progressFill.style.width = '100%';
                progressText.textContent = 'הניתוח הושלם!';
            } else if (status.status === 'error') {
                // The failure text may arrive in either field; never surface "undefined".
                throw new Error(status.error || status.message || 'Analysis failed');
            } else if (status.status === 'running') {
                const reported = Number(status.progress) || 0;
                const pct = Math.min(100, Math.max(0, reported));
                progressFill.style.width = pct + '%';
                progressText.textContent = status.message || 'מעבד...';
            }
        }

        // Show results after a short pause so the full progress bar is visible.
        setTimeout(() => {
            progress.classList.remove('active');
            displayResults(analysisResults);
        }, 500);

    } catch (error) {
        progressText.textContent = 'שגיאה: ' + error.message;
        progressFill.style.width = '0%';
    } finally {
        btn.disabled = false;
    }
}
622
+
623
/**
 * Render the stylometry analysis results into the results section.
 *
 * Fills the summary cards (users analyzed, suspected duplicates, threshold)
 * and then either a "no duplicates" notice or one card per suspicious pair,
 * each with a collapsible metric comparison table.
 *
 * The markup is assigned through innerHTML, so every server-provided value is
 * passed through escapeHtml() or formatted as a number before interpolation.
 *
 * @param {Object} data Result object with total_users_analyzed,
 *     potential_duplicates, threshold and a pairs array whose items carry
 *     user1, user2, similarity and details.
 */
function displayResults(data) {
    const container = document.getElementById('results-container');
    const statsGrid = document.getElementById('stats-grid');
    const pairsContainer = document.getElementById('pairs-container');

    // Missing metrics are rendered as a dash instead of throwing from toFixed().
    const toNumber = (value) =>
        (value === null || value === undefined || value === '' ? NaN : Number(value));
    const fixed = (value, digits) => {
        const num = toNumber(value);
        return Number.isFinite(num) ? num.toFixed(digits) : '-';
    };
    const percent = (ratio, digits) => {
        const num = toNumber(ratio);
        return Number.isFinite(num) ? `${(num * 100).toFixed(digits)}%` : '-';
    };

    // The element id is also embedded in an inline onclick handler, so keep it
    // to characters that cannot terminate a quoted string or an attribute.
    const safeIdPart = (value) => String(value).replace(/[^A-Za-z0-9_-]/g, '_');

    // One row of the comparison table; cell values must already be safe HTML.
    const metricRow = (label, left, right) => `
                            <tr>
                                <td>${label}</td>
                                <td>${left}</td>
                                <td>${right}</td>
                            </tr>`;

    // Full card markup for one suspicious pair.
    const renderPair = (pair) => {
        const { user1, user2 } = pair;
        const level = pair.similarity >= 95 ? 'high' : (pair.similarity >= 90 ? 'medium' : 'low');
        const pairId = `pair-${safeIdPart(user1.user_id)}-${safeIdPart(user2.user_id)}`;
        const name1 = escapeHtml(user1.user_name);
        const name2 = escapeHtml(user2.user_name);

        const detailTags = (Array.isArray(pair.details) ? pair.details : [])
            .map((detail) => `<span class="detail-tag">${escapeHtml(detail)}</span>`)
            .join('');

        const rows = [
            metricRow('אורך הודעה ממוצע',
                escapeHtml(user1.avg_message_length), escapeHtml(user2.avg_message_length)),
            metricRow('יחס עברית',
                percent(user1.hebrew_ratio, 1), percent(user2.hebrew_ratio, 1)),
            metricRow('יחס אנגלית',
                percent(user1.english_ratio, 1), percent(user2.english_ratio, 1)),
            metricRow("שימוש באימוג'י",
                percent(user1.emoji_ratio, 2), percent(user2.emoji_ratio, 2)),
            metricRow('סימני שאלה (לכל הודעה)',
                fixed(user1.question_rate, 2), fixed(user2.question_rate, 2)),
            metricRow('סימני קריאה (לכל הודעה)',
                fixed(user1.exclamation_rate, 2), fixed(user2.exclamation_rate, 2)),
            metricRow('פעילות בסופ"ש',
                percent(user1.weekend_ratio, 1), percent(user2.weekend_ratio, 1)),
        ].join('');

        return `
                    <div class="pair-card ${level}">
                        <div class="pair-header">
                            <div class="pair-users">
                                <div class="user-info">
                                    <span class="user-name">${name1}</span>
                                    <span class="user-stats">${escapeHtml(user1.message_count)} הודעות</span>
                                </div>
                                <span class="vs-badge">VS</span>
                                <div class="user-info">
                                    <span class="user-name">${name2}</span>
                                    <span class="user-stats">${escapeHtml(user2.message_count)} הודעות</span>
                                </div>
                            </div>
                            <div class="similarity-badge">${escapeHtml(pair.similarity)}%</div>
                        </div>
                        <div class="pair-details">
                            ${detailTags}
                        </div>
                        <button class="comparison-toggle" onclick="toggleComparison('${pairId}')">
                            הצג השוואה מפורטת
                        </button>
                        <table class="comparison-table" id="${pairId}">
                            <tr>
                                <th>מדד</th>
                                <th>${name1}</th>
                                <th>${name2}</th>
                            </tr>${rows}
                        </table>
                    </div>
                `;
    };

    container.classList.add('active');

    // Stats
    statsGrid.innerHTML = `
        <div class="stat-card">
            <div class="value">${escapeHtml(data.total_users_analyzed)}</div>
            <div class="label">משתמשים נבדקו</div>
        </div>
        <div class="stat-card">
            <div class="value">${escapeHtml(data.potential_duplicates)}</div>
            <div class="label">חשודים ככפולים</div>
        </div>
        <div class="stat-card">
            <div class="value">${escapeHtml(data.threshold)}%</div>
            <div class="label">סף דמיון</div>
        </div>
    `;

    // Pairs
    const pairs = Array.isArray(data.pairs) ? data.pairs : [];
    if (pairs.length === 0) {
        pairsContainer.innerHTML = `
            <div class="no-duplicates">
                <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
                    <path d="M22 11.08V12a10 10 0 1 1-5.93-9.14"/>
                    <polyline points="22 4 12 14.01 9 11.01"/>
                </svg>
                <h3>לא נמצאו משתמשים כפולים!</h3>
                <p>כל המשתמשים שנבדקו נראים ייחודיים</p>
            </div>
        `;
    } else {
        pairsContainer.innerHTML =
            '<div class="pairs-list">' + pairs.map(renderPair).join('') + '</div>';
    }
}
737
+
738
/**
 * Show or hide a pair's detailed comparison table by toggling its "active"
 * class.
 *
 * @param {string} id Element id of the comparison table.
 */
function toggleComparison(id) {
    const { classList } = document.getElementById(id);
    classList.toggle('active');
}
742
+
743
/**
 * Escape a value for safe interpolation into HTML markup.
 *
 * Replaces &, <, >, double quotes and single quotes with character
 * references, so the result is safe in element content and in quoted
 * attribute values alike. null and undefined become an empty string; any
 * other value is converted with String() first.
 *
 * @param {*} text Value to escape.
 * @returns {string} The escaped text.
 */
function escapeHtml(text) {
    if (text === null || text === undefined) {
        return '';
    }

    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    };
    return String(text).replace(/[&<>"']/g, (ch) => entities[ch]);
}
748
+ </script>
749
+ </body>
750
+ </html>