rottg commited on
Commit
a99d4dc
·
1 Parent(s): c703ef2

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ telegram.db filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim
2
+
3
+ WORKDIR /app
4
+
5
+ # Install dependencies
6
+ COPY requirements.txt .
7
+ RUN pip install --no-cache-dir -r requirements.txt
8
+
9
+ # Copy application code
10
+ COPY dashboard.py .
11
+ COPY ai_search.py .
12
+ COPY algorithms.py .
13
+ COPY data_structures.py .
14
+ COPY indexer.py .
15
+ COPY search.py .
16
+ COPY semantic_search.py .
17
+ COPY schema.sql .
18
+ COPY static/ static/
19
+ COPY templates/ templates/
20
+
21
+ # Copy database
22
+ COPY telegram.db .
23
+
24
+ # HF Spaces uses port 7860
25
+ ENV PORT=7860
26
+ ENV HOST=0.0.0.0
27
+ ENV DB_PATH=telegram.db
28
+
29
+ EXPOSE 7860
30
+
31
+ CMD ["gunicorn", "dashboard:app", "--bind", "0.0.0.0:7860", "--workers", "2", "--timeout", "120"]
README.md CHANGED
@@ -1,11 +1,1037 @@
1
- ---
2
- title: Telegram Analytics
3
- emoji: 💻
4
- colorFrom: purple
5
- colorTo: green
6
- sdk: docker
7
- pinned: false
8
- short_description: telegram-analytics
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Telegram Analytics Dashboard
3
+ emoji: 📊
4
+ colorFrom: blue
5
+ colorTo: indigo
6
+ sdk: docker
7
+ app_port: 7860
8
+ ---
9
+
10
+ # Telegram JSON Indexer & Analyzer
11
+
12
+ A high-performance system for indexing, searching, and analyzing Telegram chat exports using SQLite FTS5 and advanced algorithms from a Data Structures course. Includes a full-featured **Web Dashboard** with **AI-powered search**.
13
+
14
+ ```
15
+ ╔══════════════════════════════════════════════════════════════════════════════╗
16
+ ║ TELEGRAM CHAT ANALYZER ║
17
+ ║ ║
18
+ ║ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────────────────────┐ ║
19
+ ║ │ JSON │───▶│ INDEXER │───▶│ SQLite │───▶│ WEB DASHBOARD │ ║
20
+ ║ │ Export │ │ Bloom │ │ + FTS5 │ │ ┌─────┬─────┬─────┐ │ ║
21
+ ║ │ │ │ Filter │ │ │ │ │Stats│Users│Chat │ │ ║
22
+ ║ └─────────┘ └─────────┘ └─────────┘ │ ├─────┼─────┼─────┤ │ ║
23
+ ║ │ │Search│ AI │Mod │ │ ║
24
+ ║ │ └─────┴─────┴─────┘ │ ║
25
+ ║ └─────────────────────────┘ ║
26
+ ╚══════════════════════════════════════════════════════════════════════════════╝
27
+ ```
28
+
29
+ ## Features
30
+
31
+ ### Core Features
32
+ - **Full-Text Search** - Fast search with Hebrew support using SQLite FTS5
33
+ - **Fuzzy Search** - Find messages even with typos using trigram similarity
34
+ - **Similar Message Detection** - LCS algorithm finds duplicates/reposts
35
+ - **Conversation Threads** - DFS/BFS traversal reconstructs reply chains
36
+ - **User Rankings** - O(log n) rank queries using AVL Rank Tree
37
+ - **Time Analytics** - Bucket Sort for efficient histograms
38
+ - **Top-K Queries** - Heap-based O(n log k) instead of O(n log n)
39
+ - **Percentiles** - O(n) median/percentiles using Selection algorithm
40
+
41
+ ### Web Dashboard
42
+ - **Interactive Overview** - Charts, stats, activity graphs
43
+ - **User Leaderboard** - Rankings with detailed user profiles
44
+ - **Telegram-like Chat View** - Browse all messages like in Telegram
45
+ - **Advanced Search** - Full-text + fuzzy search with filters
46
+ - **AI-Powered Search** - Natural language queries (Hebrew/English)
47
+ - **Moderation Analytics** - Links, mentions, domains analysis
48
+ - **Database Updates** - Upload new JSON files via web UI
49
+
50
+ ### AI Search (Free Providers)
51
+ - **Ollama** - Local LLM (recommended, 100% free)
52
+ - **Groq** - Free API tier available
53
+ - **Google Gemini** - Free API tier available
54
+
55
+ ---
56
+
57
+ ## Table of Contents
58
+
59
+ 1. [Installation](#installation)
60
+ 2. [Quick Start](#quick-start)
61
+ 3. [Web Dashboard](#web-dashboard)
62
+ 4. [AI Search](#ai-search)
63
+ 5. [Database Updates](#database-updates)
64
+ 6. [Architecture](#architecture)
65
+ 7. [Usage Guide](#usage-guide)
66
+ 8. [Algorithms](#algorithms)
67
+ 9. [API Reference](#api-reference)
68
+ 10. [Examples](#examples)
69
+
70
+ ---
71
+
72
+ ## Installation
73
+
74
+ ### Requirements
75
+
76
+ - Python 3.10 or higher
77
+ - No external packages required for core functionality
78
+
79
+ ### Setup
80
+
81
+ ```bash
82
+ # Clone or download the project
83
+ cd telegram
84
+
85
+ # Verify Python version
86
+ python --version # Should be 3.10+
87
+
88
+ # Test the system
89
+ python algorithms.py # Should print "ALL TESTS PASSED!"
90
+ ```
91
+
92
+ ### Optional: Semantic Search
93
+
94
+ For AI-powered semantic similarity search:
95
+
96
+ ```bash
97
+ pip install numpy faiss-cpu sentence-transformers
98
+ ```
99
+
100
+ ---
101
+
102
+ ## Quick Start
103
+
104
+ ### Step 1: Export from Telegram
105
+
106
+ 1. Open Telegram Desktop
107
+ 2. Go to any chat/group
108
+ 3. Click ⋮ → Export Chat History
109
+ 4. Select JSON format
110
+ 5. Save as `result.json`
111
+
112
+ ### Step 2: Index Your Data
113
+
114
+ ```bash
115
+ python indexer.py result.json --db telegram.db
116
+ ```
117
+
118
+ ### Step 3: Launch Web Dashboard
119
+
120
+ ```bash
121
+ # Start the dashboard (recommended)
122
+ python dashboard.py
123
+
124
+ # Open in browser: http://localhost:5000
125
+ ```
126
+
127
+ ### Step 4: Search & Analyze (CLI)
128
+
129
+ ```bash
130
+ # Search messages
131
+ python search.py "שלום"
132
+
133
+ # View statistics
134
+ python analyzer.py --stats
135
+
136
+ # Find similar messages
137
+ python analyzer.py --similar
138
+ ```
139
+
140
+ ---
141
+
142
+ ## Web Dashboard
143
+
144
+ The web dashboard provides a complete visual interface for analyzing your Telegram data.
145
+
146
+ ### Starting the Dashboard
147
+
148
+ ```bash
149
+ python dashboard.py
150
+ # Or with custom port:
151
+ python dashboard.py --port 8080
152
+ ```
153
+
154
+ ### Dashboard Pages
155
+
156
+ ```
157
+ ┌─────────────────────────────────────────────────────────────────────────┐
158
+ │ WEB DASHBOARD │
159
+ ├─────────────────────────────────────────────────────────────────────────┤
160
+ │ │
161
+ │ 📈 Overview │ Main statistics, charts, activity graphs │
162
+ │ │ - Total messages, users, links, media │
163
+ │ │ - Daily/hourly activity charts │
164
+ │ │ - Top users leaderboard │
165
+ │ │
166
+ │ 👥 Users │ User leaderboard with detailed profiles │
167
+ │ │ - Ranking by message count │
168
+ │ │ - User details modal (hourly activity) │
169
+ │ │ - Export users to CSV │
170
+ │ │
171
+ │ 💬 Chat │ Telegram-like message view │
172
+ │ │ - Browse all messages chronologically │
173
+ │ │ - Filter by user, date, media type │
174
+ │ │ - Click message to view full thread │
175
+ │ │ - AI search with natural language │
176
+ │ │
177
+ │ 🔍 Search │ Advanced search interface │
178
+ │ │ - Full-text search (Hebrew supported) │
179
+ │ │ - AI-powered natural language search │
180
+ │ │ - Boolean operators (AND, OR, NOT) │
181
+ │ │ - Export search results │
182
+ │ │
183
+ │ 🛡️ Moderation │ Content analytics │
184
+ │ │ - Top shared domains │
185
+ │ │ - Most mentioned users │
186
+ │ │ - Link sharers leaderboard │
187
+ │ │ - Word frequency analysis │
188
+ │ │
189
+ │ ⚙️ Settings │ Database management │
190
+ │ │ - View database statistics │
191
+ │ │ - Upload new JSON files │
192
+ │ │ - Automatic duplicate detection │
193
+ │ │
194
+ └─────────────────────────────────────────────────────────────────────────┘
195
+ ```
196
+
197
+ ### Dashboard Features
198
+
199
+ - **Dark Theme** - Modern dark UI, easy on the eyes
200
+ - **RTL Support** - Full Hebrew/Arabic text support
201
+ - **Responsive** - Works on mobile and desktop
202
+ - **Real-time Charts** - Interactive Chart.js visualizations
203
+ - **Export** - Download data as CSV/JSON
204
+
205
+ ---
206
+
207
+ ## AI Search
208
+
209
+ Ask questions about your chat data in natural language (Hebrew or English).
210
+
211
+ ### Setup AI Provider (Free Options)
212
+
213
+ #### Option 1: Ollama (Recommended - 100% Local & Free)
214
+
215
+ ```bash
216
+ # Install Ollama (https://ollama.ai)
217
+ curl -fsSL https://ollama.ai/install.sh | sh
218
+
219
+ # Pull a model
220
+ ollama pull llama3.2
221
+
222
+ # Start Ollama server
223
+ ollama serve
224
+ ```
225
+
226
+ #### Option 2: Groq (Free API Tier)
227
+
228
+ ```bash
229
+ # Get free API key from https://console.groq.com
230
+ export GROQ_API_KEY="your_api_key"
231
+ ```
232
+
233
+ #### Option 3: Google Gemini (Free API Tier)
234
+
235
+ ```bash
236
+ # Get free API key from https://makersuite.google.com/app/apikey
237
+ export GEMINI_API_KEY="your_api_key"
238
+ ```
239
+
240
+ ### AI Search Examples
241
+
242
+ ```
243
+ ┌─────────────────────────────────────────────────────────────────────────┐
244
+ │ 🤖 AI Search - Natural Language Queries │
245
+ ├─────────────────────────────────────────────────────────────────────────┤
246
+ │ │
247
+ │ Query: "מי שלח הכי הרבה הודעות?" │
248
+ │ Answer: המשתמש הפעיל ביותר הוא דני עם 5,432 הודעות │
249
+ │ │
250
+ │ Query: "מתי היו הכי הרבה הודעות?" │
251
+ │ Answer: היום הפעיל ביותר היה 15.03.2024 עם 342 הודעות │
252
+ │ │
253
+ │ Query: "Who mentioned @admin the most?" │
254
+ │ Answer: User "Mike" mentioned @admin 47 times │
255
+ │ │
256
+ │ Query: "הראה הודעות עם קישורים מהשבוע האחרון" │
257
+ │ Answer: נמצאו 23 הודעות עם קישורים... │
258
+ │ │
259
+ └─────────────────────────────────────────────────────────────────────────┘
260
+ ```
261
+
262
+ ### AI Search API
263
+
264
+ ```python
265
+ from ai_search import AISearchEngine
266
+
267
+ # Initialize with Ollama (local)
268
+ ai = AISearchEngine('telegram.db', provider='ollama')
269
+
270
+ # Or with Groq
271
+ ai = AISearchEngine('telegram.db', provider='groq', api_key='your_key')
272
+
273
+ # Search
274
+ result = ai.search("מי הכי פעיל בלילה?")
275
+ print(result['answer']) # Natural language answer
276
+ print(result['sql']) # Generated SQL query
277
+ print(result['results']) # Raw data
278
+ ```
279
+
280
+ ---
281
+
282
+ ## Database Updates
283
+
284
+ Update your database with new JSON exports without losing existing data.
285
+
286
+ ### Via Web UI
287
+
288
+ 1. Go to **Settings** page in the dashboard
289
+ 2. Drag & drop your new `result.json` file
290
+ 3. Wait for processing (duplicate detection automatic)
291
+ 4. See summary of new messages added
292
+
293
+ ### Via CLI
294
+
295
+ ```bash
296
+ # Update existing database with new JSON
297
+ python indexer.py new_export.json --db telegram.db --update
298
+
299
+ # What happens:
300
+ # 1. Loads existing message IDs into Bloom filter (O(n))
301
+ # 2. For each message in JSON:
302
+ # - Check if exists using Bloom filter (O(1))
303
+ # - Only insert if new
304
+ # 3. Re-index FTS if needed
305
+ # 4. Report: X new messages, Y duplicates skipped
306
+ ```
307
+
308
+ ### Incremental Update Process
309
+
310
+ ```
311
+ ┌─────────────────────────────────────────────────────────────────────────┐
312
+ │ INCREMENTAL UPDATE PROCESS │
313
+ ├─────────────────────────────────────────────────────────────────────────┤
314
+ │ │
315
+ │ Existing DB New JSON │
316
+ │ ┌─────────────┐ ┌─────────────┐ │
317
+ │ │ msg_1 ✓ │ │ msg_1 │ → Skip (duplicate) │
318
+ │ │ msg_2 ✓ │ │ msg_2 │ → Skip (duplicate) │
319
+ │ │ msg_3 ✓ │ │ msg_5 NEW │ → Insert │
320
+ │ │ msg_4 ✓ │ │ msg_6 NEW │ → Insert │
321
+ │ └─────────────┘ └─────────────┘ │
322
+ │ │ │ │
323
+ │ │ Bloom Filter │ │
324
+ │ │ ┌───────────┐ │ │
325
+ │ └─────▶│ O(1) test │◀─────────┘ │
326
+ │ └───────────┘ │
327
+ │ │
328
+ │ Result: Only msg_5 and msg_6 added (fast!) │
329
+ │ │
330
+ └─────────────────────────────────────────────────────────────────────────┘
331
+ ```
332
+
333
+ ---
334
+
335
+ ## Architecture
336
+
337
+ ### System Overview
338
+
339
+ ```
340
+ ┌─────────────────────────────────────────────────────────────────┐
341
+ │ INPUT │
342
+ │ ┌─────────────────────────────────────────────────────────┐ │
343
+ │ │ Telegram JSON Export (result.json) │ │
344
+ │ │ ├── messages[] │ │
345
+ │ │ │ ├── id, date, from, text │ │
346
+ │ │ │ ├── reply_to_message_id │ │
347
+ │ │ │ └── text_entities[] (links, mentions) │ │
348
+ │ │ └── ... │ │
349
+ │ └─────────────────────────────────────────────────────────┘ │
350
+ └─────────────────────────┬───────────────────────────────────────┘
351
+
352
+
353
+ ┌─────────────────────────────────────────────────────────────────┐
354
+ │ INDEXER (indexer.py) │
355
+ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
356
+ │ │ Batch │ │ Bloom │ │ Reply │ │
357
+ │ │ Processing │ │ Filter │ │ Graph │ │
358
+ │ │ (1000/tx) │ │ (Dedup O(1))│ │ Builder │ │
359
+ │ └─────────────┘ └─────────────┘ └─────────────┘ │
360
+ └─────────────────────────┬───────────────────────────────────────┘
361
+
362
+
363
+ ┌─────────────────────────────────────────────────────────────────┐
364
+ │ SQLite DATABASE │
365
+ │ ┌─────────────────────────────────────────────────────────┐ │
366
+ │ │ messages │ FTS5 Index │ reply_graph │ │
367
+ │ │ ├── id (PK) │ ├── text_plain │ ├── parent_id │ │
368
+ │ │ ├── text_plain │ └── from_name │ └── child_id │ │
369
+ │ │ ├── from_id │ │ │ │
370
+ │ │ ├── date_unixtime │ entities │ threads │ │
371
+ │ │ └── ... │ ├── links │ └── messages │ │
372
+ │ │ │ └── mentions │ │ │
373
+ │ └─────────────────────────────────────────────────────────┘ │
374
+ └─────────────────────────┬───────────────────────────────────────┘
375
+
376
+ ┌───────────────┼───────────────┐
377
+ ▼ ▼ ▼
378
+ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
379
+ │ SEARCH │ │ ANALYZER │ │ VECTOR │
380
+ │ (search.py) │ │(analyzer.py)│ │ (optional) │
381
+ │ │ │ │ │ │
382
+ │ • FTS5+BM25 │ │ • Top-K │ │ • FAISS │
383
+ │ • Fuzzy │ │ • LCS │ │ • Semantic │
384
+ │ • Threads │ │ • Rank Tree │ │ • Clustering│
385
+ │ • LRU Cache │ │ • Percentile│ │ │
386
+ └─────────────┘ └─────────────┘ └─────────────┘
387
+ ```
388
+
389
+ ### Data Flow
390
+
391
+ ```
392
+ JSON Message Database Tables Search/Analytics
393
+ ─────────── ─────────────── ────────────────
394
+
395
+ { ┌─────────────┐
396
+ "id": 548795, ───▶ │ messages │ ───▶ Full-text search
397
+ "text": "שלום", └─────────────┘ User filtering
398
+ "from": "User1", Date range queries
399
+ "from_id": "user123", ─▶ ┌─────────────┐
400
+ "date_unixtime": ..., │ users │ ───▶ Top users (Heap)
401
+ └─────────────┘ User rank (Rank Tree)
402
+ "text_entities": [
403
+ {"type": "link", ────▶ ┌─────────────┐
404
+ "text": "url"} │ entities │ ───▶ Link analysis
405
+ ], └─────────────┘ Mention network
406
+
407
+ "reply_to_message_id" ─▶ ┌─────────────┐
408
+ │ reply_graph │ ───▶ Thread DFS/BFS
409
+ } └─────────────┘ Conversation view
410
+ ```
411
+
412
+ ### File Structure
413
+
414
+ ```
415
+ telegram/
416
+
417
+ ├── dashboard.py # 🌐 Web Dashboard (Flask)
418
+ │ └── Routes: /, /users, /chat, /search, /moderation, /settings
419
+ │ └── API: /api/overview, /api/users, /api/search, /api/update, etc.
420
+
421
+ ├── ai_search.py # 🤖 AI-Powered Search
422
+ │ └── AISearchEngine class
423
+ │ ├── Natural language to SQL
424
+ │ ├── Ollama/Groq/Gemini providers
425
+ │ └── Hebrew/English support
426
+
427
+ ├── indexer.py # JSON → SQLite indexer
428
+ │ ├── OptimizedIndexer class
429
+ │ │ ├── Batch processing (100x faster)
430
+ │ │ ├── Bloom filter (duplicate detection)
431
+ │ │ └── Graph builder (reply threads)
432
+ │ └── IncrementalIndexer class
433
+ │ ├── Update existing database
434
+ │ ├── Bloom filter duplicate check
435
+ │ └── Only insert new messages
436
+
437
+ ├── search.py # Search interface
438
+ │ └── TelegramSearch class
439
+ │ ├── FTS5 full-text search
440
+ │ ├── Fuzzy trigram search
441
+ │ ├── LRU query cache
442
+ │ └── DFS/BFS thread traversal
443
+
444
+ ├── analyzer.py # Analytics & statistics
445
+ │ └── TelegramAnalyzer class
446
+ │ ├── LCS similar messages
447
+ │ ├── Heap-based Top-K
448
+ │ ├── Selection percentiles
449
+ │ ├── Rank Tree queries
450
+ │ └── Bucket Sort histograms
451
+
452
+ ├── data_structures.py # Core data structures
453
+ │ ├── BloomFilter # O(1) membership test
454
+ │ ├── Trie # O(k) prefix search
455
+ │ ├── LRUCache # O(1) caching
456
+ │ ├── ReplyGraph # DFS/BFS traversal
457
+ │ └── TrigramIndex # Fuzzy matching
458
+
459
+ ├── algorithms.py # Course algorithms
460
+ │ ├── LCS # Similar message detection
461
+ │ ├── TopK (Heap) # Efficient ranking
462
+ │ ├── Selection # O(n) percentiles
463
+ │ ├── RankTree # O(log n) rank queries
464
+ │ └── BucketSort # Time histograms
465
+
466
+ ├── templates/ # 🎨 HTML Templates
467
+ │ ├── index.html # Overview dashboard
468
+ │ ├── users.html # User leaderboard
469
+ │ ├── chat.html # Telegram-like chat view
470
+ │ ├── search.html # Search interface
471
+ │ ├── moderation.html # Content analytics
472
+ │ └── settings.html # Settings & DB update
473
+
474
+ ├── static/ # 📁 Static assets
475
+ │ ├── css/style.css # Dashboard styles
476
+ │ └── js/dashboard.js # Dashboard scripts
477
+
478
+ ├── vector_search.py # Optional: Semantic search
479
+ │ └── VectorSearch class (requires FAISS)
480
+
481
+ ├── schema.sql # Database schema
482
+ └── telegram.db # SQLite database (created)
483
+ ```
484
+
485
+ ---
486
+
487
+ ## Usage Guide
488
+
489
+ ### Web Dashboard (Recommended)
490
+
491
+ ```bash
492
+ # Start the dashboard
493
+ python dashboard.py
494
+
495
+ # Custom port
496
+ python dashboard.py --port 8080
497
+
498
+ # Custom database
499
+ python dashboard.py --db my_chat.db
500
+ ```
501
+
502
+ ### Indexing
503
+
504
+ ```bash
505
+ # Basic indexing
506
+ python indexer.py result.json
507
+
508
+ # Custom database name
509
+ python indexer.py result.json --db my_chat.db
510
+
511
+ # With trigram index (for fuzzy search)
512
+ python indexer.py result.json --build-trigrams
513
+
514
+ # Larger batch size (faster for big files)
515
+ python indexer.py result.json --batch-size 5000
516
+
517
+ # Update existing database with new JSON (incremental)
518
+ python indexer.py new_export.json --db telegram.db --update
519
+ ```
520
+
521
+ ### Searching
522
+
523
+ ```bash
524
+ # Basic search (Hebrew supported)
525
+ python search.py "שלום"
526
+
527
+ # Search with filters
528
+ python search.py "מילה" --user user123456 --limit 50
529
+
530
+ # Date range
531
+ python search.py "חדשות" --from-date 2024-01-01 --to-date 2024-12-31
532
+
533
+ # Fuzzy search (finds typos)
534
+ python search.py "שלמ" --fuzzy --threshold 0.3
535
+
536
+ # View conversation thread
537
+ python search.py --thread 548795
538
+
539
+ # List all links
540
+ python search.py --list-links
541
+
542
+ # List all mentions
543
+ python search.py --list-mentions
544
+ ```
545
+
546
+ ### Analytics
547
+
548
+ ```bash
549
+ # General statistics
550
+ python analyzer.py --stats
551
+
552
+ # Top users (Heap-based O(n log k))
553
+ python analyzer.py --top-users --limit 10
554
+
555
+ # Hourly activity
556
+ python analyzer.py --hourly
557
+
558
+ # Daily activity
559
+ python analyzer.py --daily
560
+
561
+ # Top words
562
+ python analyzer.py --words --limit 30
563
+
564
+ # Top domains
565
+ python analyzer.py --domains
566
+
567
+ # Find similar messages (LCS algorithm)
568
+ python analyzer.py --similar --threshold 0.7
569
+
570
+ # Find reposts
571
+ python analyzer.py --reposts
572
+
573
+ # Message length percentiles (Selection algorithm)
574
+ python analyzer.py --percentiles
575
+
576
+ # Response time percentiles
577
+ python analyzer.py --response-times
578
+
579
+ # User rank (Rank Tree O(log n))
580
+ python analyzer.py --user-rank user123456
581
+
582
+ # Get user at rank #5
583
+ python analyzer.py --rank 5
584
+
585
+ # Activity histogram (Bucket Sort)
586
+ python analyzer.py --histogram --bucket-size 86400
587
+
588
+ # Export as JSON
589
+ python analyzer.py --stats --json > stats.json
590
+ ```
591
+
592
+ ---
593
+
594
+ ## Algorithms
595
+
596
+ ### Algorithm Complexity Comparison
597
+
598
+ ```
599
+ ┌────────────────────┬─────────────────┬─────────────────┬─────────────┐
600
+ │ Operation │ Naive Method │ Our Algorithm │ Improvement │
601
+ ├────────────────────┼─────────────────┼─────────────────┼─────────────┤
602
+ │ Top-K users │ O(n log n) sort │ O(n log k) heap │ ~10x │
603
+ │ Find median │ O(n log n) sort │ O(n) selection │ ~5x │
604
+ │ User rank query │ O(n) scan │ O(log n) tree │ ~100x │
605
+ │ Duplicate check │ O(n) lookup │ O(1) bloom │ ~1000x │
606
+ │ Similar messages │ O(n²m²) naive │ O(n²m) LCS+DP │ ~10x │
607
+ │ Time histogram │ O(n log n) sort │ O(n+k) bucket │ ~5x │
608
+ │ Thread traversal │ O(n) repeated │ O(V+E) DFS/BFS │ ~10x │
609
+ └────────────────────┴─────────────────┴─────────────────┴─────────────┘
610
+ ```
611
+
612
+ ### 1. LCS (Longest Common Subsequence)
613
+
614
+ **Purpose:** Find similar/duplicate messages
615
+
616
+ ```
617
+ String 1: "שלום לכולם מה קורה"
618
+ String 2: "שלום לכולם מה נשמע"
619
+
620
+ LCS: "שלום לכולם מה "
621
+ Similarity: 77.78%
622
+ ```
623
+
624
+ **Algorithm:**
625
+ ```
626
+ ┌───┬───┬───┬───┬───┬───┐
627
+ │ │ ∅ │ A │ B │ C │ D │ DP Table
628
+ ├───┼───┼───┼───┼───┼───┤
629
+ │ ∅ │ 0 │ 0 │ 0 │ 0 │ 0 │ dp[i][j] = length of LCS
630
+ │ A │ 0 │ 1 │ 1 │ 1 │ 1 │ for first i and j chars
631
+ │ C │ 0 │ 1 │ 1 │ 2 │ 2 │
632
+ │ B │ 0 │ 1 │ 2 │ 2 │ 2 │ Time: O(m × n)
633
+ │ D │ 0 │ 1 │ 2 │ 2 │ 3 │ Space: O(min(m,n))
634
+ └───┴───┴───┴───┴───┴───┘
635
+ ```
636
+
637
+ ### 2. Heap-based Top-K
638
+
639
+ **Purpose:** Find top K items without sorting everything
640
+
641
+ ```
642
+ Finding Top 3 from [5,2,8,1,9,3,7,4,6]
643
+
644
+ Min-Heap (size K=3):
645
+
646
+ Step 1: [5] Add 5
647
+ Step 2: [2,5] Add 2
648
+ Step 3: [2,5,8] Add 8 (heap full)
649
+ Step 4: [2,5,8] Skip 1 (< min)
650
+ Step 5: [5,9,8] Replace 2 with 9
651
+ Step 6: [5,9,8] Skip 3 (< min)
652
+ Step 7: [7,9,8] Replace 5 with 7
653
+ ...
654
+ Result: [7,8,9] Top 3!
655
+
656
+ Time: O(n log k) vs O(n log n) for full sort
657
+ ```
658
+
659
+ ### 3. Selection Algorithm (Median of Medians)
660
+
661
+ **Purpose:** Find k-th element or percentiles in O(n)
662
+
663
+ ```
664
+ Find median of [3,1,4,1,5,9,2,6,5,3,5]
665
+
666
+ ┌─────────────────────────────────────────┐
667
+ │ Divide into groups of 5: │
668
+ │ [3,1,4,1,5] [9,2,6,5,3] [5] │
669
+ │ ↓ ↓ ↓ │
670
+ │ Medians: 3 5 5 │
671
+ │ ↓ │
672
+ │ Median of medians: 5 (pivot) │
673
+ │ ↓ │
674
+ │ Partition around 5 │
675
+ │ [3,1,4,1,2,3] [5,5,5] [9,6] │
676
+ │ 6 elements 3 2 │
677
+ │ ↓ │
678
+ │ Index 5 < 6 → recurse left; median = 4 │
679
+ └─────────────────────────────────────────┘
680
+
681
+ Time: O(n) guaranteed (not just average!)
682
+ ```
683
+
684
+ ### 4. Rank Tree (Order Statistics Tree)
685
+
686
+ **Purpose:** O(log n) rank queries
687
+
688
+ ```
689
+ AVL Tree with size augmentation:
690
+
691
+ ┌───────────────┐
692
+ │ 150 (size=5) │
693
+ └───────┬───────┘
694
+ ┌────────┴────────┐
695
+ ┌─────┴─────┐ ┌─────┴─────┐
696
+ │ 100 (s=2) │ │ 250 (s=2) │
697
+ └─────┬─────┘ └─────┬─────┘
698
+ ┌─────┴ ┌──┴
699
+ ┌───┴───┐ ┌───┴───┐
700
+ │50 (1) │ │300 (1)│
701
+ └───────┘ └───────┘
702
+
703
+ select(3) → 150 (3rd smallest)
704
+ rank(150) → 3 (rank of 150)
705
+
706
+ Time: O(log n) for both operations
707
+ ```
708
+
709
+ ### 5. Bucket Sort (Time Histograms)
710
+
711
+ **Purpose:** O(n+k) time-based grouping
712
+
713
+ ```
714
+ Messages with timestamps:
715
+ [1000, 1500, 2500, 1200, 3000]
716
+
717
+ Bucket size: 1000 seconds
718
+
719
+ ┌─────────┬─────────┬─────────┬─────────┐
720
+ │ 0-1000 │1000-2000│2000-3000│3000-4000│
721
+ ├─────────┼─────────┼─────────┼─────────┤
722
+ │ │ 1000 │ 2500 │ 3000 │
723
+ │ │ 1500 │ │ │
724
+ │ │ 1200 │ │ │
725
+ ├─────────┼─────────┼─────────┼─────────┤
726
+ │ Count:0 │ Count:3 │ Count:1 │ Count:1 │
727
+ └─────────┴─────────┴─────────┴─────────┘
728
+
729
+ Time: O(n + k) where k = number of buckets
730
+ ```
731
+
732
+ ### 6. DFS/BFS Thread Traversal
733
+
734
+ **Purpose:** Reconstruct conversation threads
735
+
736
+ ```
737
+ Reply Graph:
738
+
739
+ [1] Original message
740
+
741
+ ├──[2] Reply to 1
742
+ │ │
743
+ │ ├──[4] Reply to 2
744
+ │ │
745
+ │ └──[5] Reply to 2
746
+
747
+ └──[3] Reply to 1
748
+
749
+ DFS order: [1, 2, 4, 5, 3] (deep first)
750
+ BFS order: [1, 2, 3, 4, 5] (level by level)
751
+
752
+ With depth info:
753
+ [1] depth=0
754
+ [2] depth=1
755
+ [4] depth=2
756
+ [5] depth=2
757
+ [3] depth=1
758
+
759
+ Time: O(V + E)
760
+ ```
761
+
762
+ ---
763
+
764
+ ## API Reference
765
+
766
+ ### Dashboard REST API
767
+
768
+ The web dashboard exposes a REST API for all operations:
769
+
770
+ ```
771
+ ┌─────────────────────────────────────────────────────────────────────────┐
772
+ │ REST API ENDPOINTS │
773
+ ├─────────────────────────────────────────────────────────────────────────┤
774
+ │ │
775
+ │ GET /api/overview Overview statistics │
776
+ │ ?timeframe=month (today|yesterday|week|month|year|all) │
777
+ │ │
778
+ │ GET /api/users User leaderboard │
779
+ │ ?timeframe=month Timeframe filter │
780
+ │ &limit=100 Max users │
781
+ │ │
782
+ │ GET /api/user/<user_id> User details │
783
+ │ ?timeframe=month Includes hourly activity │
784
+ │ │
785
+ │ GET /api/search Full-text search │
786
+ │ ?q=search_term Search query │
787
+ │ &timeframe=all Timeframe filter │
788
+ │ &limit=20&offset=0 Pagination │
789
+ │ │
790
+ │ POST /api/ai/search AI-powered search │
791
+ │ {"query": "..."} Natural language query │
792
+ │ │
793
+ │ GET /api/chat/messages Chat messages │
794
+ │ ?limit=50&offset=0 Pagination │
795
+ │ &user_id=... Filter by user │
796
+ │ &from_date=... Date range │
797
+ │ │
798
+ │ GET /api/chat/thread/<id> Get conversation thread │
799
+ │ Returns full thread with DFS │
800
+ │ │
801
+ │ GET /api/top/domains Top shared domains │
802
+ │ GET /api/top/mentions Top mentioned users │
803
+ │ GET /api/top/words Most frequent words │
804
+ │ │
805
+ │ POST /api/update Update database with JSON │
806
+ │ (multipart form) File upload │
807
+ │ │
808
+ │ GET /api/db/stats Database statistics │
809
+ │ Size, counts, date range │
810
+ │ │
811
+ │ GET /api/export/users Export users as CSV │
812
+ │ GET /api/export/messages Export messages as CSV │
813
+ │ │
814
+ ├─────────────────────────────────────────────────────────────────────────┤
815
+ │ ALGORITHM-POWERED ENDPOINTS │
816
+ ├─────────────────────────────────────────────────────────────────────────┤
817
+ │ │
818
+ │ GET /api/similar/<id> Find similar messages (LCS algorithm) │
819
+ │ ?threshold=0.7 Similarity threshold │
820
+ │ &limit=10 Max results │
821
+ │ Complexity: O(n*m) n=sample, m=avg length │
822
+ │ │
823
+ │ GET /api/analytics/similar Find all similar pairs in DB │
824
+ │ ?threshold=0.8 Similarity threshold │
825
+ │ Algorithm: LCS O(n² * m) with early termination │
826
+ │ │
827
+ │ GET /api/user/rank/<id> Get user rank (RankTree) │
828
+ │ Complexity: O(log n) vs O(n) SQL scan │
829
+ │ │
830
+ │ GET /api/user/by-rank/<k> Get k-th ranked user (RankTree) │
831
+ │ Algorithm: select(k) O(log n) │
832
+ │ │
833
+ │ GET /api/analytics/histogram Activity histogram (Bucket Sort) │
834
+ │ ?bucket=86400 Bucket size in seconds │
835
+ │ Complexity: O(n + k) k=number of buckets │
836
+ │ │
837
+ │ GET /api/analytics/percentiles Message length stats (Selection) │
838
+ │ Algorithm: Median-of-medians select, O(n) │
839
+ │ Returns: min,max,median,p25,p75,p90,p95,p99 │
840
+ │ │
841
+ └─────────────────────────────────────────────────────────────────────────┘
842
+ ```
843
+
844
+ ### TelegramSearch
845
+
846
+ ```python
847
+ from search import TelegramSearch
848
+
849
+ with TelegramSearch('telegram.db') as search:
850
+ # Full-text search
851
+ results = search.search("שלום", limit=50)
852
+
853
+ # With filters
854
+ results = search.search(
855
+ "מילה",
856
+ user_id="user123",
857
+ from_date=1704067200, # Unix timestamp
858
+ to_date=1735689600,
859
+ has_links=True
860
+ )
861
+
862
+ # Fuzzy search
863
+ results = search.fuzzy_search("שלמ", threshold=0.3)
864
+
865
+ # Get thread (DFS)
866
+ thread = search.get_thread_dfs(message_id=548795)
867
+
868
+ # Get thread with depth
869
+ thread = search.get_thread_with_depth(message_id=548795)
870
+ # Returns: [(message_dict, depth), ...]
871
+
872
+ # Autocomplete usernames
873
+ suggestions = search.autocomplete_user("@user")
874
+ ```
875
+
876
+ ### TelegramAnalyzer
877
+
878
+ ```python
879
+ from analyzer import TelegramAnalyzer
880
+
881
+ with TelegramAnalyzer('telegram.db') as analyzer:
882
+ # Statistics
883
+ stats = analyzer.get_stats()
884
+
885
+ # Top users (Heap-based)
886
+ top_users = analyzer.get_top_users(limit=10)
887
+
888
+ # Similar messages (LCS)
889
+ similar = analyzer.find_similar_messages(threshold=0.7)
890
+
891
+ # Percentiles (Selection algorithm)
892
+ percentiles = analyzer.get_message_length_stats()
893
+ # Returns: {min, max, median, p25, p75, p90, p95, p99}
894
+
895
+ # User rank (Rank Tree)
896
+ rank_info = analyzer.get_user_rank("user123")
897
+ # Returns: {rank, total_users, percentile}
898
+
899
+ # Get user by rank
900
+ user = analyzer.get_user_by_rank(5)
901
+
902
+ # Histogram (Bucket Sort)
903
+ hist = analyzer.get_activity_histogram(bucket_size=86400)
904
+ ```
905
+
906
+ ---
907
+
908
+ ## Examples
909
+
910
+ ### Example 1: Find Most Active Hours
911
+
912
+ ```python
913
+ from analyzer import TelegramAnalyzer
914
+
915
+ with TelegramAnalyzer('telegram.db') as analyzer:
916
+ hourly = analyzer.get_hourly_activity()
917
+
918
+ # Find peak hour
919
+ peak_hour = max(hourly, key=hourly.get)
920
+ print(f"Most active hour: {peak_hour}:00 ({hourly[peak_hour]} messages)")
921
+ ```
922
+
923
+ ### Example 2: Detect Spam/Reposts
924
+
925
+ ```python
926
+ from analyzer import TelegramAnalyzer
927
+
928
+ with TelegramAnalyzer('telegram.db') as analyzer:
929
+ reposts = analyzer.find_reposts(threshold=0.9)
930
+
931
+ for r in reposts[:10]:
932
+ print(f"Similarity: {r['similarity']:.0%}")
933
+ print(f" User 1: {r['user_1']}")
934
+ print(f" User 2: {r['user_2']}")
935
+ print(f" Text: {r['text_preview'][:50]}...")
936
+ ```
937
+
938
+ ### Example 3: Conversation Thread Analysis
939
+
940
+ ```python
941
+ from search import TelegramSearch
942
+
943
+ with TelegramSearch('telegram.db') as search:
944
+ # Get full thread
945
+ thread = search.get_thread_with_depth(548795)
946
+
947
+ print("Conversation thread:")
948
+ for msg, depth in thread:
949
+ indent = " " * depth
950
+ print(f"{indent}[{msg['from_name']}]: {msg['text_plain'][:50]}")
951
+ ```
952
+
953
+ ### Example 4: User Ranking
954
+
955
+ ```python
956
+ from analyzer import TelegramAnalyzer
957
+
958
+ with TelegramAnalyzer('telegram.db') as analyzer:
959
+ # Get rank of specific user
960
+ rank = analyzer.get_user_rank("user123456")
961
+ print(f"Rank: #{rank['rank']} of {rank['total_users']}")
962
+ print(f"Top {rank['percentile']:.1f}%")
963
+
964
+ # Get top 3 users
965
+ for i in range(1, 4):
966
+ user = analyzer.get_user_by_rank(i)
967
+ print(f"#{i}: {user['name']} ({user['count']} messages)")
968
+ ```
969
+
970
+ ---
971
+
972
+ ## Performance
973
+
974
+ Tested on 100,000 messages:
975
+
976
+ | Operation | Time |
977
+ |-----------|------|
978
+ | Indexing | ~10 seconds |
979
+ | Full-text search | <10ms |
980
+ | Fuzzy search | ~100ms |
981
+ | Top-K (k=20) | ~50ms |
982
+ | User rank query | <1ms |
983
+ | Thread traversal | <5ms |
984
+ | Similar messages (1000 sample) | ~2 seconds |
985
+
986
+ ---
987
+
988
+ ## License
989
+
990
+ MIT License - Free for personal and commercial use.
991
+
992
+ ---
993
+
994
+ ## Contributing
995
+
996
+ 1. Fork the repository
997
+ 2. Create feature branch
998
+ 3. Commit changes
999
+ 4. Push and create PR
1000
+
1001
+ ---
1002
+
1003
+ ## Troubleshooting
1004
+
1005
+ ### "Module not found" error
1006
+ ```bash
1007
+ # Make sure you're in the telegram directory
1008
+ cd /path/to/telegram
1009
+ python indexer.py result.json
1010
+ ```
1011
+
1012
+ ### "Database is locked" error
1013
+ ```bash
1014
+ # Close any other programs using the database
1015
+ # Or use a different database name
1016
+ python indexer.py result.json --db telegram2.db
1017
+ ```
1018
+
1019
+ ### Hebrew text not displaying correctly
1020
+ ```bash
1021
+ # Ensure your terminal supports UTF-8
1022
+ export LANG=en_US.UTF-8
1023
+ ```
1024
+
1025
+ ---
1026
+
1027
+ ## Credits
1028
+
1029
+ Algorithms implemented from the "Data Structures and Introduction to Algorithms" course:
1030
+ - LCS (Longest Common Subsequence)
1031
+ - Heap-based Top-K
1032
+ - Selection Algorithm (Median of Medians)
1033
+ - Rank Tree (Order Statistics Tree)
1034
+ - Bucket Sort
1035
+ - DFS/BFS Graph Traversal
1036
+ - Bloom Filter
1037
+ - Trie (Prefix Tree)
ai_search.py ADDED
@@ -0,0 +1,776 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AI-Powered Search for Telegram Analytics
3
+ Supports: Ollama (local), Groq (free API), Google Gemini (free API)
4
+ """
5
+
6
+ import sqlite3
7
+ import json
8
+ import re
9
+ from datetime import datetime
10
+ from typing import List, Dict, Any, Optional
11
+ import os
12
+
13
+ # Try to import AI libraries
14
+ try:
15
+ import requests
16
+ HAS_REQUESTS = True
17
+ except ImportError:
18
+ HAS_REQUESTS = False
19
+
20
+ try:
21
+ from groq import Groq
22
+ HAS_GROQ = True
23
+ except ImportError:
24
+ HAS_GROQ = False
25
+
26
+ try:
27
+ import google.generativeai as genai
28
+ HAS_GEMINI = True
29
+ except ImportError:
30
+ HAS_GEMINI = False
31
+
32
+
33
+ class AISearchEngine:
34
+ """AI-powered natural language search for Telegram messages."""
35
+
36
+ def __init__(self, db_path: str, provider: str = "ollama", api_key: str = None):
37
+ """
38
+ Initialize AI search engine.
39
+
40
+ Args:
41
+ db_path: Path to SQLite database
42
+ provider: "ollama", "groq", or "gemini"
43
+ api_key: API key for Groq or Gemini (not needed for Ollama)
44
+ """
45
+ self.db_path = db_path
46
+ self.provider = provider
47
+ self.api_key = api_key or os.getenv(f"{provider.upper()}_API_KEY")
48
+
49
+ # Initialize provider
50
+ if provider == "groq" and HAS_GROQ:
51
+ self.client = Groq(api_key=self.api_key)
52
+ self.model = "llama-3.1-70b-versatile"
53
+ elif provider == "gemini" and HAS_GEMINI:
54
+ genai.configure(api_key=self.api_key)
55
+ # Using 2.5 Flash - free tier, fast, good for SQL
56
+ self.client = genai.GenerativeModel("gemini-2.5-flash")
57
+ elif provider == "ollama":
58
+ self.ollama_url = os.getenv("OLLAMA_URL", "http://localhost:11434")
59
+ self.model = os.getenv("OLLAMA_MODEL", "llama3.1")
60
+ else:
61
+ raise ValueError(f"Provider {provider} not available. Install required packages.")
62
+
63
+ def _get_db_schema(self) -> str:
64
+ """Dynamically read schema from the actual database to stay in sync."""
65
+ conn = sqlite3.connect(self.db_path)
66
+ cursor = conn.cursor()
67
+
68
+ # Get all tables and their columns
69
+ cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY name")
70
+ tables = [row[0] for row in cursor.fetchall()]
71
+
72
+ schema_parts = ["Database Schema:"]
73
+ for table in tables:
74
+ cursor.execute(f"PRAGMA table_info({table})")
75
+ cols = cursor.fetchall()
76
+ col_names = [f"{c[1]} ({c[2]})" for c in cols]
77
+ schema_parts.append(f" - {table}: {', '.join(col_names)}")
78
+
79
+ # Note virtual tables (FTS5) separately
80
+ cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND sql LIKE '%fts5%'")
81
+ fts_tables = [row[0] for row in cursor.fetchall()]
82
+ if fts_tables:
83
+ schema_parts.append(f"\n FTS5 tables (use MATCH for search): {', '.join(fts_tables)}")
84
+
85
+ conn.close()
86
+
87
+ schema_parts.append("""
88
+ Key notes:
89
+ - date_unixtime: Unix timestamp (INTEGER), use for date comparisons
90
+ - date: ISO format string (TEXT), use for display
91
+ - text_plain: Message text content
92
+ - text_length: Character count of the message
93
+ - has_links: 1 if message contains URL, 0 otherwise (note: plural)
94
+ - has_media: 1 if message has any media attachment
95
+ - has_photo: 1 if message has a photo specifically
96
+ - from_id: TEXT user ID (e.g., 'user356173100')
97
+ - For text search: SELECT * FROM messages WHERE id IN (SELECT rowid FROM messages_fts WHERE messages_fts MATCH 'term')
98
+ """)
99
+
100
+ return '\n'.join(schema_parts)
101
+
102
+ def _get_sample_data(self) -> str:
103
+ """Get sample data for context."""
104
+ conn = sqlite3.connect(self.db_path)
105
+ cursor = conn.cursor()
106
+
107
+ # Get user list
108
+ cursor.execute("""
109
+ SELECT from_name, COUNT(*) as cnt
110
+ FROM messages
111
+ WHERE from_name IS NOT NULL
112
+ GROUP BY from_name
113
+ ORDER BY cnt DESC
114
+ LIMIT 10
115
+ """)
116
+ users = cursor.fetchall()
117
+
118
+ # Get date range
119
+ cursor.execute("SELECT MIN(date), MAX(date) FROM messages")
120
+ date_range = cursor.fetchone()
121
+
122
+ conn.close()
123
+
124
+ return f"""
125
+ Top users: {', '.join([u[0] for u in users])}
126
+ Date range: {date_range[0]} to {date_range[1]}
127
+ """
128
+
129
+ def _build_prompt(self, user_query: str) -> str:
130
+ """Build prompt for AI model."""
131
+ schema = self._get_db_schema()
132
+ sample = self._get_sample_data()
133
+
134
+ return f"""You are a SQL query generator for a Telegram chat database.
135
+ Your task is to convert natural language questions into SQLite queries.
136
+
137
+ {schema}
138
+
139
+ {sample}
140
+
141
+ IMPORTANT RULES:
142
+ 1. Return ONLY valid SQLite query, no explanations
143
+ 2. For text search, use: SELECT * FROM messages WHERE id IN (SELECT id FROM messages_fts WHERE messages_fts MATCH 'search_term')
144
+ 3. For Hebrew text, the FTS5 will handle it correctly
145
+ 4. Always include relevant columns like date, from_name, text_plain
146
+ 5. Limit results to 50 unless specified
147
+ 6. For "who" questions, GROUP BY from_name and COUNT(*)
148
+ 7. For "when" questions, include date in SELECT
149
+ 8. For threads/replies, JOIN messages m2 ON m1.reply_to_message_id = m2.id
150
+
151
+ User question: {user_query}
152
+
153
+ SQLite query:"""
154
+
155
+ def _call_ollama(self, prompt: str) -> str:
156
+ """Call Ollama API."""
157
+ if not HAS_REQUESTS:
158
+ raise ImportError("requests library required for Ollama")
159
+
160
+ response = requests.post(
161
+ f"{self.ollama_url}/api/generate",
162
+ json={
163
+ "model": self.model,
164
+ "prompt": prompt,
165
+ "stream": False,
166
+ "options": {
167
+ "temperature": 0.1,
168
+ "num_predict": 500
169
+ }
170
+ },
171
+ timeout=60
172
+ )
173
+ response.raise_for_status()
174
+ return response.json()["response"]
175
+
176
+ def _call_groq(self, prompt: str) -> str:
177
+ """Call Groq API."""
178
+ response = self.client.chat.completions.create(
179
+ model=self.model,
180
+ messages=[{"role": "user", "content": prompt}],
181
+ temperature=0.1,
182
+ max_tokens=500
183
+ )
184
+ return response.choices[0].message.content
185
+
186
+ def _call_gemini(self, prompt: str) -> str:
187
+ """Call Google Gemini API."""
188
+ response = self.client.generate_content(prompt)
189
+ return response.text
190
+
191
+ def _generate_sql(self, user_query: str) -> str:
192
+ """Generate SQL from natural language query."""
193
+ prompt = self._build_prompt(user_query)
194
+
195
+ if self.provider == "ollama":
196
+ response = self._call_ollama(prompt)
197
+ elif self.provider == "groq":
198
+ response = self._call_groq(prompt)
199
+ elif self.provider == "gemini":
200
+ response = self._call_gemini(prompt)
201
+ else:
202
+ raise ValueError(f"Unknown provider: {self.provider}")
203
+
204
+ # Extract SQL from response
205
+ sql = response.strip()
206
+
207
+ # Clean up common issues - handle various code block formats
208
+ sql = re.sub(r'^```\w*\s*', '', sql) # Remove opening ```sql or ```
209
+ sql = re.sub(r'\s*```$', '', sql) # Remove closing ```
210
+ sql = re.sub(r'^```', '', sql, flags=re.MULTILINE) # Remove any remaining ```
211
+ sql = sql.strip()
212
+
213
+ # Try to extract SELECT statement if there's text before it
214
+ select_match = re.search(r'(SELECT\s+.+?)(?:;|$)', sql, re.IGNORECASE | re.DOTALL)
215
+ if select_match:
216
+ sql = select_match.group(1).strip()
217
+
218
+ # Ensure it's a SELECT query for safety
219
+ if not sql.upper().startswith("SELECT"):
220
+ raise ValueError(f"AI generated non-SELECT query: {sql[:100]}")
221
+
222
+ return sql
223
+
224
+ def _execute_sql(self, sql: str) -> List[Dict[str, Any]]:
225
+ """Execute SQL and return results as list of dicts."""
226
+ conn = sqlite3.connect(self.db_path)
227
+ conn.row_factory = sqlite3.Row
228
+ cursor = conn.cursor()
229
+
230
+ try:
231
+ cursor.execute(sql)
232
+ rows = cursor.fetchall()
233
+ results = [dict(row) for row in rows]
234
+ except sqlite3.Error as e:
235
+ results = [{"error": str(e), "sql": sql}]
236
+ finally:
237
+ conn.close()
238
+
239
+ return results
240
+
241
+ def _generate_answer(self, user_query: str, results: List[Dict], sql: str) -> str:
242
+ """Generate natural language answer from results."""
243
+ if not results:
244
+ return "לא נמצאו תוצאות."
245
+
246
+ if "error" in results[0]:
247
+ return f"שגיאה בשאילתה: {results[0]['error']}"
248
+
249
+ # Build answer prompt
250
+ results_str = json.dumps(results[:20], ensure_ascii=False, indent=2)
251
+
252
+ answer_prompt = f"""Based on the following query results, provide a concise answer in Hebrew.
253
+
254
+ User question: {user_query}
255
+
256
+ Query results (JSON):
257
+ {results_str}
258
+
259
+ Total results: {len(results)}
260
+
261
+ Provide a helpful, concise answer in Hebrew. Include specific names, dates, and numbers from the results.
262
+ If showing a list, format it nicely. Keep it brief but informative."""
263
+
264
+ if self.provider == "ollama":
265
+ answer = self._call_ollama(answer_prompt)
266
+ elif self.provider == "groq":
267
+ answer = self._call_groq(answer_prompt)
268
+ elif self.provider == "gemini":
269
+ answer = self._call_gemini(answer_prompt)
270
+
271
+ return answer
272
+
273
+ def context_search(self, query: str, user_name: str = None) -> Dict[str, Any]:
274
+ """
275
+ Hybrid context-aware search - combines FTS5 keyword search with AI reasoning.
276
+
277
+ 1. AI extracts user name and relevant keywords from query (the user_name argument is overwritten by this extracted value)
278
+ 2. FTS5 finds messages matching keywords (fast, searches ALL messages)
279
+ 3. AI reads relevant messages and reasons to find the answer
280
+
281
+ Example: "באיזה בית חולים האחות עובדת?"
282
+ - Extracts: user="האחות", keywords=["בית חולים", "עבודה", "מחלקה", "סורוקה", ...]
283
+ - FTS5 finds messages from האחות containing these keywords
284
+ - AI reads and infers the answer
285
+ """
286
+ try:
287
+ conn = sqlite3.connect(self.db_path)
288
+ conn.row_factory = sqlite3.Row
289
+
290
+ # Step 1: AI extracts user name AND relevant keywords
291
+ extract_prompt = f"""Analyze this question and extract:
292
+ 1. USER_NAME: The specific person being asked about (or NONE if not about a specific person)
293
+ 2. KEYWORDS: Hebrew keywords to search for in their messages (related to the question topic)
294
+
295
+ Question: {query}
296
+
297
+ Return in this exact format (one per line):
298
+ USER_NAME: <name or NONE>
299
+ KEYWORDS: <comma-separated keywords in Hebrew>
300
+
301
+ Example for "באיזה בית חולים האחות עובדת?":
302
+ USER_NAME: האחות
303
+ KEYWORDS: בית חולים, עבודה, מחלקה, סורוקה, רמבם, איכילוב, שיבא, הדסה, טיפול נמרץ, אחות
304
+
305
+ Extract:"""
306
+
307
+ if self.provider == "gemini":
308
+ extraction = self._call_gemini(extract_prompt).strip()
309
+ elif self.provider == "groq":
310
+ extraction = self._call_groq(extract_prompt).strip()
311
+ else:
312
+ extraction = self._call_ollama(extract_prompt).strip()
313
+
314
+ # Parse extraction
315
+ user_name = None
316
+ keywords = []
317
+ for line in extraction.split('\n'):
318
+ if line.startswith('USER_NAME:'):
319
+ name = line.replace('USER_NAME:', '').strip()
320
+ if name.upper() != 'NONE' and len(name) < 50:
321
+ user_name = name
322
+ elif line.startswith('KEYWORDS:'):
323
+ kw_str = line.replace('KEYWORDS:', '').strip()
324
+ keywords = [k.strip() for k in kw_str.split(',') if k.strip()]
325
+
326
+ messages = []
327
+
328
+ # Step 2: Hybrid retrieval - FTS5 keyword search + recent messages
329
+ if user_name and keywords:
330
+ # Build FTS5 query for keywords
331
+ fts_query = ' OR '.join(keywords[:10]) # Limit to 10 keywords
332
+
333
+ # Search for messages from user containing keywords
334
+ cursor = conn.execute("""
335
+ SELECT date, from_name, text
336
+ FROM messages
337
+ WHERE from_name LIKE ?
338
+ AND id IN (SELECT id FROM messages_fts WHERE messages_fts MATCH ?)
339
+ ORDER BY date DESC
340
+ LIMIT 100
341
+ """, (f"%{user_name}%", fts_query))
342
+ messages = [dict(row) for row in cursor.fetchall()]
343
+
344
+ # Also add some recent messages for context (might contain relevant info without keywords)
345
+ cursor = conn.execute("""
346
+ SELECT date, from_name, text
347
+ FROM messages
348
+ WHERE from_name LIKE ?
349
+ ORDER BY date DESC
350
+ LIMIT 50
351
+ """, (f"%{user_name}%",))
352
+ recent = [dict(row) for row in cursor.fetchall()]
353
+
354
+ # Combine and deduplicate
355
+ seen_texts = {m['text'] for m in messages if m['text']}
356
+ for m in recent:
357
+ if m['text'] and m['text'] not in seen_texts:
358
+ messages.append(m)
359
+ seen_texts.add(m['text'])
360
+
361
+ elif user_name:
362
+ # No keywords, just get user's messages
363
+ cursor = conn.execute("""
364
+ SELECT date, from_name, text
365
+ FROM messages
366
+ WHERE from_name LIKE ?
367
+ ORDER BY date DESC
368
+ LIMIT 200
369
+ """, (f"%{user_name}%",))
370
+ messages = [dict(row) for row in cursor.fetchall()]
371
+
372
+ elif keywords:
373
+ # No user, search all messages for keywords
374
+ fts_query = ' OR '.join(keywords[:10])
375
+ cursor = conn.execute("""
376
+ SELECT date, from_name, text
377
+ FROM messages
378
+ WHERE id IN (SELECT id FROM messages_fts WHERE messages_fts MATCH ?)
379
+ ORDER BY date DESC
380
+ LIMIT 100
381
+ """, (fts_query,))
382
+ messages = [dict(row) for row in cursor.fetchall()]
383
+
384
+ else:
385
+ # Fallback: recent messages
386
+ cursor = conn.execute("""
387
+ SELECT date, from_name, text
388
+ FROM messages
389
+ WHERE text IS NOT NULL AND text != ''
390
+ ORDER BY date DESC
391
+ LIMIT 100
392
+ """)
393
+ messages = [dict(row) for row in cursor.fetchall()]
394
+
395
+ conn.close()
396
+
397
+ if not messages:
398
+ return {
399
+ "query": query,
400
+ "answer": "לא נמצאו הודעות רלוונטיות",
401
+ "context_messages": 0,
402
+ "keywords_used": keywords,
403
+ "mode": "context_search"
404
+ }
405
+
406
+ # Step 3: AI reasons over the retrieved messages
407
+ context_text = "\n".join([
408
+ f"[{m['date']}] {m['from_name']}: {m['text'][:500]}"
409
+ for m in messages if m['text']
410
+ ])
411
+
412
+ reason_prompt = f"""You are analyzing a Telegram chat history to answer a question.
413
+ Read the messages carefully and infer the answer from context clues.
414
+ The user may not have stated things directly - look for hints, mentions, and implications.
415
+
416
+ Question: {query}
417
+
418
+ Chat messages (sorted by relevance and date):
419
+ {context_text}
420
+
421
+ Based on these messages, answer the question in Hebrew.
422
+ If you can infer information (like workplace, location, profession) from context clues, do so.
423
+ Cite specific messages when possible.
424
+ If you truly cannot find any relevant information, say so.
425
+
426
+ Answer:"""
427
+
428
+ if self.provider == "gemini":
429
+ answer = self._call_gemini(reason_prompt)
430
+ elif self.provider == "groq":
431
+ answer = self._call_groq(reason_prompt)
432
+ else:
433
+ answer = self._call_ollama(reason_prompt)
434
+
435
+ return {
436
+ "query": query,
437
+ "answer": answer,
438
+ "context_user": user_name,
439
+ "context_messages": len(messages),
440
+ "keywords_used": keywords,
441
+ "mode": "context_search"
442
+ }
443
+ except Exception as e:
444
+ return {
445
+ "query": query,
446
+ "error": f"Context search error: {str(e)}",
447
+ "mode": "context_search"
448
+ }
449
+
450
+ def search(self, query: str, generate_answer: bool = True) -> Dict[str, Any]:
451
+ """
452
+ Perform AI-powered search.
453
+
454
+ Args:
455
+ query: Natural language question in Hebrew or English
456
+ generate_answer: Whether to generate natural language answer
457
+
458
+ Returns:
459
+ Dict with sql, results, and optionally answer
460
+ """
461
+ try:
462
+ # Generate SQL
463
+ sql = self._generate_sql(query)
464
+
465
+ # Execute query
466
+ results = self._execute_sql(sql)
467
+
468
+ response = {
469
+ "query": query,
470
+ "sql": sql,
471
+ "results": results,
472
+ "count": len(results)
473
+ }
474
+
475
+ # Generate natural language answer
476
+ if generate_answer and results and "error" not in results[0]:
477
+ response["answer"] = self._generate_answer(query, results, sql)
478
+
479
+ return response
480
+
481
+ except Exception as e:
482
+ return {
483
+ "query": query,
484
+ "error": str(e),
485
+ "results": []
486
+ }
487
+
488
+ def get_thread(self, message_id: int) -> List[Dict[str, Any]]:
489
+ """Get full conversation thread for a message."""
490
+ conn = sqlite3.connect(self.db_path)
491
+ conn.row_factory = sqlite3.Row
492
+ cursor = conn.cursor()
493
+
494
+ thread = []
495
+ visited = set()
496
+
497
+ def get_parent(msg_id):
498
+ """Recursively get parent messages."""
499
+ if msg_id in visited:
500
+ return
501
+ visited.add(msg_id)
502
+
503
+ cursor.execute("""
504
+ SELECT message_id, date, from_name, text, reply_to_message_id
505
+ FROM messages WHERE message_id = ?
506
+ """, (msg_id,))
507
+ row = cursor.fetchone()
508
+
509
+ if row:
510
+ if row['reply_to_message_id']:
511
+ get_parent(row['reply_to_message_id'])
512
+ thread.append(dict(row))
513
+
514
+ def get_children(msg_id):
515
+ """Get all replies to a message."""
516
+ cursor.execute("""
517
+ SELECT message_id, date, from_name, text, reply_to_message_id
518
+ FROM messages WHERE reply_to_message_id = ?
519
+ ORDER BY date
520
+ """, (msg_id,))
521
+
522
+ for row in cursor.fetchall():
523
+ if row['message_id'] not in visited:
524
+ visited.add(row['message_id'])
525
+ thread.append(dict(row))
526
+ get_children(row['message_id'])
527
+
528
+ # Get the original message and its parents
529
+ get_parent(message_id)
530
+
531
+ # Get all replies
532
+ get_children(message_id)
533
+
534
+ conn.close()
535
+
536
+ # Sort by date
537
+ thread.sort(key=lambda x: x['date'])
538
+
539
+ return thread
540
+
541
+ def find_similar_messages(self, message_id: int, limit: int = 10) -> List[Dict[str, Any]]:
542
+ """Find messages similar to the given message via an FTS5 OR-match on its first five words."""
543
+ conn = sqlite3.connect(self.db_path)
544
+ conn.row_factory = sqlite3.Row
545
+ cursor = conn.cursor()
546
+
547
+ # Get the original message
548
+ cursor.execute("SELECT text FROM messages WHERE message_id = ?", (message_id,))
549
+ row = cursor.fetchone()
550
+
551
+ if not row or not row['text']:
552
+ return []
553
+
554
+ # Use FTS5 to find similar messages
555
+ words = row['text'].split()[:5] # Use first 5 words
556
+ search_term = ' OR '.join(words)
557
+
558
+ cursor.execute("""
559
+ SELECT m.message_id, m.date, m.from_name, m.text
560
+ FROM messages m
561
+ WHERE m.id IN (
562
+ SELECT id FROM messages_fts
563
+ WHERE messages_fts MATCH ?
564
+ )
565
+ AND m.message_id != ?
566
+ LIMIT ?
567
+ """, (search_term, message_id, limit))
568
+
569
+ results = [dict(row) for row in cursor.fetchall()]
570
+ conn.close()
571
+
572
+ return results
573
+
574
+
575
+ class ChatViewer:
576
+ """View chat messages like Telegram."""
577
+
578
+ def __init__(self, db_path: str):
579
+ self.db_path = db_path
580
+
581
+ def get_messages(self,
582
+ offset: int = 0,
583
+ limit: int = 50,
584
+ user_id: str = None,
585
+ search: str = None,
586
+ date_from: str = None,
587
+ date_to: str = None,
588
+ has_media: bool = None,
589
+ has_link: bool = None) -> Dict[str, Any]:
590
+ """
591
+ Get messages with Telegram-like pagination.
592
+
593
+ Returns messages in reverse chronological order (newest first).
594
+ """
595
+ conn = sqlite3.connect(self.db_path)
596
+ conn.row_factory = sqlite3.Row
597
+ cursor = conn.cursor()
598
+
599
+ # Build query
600
+ conditions = []
601
+ params = []
602
+
603
+ if user_id:
604
+ conditions.append("from_id = ?")
605
+ params.append(user_id)
606
+
607
+ if date_from:
608
+ conditions.append("date >= ?")
609
+ params.append(date_from)
610
+
611
+ if date_to:
612
+ conditions.append("date <= ?")
613
+ params.append(date_to)
614
+
615
+ if has_media is not None:
616
+ if has_media:
617
+ conditions.append("media_type IS NOT NULL")
618
+ else:
619
+ conditions.append("media_type IS NULL")
620
+
621
+ if has_link is not None:
622
+ conditions.append("has_link = ?")
623
+ params.append(1 if has_link else 0)
624
+
625
+ # Handle search
626
+ if search:
627
+ conditions.append("""id IN (
628
+ SELECT id FROM messages_fts WHERE messages_fts MATCH ?
629
+ )""")
630
+ params.append(search)
631
+
632
+ where_clause = " AND ".join(conditions) if conditions else "1=1"
633
+
634
+ # Get total count
635
+ cursor.execute(f"SELECT COUNT(*) FROM messages WHERE {where_clause}", params)
636
+ total = cursor.fetchone()[0]
637
+
638
+ # Get messages
639
+ query = f"""
640
+ SELECT
641
+ m.message_id,
642
+ m.date,
643
+ m.from_id,
644
+ m.from_name,
645
+ m.text,
646
+ m.reply_to_message_id,
647
+ m.forwarded_from,
648
+ m.media_type,
649
+ m.has_link,
650
+ m.char_count,
651
+ r.from_name as reply_to_name,
652
+ r.text as reply_to_text
653
+ FROM messages m
654
+ LEFT JOIN messages r ON m.reply_to_message_id = r.message_id
655
+ WHERE {where_clause}
656
+ ORDER BY m.date DESC
657
+ LIMIT ? OFFSET ?
658
+ """
659
+ params.extend([limit, offset])
660
+
661
+ cursor.execute(query, params)
662
+ messages = [dict(row) for row in cursor.fetchall()]
663
+
664
+ conn.close()
665
+
666
+ return {
667
+ "messages": messages,
668
+ "total": total,
669
+ "offset": offset,
670
+ "limit": limit,
671
+ "has_more": offset + limit < total
672
+ }
673
+
674
+ def get_message_context(self, message_id: int, before: int = 10, after: int = 10) -> Dict[str, Any]:
675
+ """Get messages around a specific message (for context view)."""
676
+ conn = sqlite3.connect(self.db_path)
677
+ conn.row_factory = sqlite3.Row
678
+ cursor = conn.cursor()
679
+
680
+ # Get the target message date
681
+ cursor.execute("SELECT date FROM messages WHERE message_id = ?", (message_id,))
682
+ row = cursor.fetchone()
683
+
684
+ if not row:
685
+ return {"messages": [], "target_id": message_id}
686
+
687
+ target_date = row['date']
688
+
689
+ # Get messages before
690
+ cursor.execute("""
691
+ SELECT message_id, date, from_id, from_name, text,
692
+ reply_to_message_id, media_type, has_link
693
+ FROM messages
694
+ WHERE date < ?
695
+ ORDER BY date DESC
696
+ LIMIT ?
697
+ """, (target_date, before))
698
+ before_msgs = list(reversed([dict(row) for row in cursor.fetchall()]))
699
+
700
+ # Get target message
701
+ cursor.execute("""
702
+ SELECT message_id, date, from_id, from_name, text,
703
+ reply_to_message_id, media_type, has_link
704
+ FROM messages
705
+ WHERE message_id = ?
706
+ """, (message_id,))
707
+ target_msg = dict(cursor.fetchone())
708
+
709
+ # Get messages after
710
+ cursor.execute("""
711
+ SELECT message_id, date, from_id, from_name, text,
712
+ reply_to_message_id, media_type, has_link
713
+ FROM messages
714
+ WHERE date > ?
715
+ ORDER BY date ASC
716
+ LIMIT ?
717
+ """, (target_date, after))
718
+ after_msgs = [dict(row) for row in cursor.fetchall()]
719
+
720
+ conn.close()
721
+
722
+ return {
723
+ "messages": before_msgs + [target_msg] + after_msgs,
724
+ "target_id": message_id
725
+ }
726
+
727
+ def get_user_conversation(self, user1_id: str, user2_id: str, limit: int = 100) -> List[Dict]:
728
+ """Get conversation between two users (their replies to each other)."""
729
+ conn = sqlite3.connect(self.db_path)
730
+ conn.row_factory = sqlite3.Row
731
+ cursor = conn.cursor()
732
+
733
+ cursor.execute("""
734
+ SELECT m1.message_id, m1.date, m1.from_id, m1.from_name, m1.text,
735
+ m1.reply_to_message_id, m2.from_name as reply_to_name
736
+ FROM messages m1
737
+ LEFT JOIN messages m2 ON m1.reply_to_message_id = m2.message_id
738
+ WHERE (m1.from_id = ? AND m2.from_id = ?)
739
+ OR (m1.from_id = ? AND m2.from_id = ?)
740
+ ORDER BY m1.date DESC
741
+ LIMIT ?
742
+ """, (user1_id, user2_id, user2_id, user1_id, limit))
743
+
744
+ results = [dict(row) for row in cursor.fetchall()]
745
+ conn.close()
746
+
747
+ return results
748
+
749
+
750
+ # CLI for testing
751
+ if __name__ == "__main__":
752
+ import argparse
753
+
754
+ parser = argparse.ArgumentParser(description="AI-powered Telegram search")
755
+ parser.add_argument("--db", required=True, help="Database path")
756
+ parser.add_argument("--provider", default="ollama", choices=["ollama", "groq", "gemini"])
757
+ parser.add_argument("--query", help="Search query")
758
+ parser.add_argument("--api-key", help="API key for cloud providers")
759
+
760
+ args = parser.parse_args()
761
+
762
+ if args.query:
763
+ engine = AISearchEngine(args.db, args.provider, args.api_key)
764
+ result = engine.search(args.query)
765
+
766
+ print(f"\nQuery: {result['query']}")
767
+ print(f"SQL: {result.get('sql', 'N/A')}")
768
+ print(f"Results: {result.get('count', 0)}")
769
+
770
+ if 'answer' in result:
771
+ print(f"\nAnswer:\n{result['answer']}")
772
+
773
+ if result.get('results'):
774
+ print(f"\nFirst 3 results:")
775
+ for r in result['results'][:3]:
776
+ print(json.dumps(r, ensure_ascii=False, indent=2))
algorithms.py ADDED
@@ -0,0 +1,819 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Advanced Algorithms Module for Telegram Chat Analysis
4
+
5
+ Implements algorithms from Data Structures course:
6
+ - LCS (Longest Common Subsequence) - Similar message detection
7
+ - Heap-based Top-K - Efficient ranking without full sort
8
+ - Selection Algorithm (Median of Medians) - O(n) percentiles
9
+ - Rank Tree (Order Statistics Tree) - O(log n) rank queries
10
+ - Bucket Sort - O(n) time-based histograms
11
+
12
+ All algorithms are optimized for the chat indexing use case.
13
+ """
14
+
15
+ import heapq
16
+ from typing import Any, Callable, Generator, Optional
17
+ from collections import defaultdict
18
+ from dataclasses import dataclass, field
19
+
20
+
21
+ # ============================================
22
+ # LCS - LONGEST COMMON SUBSEQUENCE
23
+ # ============================================
24
+
25
+ def lcs_length(s1: str, s2: str) -> int:
26
+ """
27
+ Calculate length of Longest Common Subsequence.
28
+
29
+ Time: O(m * n)
30
+ Space: O(min(m, n)) - optimized to use less space
31
+
32
+ Use case: Measure similarity between two messages.
33
+ """
34
+ # Ensure s1 is the shorter string for space optimization
35
+ if len(s1) > len(s2):
36
+ s1, s2 = s2, s1
37
+
38
+ m, n = len(s1), len(s2)
39
+
40
+ # Use two rows instead of full matrix
41
+ prev = [0] * (m + 1)
42
+ curr = [0] * (m + 1)
43
+
44
+ for j in range(1, n + 1):
45
+ for i in range(1, m + 1):
46
+ if s1[i-1] == s2[j-1]:
47
+ curr[i] = prev[i-1] + 1
48
+ else:
49
+ curr[i] = max(prev[i], curr[i-1])
50
+ prev, curr = curr, prev
51
+
52
+ return prev[m]
53
+
54
+
55
+ def lcs_string(s1: str, s2: str) -> str:
56
+ """
57
+ Find the actual Longest Common Subsequence string.
58
+
59
+ Time: O(m * n)
60
+ Space: O(m * n)
61
+
62
+ Use case: Find common content between messages.
63
+ """
64
+ m, n = len(s1), len(s2)
65
+
66
+ # Build full DP table
67
+ dp = [[0] * (n + 1) for _ in range(m + 1)]
68
+
69
+ for i in range(1, m + 1):
70
+ for j in range(1, n + 1):
71
+ if s1[i-1] == s2[j-1]:
72
+ dp[i][j] = dp[i-1][j-1] + 1
73
+ else:
74
+ dp[i][j] = max(dp[i-1][j], dp[i][j-1])
75
+
76
+ # Backtrack to find the actual subsequence
77
+ result = []
78
+ i, j = m, n
79
+ while i > 0 and j > 0:
80
+ if s1[i-1] == s2[j-1]:
81
+ result.append(s1[i-1])
82
+ i -= 1
83
+ j -= 1
84
+ elif dp[i-1][j] > dp[i][j-1]:
85
+ i -= 1
86
+ else:
87
+ j -= 1
88
+
89
+ return ''.join(reversed(result))
90
+
91
+
92
+ def lcs_similarity(s1: str, s2: str) -> float:
93
+ """
94
+ Calculate LCS-based similarity ratio between two strings.
95
+
96
+ Returns value between 0 (no similarity) and 1 (identical).
97
+
98
+ Use case: Detect near-duplicate messages, reposts.
99
+ """
100
+ if not s1 or not s2:
101
+ return 0.0
102
+
103
+ lcs_len = lcs_length(s1, s2)
104
+ max_len = max(len(s1), len(s2))
105
+
106
+ return lcs_len / max_len
107
+
108
+
109
+ def find_similar_messages(
110
+ messages: list[tuple[int, str]],
111
+ threshold: float = 0.7,
112
+ min_length: int = 20
113
+ ) -> list[tuple[int, int, float]]:
114
+ """
115
+ Find pairs of similar messages using LCS.
116
+
117
+ Args:
118
+ messages: List of (id, text) tuples
119
+ threshold: Minimum similarity to report (0-1)
120
+ min_length: Minimum message length to consider
121
+
122
+ Returns:
123
+ List of (id1, id2, similarity) tuples
124
+
125
+ Time: O(n² * m²) where n=messages, m=avg length (each pairwise LCS is O(m²))
126
+ """
127
+ # Filter by length
128
+ filtered = [(id_, text) for id_, text in messages if len(text) >= min_length]
129
+
130
+ similar_pairs = []
131
+ n = len(filtered)
132
+
133
+ for i in range(n):
134
+ for j in range(i + 1, n):
135
+ id1, text1 = filtered[i]
136
+ id2, text2 = filtered[j]
137
+
138
+ # Quick length check - if lengths differ too much, skip
139
+ len_ratio = min(len(text1), len(text2)) / max(len(text1), len(text2))
140
+ if len_ratio < threshold:
141
+ continue
142
+
143
+ sim = lcs_similarity(text1, text2)
144
+ if sim >= threshold:
145
+ similar_pairs.append((id1, id2, sim))
146
+
147
+ return sorted(similar_pairs, key=lambda x: x[2], reverse=True)
148
+
149
+
150
+ # ============================================
151
+ # HEAP-BASED TOP-K
152
+ # ============================================
153
+
154
class TopK:
    """
    Efficient Top-K tracker using min-heap.

    Maintains the K largest elements seen so far. The heap root is always the
    smallest of the retained elements, so a new element only needs to be
    compared against the root.

    Time: O(n log k) for n insertions
    Space: O(k)

    Use case: Top users, top words, top domains without sorting all data.
    """

    def __init__(self, k: int, key: Optional[Callable[[Any], float]] = None):
        """
        Args:
            k: Number of top elements to track. A value <= 0 tracks nothing.
            key: Function to extract comparison value (default: identity)
        """
        self.k = k
        self.key = key or (lambda x: x)
        self.heap: list[tuple[float, int, Any]] = []  # (key_value, counter, item)
        # Unique, increasing tie-breaker: guarantees tuple comparison never
        # falls through to comparing the items themselves.
        self.counter = 0

    def push(self, item: Any) -> None:
        """Add an item. O(log k)."""
        key_val = self.key(item)

        if len(self.heap) < self.k:
            heapq.heappush(self.heap, (key_val, self.counter, item))
        elif self.heap and key_val > self.heap[0][0]:
            # `self.heap and` guards k <= 0, where the heap stays empty and
            # indexing the root would raise IndexError.
            heapq.heapreplace(self.heap, (key_val, self.counter, item))

        self.counter += 1

    def get_top(self) -> list[Any]:
        """Get top K items sorted by key descending. O(k log k)."""
        return [item for _, _, item in sorted(self.heap, reverse=True)]

    def __len__(self) -> int:
        """Number of items currently retained (at most k)."""
        return len(self.heap)
194
+
195
+
196
def top_k_frequent(items: list[Any], k: int) -> list[tuple[Any, int]]:
    """
    Return the K items that occur most often, as (item, count) pairs.

    Occurrences are tallied in one pass, then the distinct items are streamed
    through a ``TopK`` tracker keyed on their count.

    Time: O(n + m log k) where n=items, m=unique items
    Space: O(m)

    Use case: Top words, top users, top mentioned usernames.
    """
    # Tally occurrences
    counts = defaultdict(int)
    for entry in items:
        counts[entry] += 1

    # Keep only the K largest counts
    tracker = TopK(k, key=lambda pair: pair[1])
    for pair in counts.items():
        tracker.push(pair)

    return tracker.get_top()
216
+
217
+
218
def top_k_by_field(
    records: list[dict],
    field: str,
    k: int,
    reverse: bool = True
) -> list[dict]:
    """
    Pick the K records with the largest (or smallest) value of ``field``.

    Records that lack the field are scored as 0. With ``reverse=True`` the K
    largest values are returned, largest first; with ``reverse=False`` the
    score is negated so the K smallest values are returned, smallest first.

    Time: O(n log k)

    Use case: Top messages by length, top users by message count.
    """
    if reverse:
        # Largest K: score is the field value itself
        def score(rec):
            return rec.get(field, 0)
    else:
        # Smallest K: negate so the tracker's "largest" is our smallest
        def score(rec):
            return -rec.get(field, 0)

    tracker = TopK(k, key=score)
    for rec in records:
        tracker.push(rec)

    return tracker.get_top()
242
+
243
+
244
+ # ============================================
245
+ # SELECTION ALGORITHM (MEDIAN OF MEDIANS)
246
+ # ============================================
247
+
248
def partition(arr: list, left: int, right: int, pivot_idx: int) -> int:
    """
    Rearrange ``arr[left..right]`` in place around a pivot (Lomuto scheme).

    After the call every element strictly smaller than the pivot sits before
    it and everything else sits after it.

    Returns the index where the pivot ended up.
    """
    pivot = arr[pivot_idx]

    # Park the pivot at the right edge while the rest is scanned
    arr[pivot_idx], arr[right] = arr[right], arr[pivot_idx]

    boundary = left  # first slot not yet known to hold a "smaller" element
    for pos in range(left, right):
        if not arr[pos] < pivot:
            continue
        arr[boundary], arr[pos] = arr[pos], arr[boundary]
        boundary += 1

    # Drop the pivot between the two regions
    arr[boundary], arr[right] = arr[right], arr[boundary]

    return boundary
269
+
270
+
271
def median_of_five(arr: list, left: int, right: int) -> int:
    """
    Return the index of the median of ``arr[left..right]`` (a small group).

    Positions are ordered by (value, index); the middle one is returned. For
    an even-sized group that is the upper of the two middle positions.
    """
    ordered = sorted(range(left, right + 1), key=lambda idx: (arr[idx], idx))
    return ordered[len(ordered) // 2]
276
+
277
+
278
def median_of_medians(arr: list, left: int, right: int) -> int:
    """
    Choose a pivot for ``arr[left..right]`` using the median-of-medians idea.

    The range is cut into groups of up to five elements, the median of each
    group is collected, and the middle of those medians (found by sorting
    them, not by recursing) becomes the pivot value.

    Returns the index of the first element in the range equal to that value.
    """
    if right - left + 1 <= 5:
        return median_of_five(arr, left, right)

    # One median per group of (up to) five consecutive elements
    group_medians = sorted(
        arr[median_of_five(arr, start, min(start + 4, right))]
        for start in range(left, right + 1, 5)
    )
    target = group_medians[len(group_medians) // 2]

    # Map the chosen value back to a position; `left` is the fallback
    return next(
        (pos for pos in range(left, right + 1) if arr[pos] == target),
        left,
    )
307
+
308
+
309
def quickselect(arr: list, k: int) -> Any:
    """
    Find the k-th smallest element (0-indexed).

    Args:
        arr: Values to select from. The list is copied, never mutated.
        k: 0-based rank of the wanted element (0 = minimum).

    Returns:
        The element that would sit at index ``k`` if ``arr`` were sorted.

    Raises:
        IndexError: if ``arr`` is empty or ``k`` is not in ``[0, len(arr))``.
            Previously an out-of-range ``k`` either raised or silently
            returned the minimum/maximum, depending on the pivots chosen.

    Time: linear on typical input. Pivots come from ``median_of_medians``,
    which sorts the group medians instead of recursing, and ``partition``
    does not group equal elements, so heavily duplicated input degrades.
    Space: O(n) for the working copy.

    Use case: Find median, percentiles without sorting.
    """
    if not 0 <= k < len(arr):
        raise IndexError(
            f"k={k} is out of range for a sequence of length {len(arr)}"
        )

    work = arr.copy()  # Don't modify original
    left, right = 0, len(work) - 1

    while left < right:
        # Use median of medians for pivot selection
        pivot_idx = median_of_medians(work, left, right)
        pivot_idx = partition(work, left, right, pivot_idx)

        if k == pivot_idx:
            return work[k]
        if k < pivot_idx:
            right = pivot_idx - 1
        else:
            left = pivot_idx + 1

    # The search window has shrunk to the single slot holding rank k
    return work[left]
334
+
335
+
336
def find_median(arr: list) -> float:
    """
    Compute the median of ``arr`` via selection instead of a full sort.

    An empty list yields 0.0. For an even number of elements the two middle
    values are each selected and then averaged.

    Use case: Median message length, median activity time.
    """
    size = len(arr)
    if size == 0:
        return 0.0

    mid = size // 2
    if size % 2:
        return float(quickselect(arr, mid))

    lower = quickselect(arr, mid - 1)
    upper = quickselect(arr, mid)
    return (lower + upper) / 2
350
+
351
+
352
def find_percentile(arr: list, p: float) -> float:
    """
    Find the p-th percentile (0-100) via selection instead of a full sort.

    The rank is ``int((p / 100) * (len(arr) - 1))``: it is truncated towards
    zero and no interpolation between neighbouring values is performed.

    Args:
        arr: Values to inspect. An empty list yields 0.0.
        p: Percentile in the closed range [0, 100].

    Returns:
        The selected element converted to float.

    Raises:
        ValueError: if ``arr`` is non-empty and ``p`` lies outside [0, 100]
            (such values used to produce an out-of-range rank).

    Use case: 90th percentile response time, activity distribution.
    """
    if not arr:
        return 0.0

    if not 0 <= p <= 100:
        raise ValueError(f"percentile must be between 0 and 100, got {p!r}")

    k = int((p / 100) * (len(arr) - 1))
    return float(quickselect(arr, k))
363
+
364
+
365
+ # ============================================
366
+ # RANK TREE (ORDER STATISTICS TREE)
367
+ # ============================================
368
+
369
@dataclass
class RankTreeNode:
    """One node of the order-statistics tree: a BST node augmented with subtree size and height."""
    # Ordering key used for all comparisons in the tree
    key: Any
    # Payload stored alongside the key
    value: Any = None
    # Child links; None marks an absent child
    left: Optional['RankTreeNode'] = None
    right: Optional['RankTreeNode'] = None
    # Number of nodes in the subtree rooted here (drives rank/select)
    size: int = 1
    # Height of the subtree rooted here (drives AVL balancing)
    height: int = 1
378
+
379
+
380
class RankTree:
    """
    Order-statistics tree kept balanced with AVL rotations.

    Every node records the size of its subtree, which makes these operations
    proportional to the tree height:
    - insert(key, value): add a key, or replace the value of an existing key
    - select(k): value stored under the k-th smallest key (1-indexed)
    - rank(x): 1-indexed position of key x in sorted order

    Use case: "What rank is this user?", "Who is the 100th most active?"
    """

    def __init__(self, key_func: Callable[[Any], Any] = None):
        self.root: Optional['RankTreeNode'] = None
        # Stored for callers; the methods of this class do not consult it.
        self.key_func = key_func or (lambda x: x)

    def _get_size(self, node: 'RankTreeNode') -> int:
        if not node:
            return 0
        return node.size

    def _get_height(self, node: 'RankTreeNode') -> int:
        if not node:
            return 0
        return node.height

    def _get_balance(self, node: 'RankTreeNode') -> int:
        if not node:
            return 0
        return self._get_height(node.left) - self._get_height(node.right)

    def _update(self, node: 'RankTreeNode') -> None:
        """Recompute a node's size and height from its children."""
        if not node:
            return
        node.size = 1 + self._get_size(node.left) + self._get_size(node.right)
        node.height = 1 + max(self._get_height(node.left), self._get_height(node.right))

    def _rotate_right(self, y: 'RankTreeNode') -> 'RankTreeNode':
        """Rotate right around ``y``; its left child becomes the subtree root."""
        pivot = y.left
        moved = pivot.right

        pivot.right = y
        y.left = moved

        # The demoted node must be refreshed before its new parent
        self._update(y)
        self._update(pivot)

        return pivot

    def _rotate_left(self, x: 'RankTreeNode') -> 'RankTreeNode':
        """Rotate left around ``x``; its right child becomes the subtree root."""
        pivot = x.right
        moved = pivot.left

        pivot.left = x
        x.right = moved

        # The demoted node must be refreshed before its new parent
        self._update(x)
        self._update(pivot)

        return pivot

    def _balance(self, node: 'RankTreeNode') -> 'RankTreeNode':
        """Refresh ``node`` and restore the AVL invariant; returns the subtree root."""
        self._update(node)
        tilt = self._get_balance(node)

        if tilt > 1:
            # Left heavy; a left-right shape needs a preliminary left rotation
            if self._get_balance(node.left) < 0:
                node.left = self._rotate_left(node.left)
            return self._rotate_right(node)

        if tilt < -1:
            # Right heavy; a right-left shape needs a preliminary right rotation
            if self._get_balance(node.right) > 0:
                node.right = self._rotate_right(node.right)
            return self._rotate_left(node)

        return node

    def insert(self, key: Any, value: Any = None) -> None:
        """Insert a key-value pair, or overwrite the value of an existing key. O(log n)."""
        self.root = self._insert(self.root, key, value)

    def _insert(self, node: 'RankTreeNode', key: Any, value: Any) -> 'RankTreeNode':
        if not node:
            return RankTreeNode(key=key, value=value)

        if key < node.key:
            node.left = self._insert(node.left, key, value)
        elif key > node.key:
            node.right = self._insert(node.right, key, value)
        else:
            # Same key: only the payload changes, the shape is untouched
            node.value = value
            return node

        return self._balance(node)

    def select(self, k: int) -> Optional[Any]:
        """
        Return the value stored under the k-th smallest key (1-indexed).

        Returns None when ``k`` does not address a node.

        O(log n)

        Use case: "Who is the 10th most active user?"
        """
        return self._select(self.root, k)

    def _select(self, node: 'RankTreeNode', k: int) -> Optional[Any]:
        while node:
            left_size = self._get_size(node.left)
            if k == left_size + 1:
                return node.value
            if k <= left_size:
                node = node.left
            else:
                # Skip the left subtree and this node
                k -= left_size + 1
                node = node.right
        return None

    def rank(self, key: Any) -> int:
        """
        Return the 1-indexed rank of ``key``.

        For a key that is not in the tree, the result is the number of
        stored keys smaller than it.

        O(log n)

        Use case: "What rank is user X?"
        """
        return self._rank(self.root, key)

    def _rank(self, node: 'RankTreeNode', key: Any) -> int:
        smaller = 0  # keys already known to precede `key`
        while node:
            if key < node.key:
                node = node.left
            elif key > node.key:
                smaller += 1 + self._get_size(node.left)
                node = node.right
            else:
                return smaller + self._get_size(node.left) + 1
        return smaller

    def __len__(self) -> int:
        return self._get_size(self.root)

    def inorder(self) -> Generator[tuple[Any, Any], None, None]:
        """Yield (key, value) pairs in ascending key order."""
        pending = []
        node = self.root
        while pending or node:
            # Walk down the left spine, remembering the way back
            while node:
                pending.append(node)
                node = node.left
            node = pending.pop()
            yield (node.key, node.value)
            node = node.right
529
+
530
+
531
+ # ============================================
532
+ # BUCKET SORT FOR TIME-BASED DATA
533
+ # ============================================
534
+
535
def bucket_sort_by_time(
    records: list[dict],
    time_field: str,
    bucket_size: int = 3600,  # Default: 1 hour
    start_time: int = None,
    end_time: int = None
) -> list[list[dict]]:
    """
    Sort records into time-based buckets.

    Time: O(n + k) where k = number of buckets
    Space: O(n + k)

    Use case: Group messages by hour, day, week for histograms.

    Args:
        records: List of dicts with timestamp field. A record without the
            field is treated as having timestamp 0.
        time_field: Name of the timestamp field
        bucket_size: Size of each bucket in seconds (must be > 0)
        start_time: Start of range (default: min timestamp)
        end_time: End of range (default: max timestamp)

    Returns:
        List of buckets, each containing records in that time range. Bucket
        ``i`` covers ``[start_time + i*bucket_size, start_time + (i+1)*bucket_size)``.
        Records outside ``[start_time, end_time]`` are dropped. An empty
        ``records`` list yields ``[]``.

    Raises:
        ValueError: if ``records`` is non-empty and ``bucket_size`` is not positive.
    """
    if not records:
        return []

    if bucket_size <= 0:
        raise ValueError(f"bucket_size must be positive, got {bucket_size!r}")

    # Extract timestamps once; reused for the bounds and the distribution pass
    timestamps = [r.get(time_field, 0) for r in records]

    if start_time is None:
        start_time = min(timestamps)
    if end_time is None:
        end_time = max(timestamps)

    # int(...) keeps the bucket count usable when timestamps are floats
    # (floor division of floats yields a float, which range() rejects).
    n_buckets = max(1, int((end_time - start_time) // bucket_size) + 1)

    buckets: list[list[dict]] = [[] for _ in range(n_buckets)]

    for record, ts in zip(records, timestamps):
        if ts < start_time or ts > end_time:
            continue

        # Clamp so a timestamp equal to end_time lands in the last bucket
        bucket_idx = min(int((ts - start_time) // bucket_size), n_buckets - 1)
        buckets[bucket_idx].append(record)

    return buckets
587
+
588
+
589
def time_histogram(
    records: list[dict],
    time_field: str,
    bucket_size: int = 3600
) -> list[tuple[int, int]]:
    """
    Count records per fixed-width time bucket.

    Buckets start at the earliest timestamp found in ``records`` and run
    through the latest one; each entry of the result is
    (bucket_start_time, count). An empty input yields an empty list.

    Use case: Activity over time visualization.
    """
    if not records:
        return []

    stamps = [rec.get(time_field, 0) for rec in records]
    first, last = min(stamps), max(stamps)

    grouped = bucket_sort_by_time(records, time_field, bucket_size, first, last)

    return [
        (first + pos * bucket_size, len(group))
        for pos, group in enumerate(grouped)
    ]
616
+
617
+
618
def hourly_distribution(
    records: list[dict],
    time_field: str
) -> dict[int, int]:
    """
    Count records per hour of day (0-23).

    Hours come from ``datetime.fromtimestamp``, i.e. the local timezone of
    the running process. Records whose timestamp is missing or falsy are
    ignored; hours with no records are absent from the result.

    Time: O(n)

    Use case: When are users most active?
    """
    from datetime import datetime

    counts = defaultdict(int)

    stamps = (rec.get(time_field, 0) for rec in records)
    for ts in stamps:
        if not ts:
            continue
        counts[datetime.fromtimestamp(ts).hour] += 1

    return dict(counts)
640
+
641
+
642
+ # ============================================
643
+ # COMBINED DATA STRUCTURE: RANKED TIME INDEX
644
+ # ============================================
645
+
646
class RankedTimeIndex:
    """
    Combined data structure for time-based and rank queries.

    Combines:
    - Time buckets (dict keyed by ``timestamp // bucket_size``) for range lookups
    - Rank tree for O(log n) rank queries
    - Top-K heap (via ``top_k_by_field``) for efficient top queries

    Use case: "Top 10 users in the last hour", "Rank of user X this week"
    """

    def __init__(self, bucket_size: int = 3600):
        self.bucket_size = bucket_size
        self.buckets: dict[int, list[dict]] = defaultdict(list)  # bucket_id -> records
        self.rank_tree = RankTree()  # For rank queries
        self.total_count = 0
        self.min_time = float('inf')
        self.max_time = 0
        # Timestamp field used by the most recent add(); range queries read
        # the same field unless told otherwise.
        self._time_field = 'date_unixtime'

    def add(self, record: dict, time_field: str = 'date_unixtime', rank_field: str = None) -> None:
        """
        Add a record to the index. O(log n).

        Args:
            record: The record to index. A missing ``time_field`` counts as 0.
            time_field: Key holding the record's Unix timestamp.
            rank_field: Optional key whose value becomes the rank-tree key.
                A record sharing a rank key with an earlier one replaces it
                in the rank tree.
        """
        ts = record.get(time_field, 0)
        self._time_field = time_field

        # Update time bounds
        self.min_time = min(self.min_time, ts)
        self.max_time = max(self.max_time, ts)

        # Add to time bucket
        self.buckets[ts // self.bucket_size].append(record)

        # Add to rank tree if rank field specified
        if rank_field and rank_field in record:
            self.rank_tree.insert(record[rank_field], record)

        self.total_count += 1

    def get_time_range(
        self,
        start_time: int,
        end_time: int,
        time_field: Optional[str] = None
    ) -> list[dict]:
        """
        Get all records with ``start_time <= timestamp <= end_time``.

        Args:
            start_time: Inclusive lower bound (Unix seconds).
            end_time: Inclusive upper bound (Unix seconds).
            time_field: Key to read timestamps from. Defaults to the field
                used by the most recent ``add`` call (previously this was
                hardcoded to ``'date_unixtime'``, so records indexed under
                another field were never returned).

        Returns:
            Matching records in ascending bucket order, insertion order
            within a bucket.
        """
        field = time_field or self._time_field
        first = start_time // self.bucket_size
        last = end_time // self.bucket_size

        if last - first + 1 > len(self.buckets):
            # Sparse range: visit only the buckets that exist, not every id
            bucket_ids = sorted(b for b in self.buckets if first <= b <= last)
        else:
            bucket_ids = range(first, last + 1)

        results = []
        for bucket_id in bucket_ids:
            # .get avoids creating empty buckets in the defaultdict
            for record in self.buckets.get(bucket_id, []):
                if start_time <= record.get(field, 0) <= end_time:
                    results.append(record)

        return results

    def top_k_in_range(
        self,
        start_time: int,
        end_time: int,
        k: int,
        score_field: str
    ) -> list[dict]:
        """
        Get top K records by score in time range.

        O(m log k) where m = records in range
        """
        records = self.get_time_range(start_time, end_time)
        return top_k_by_field(records, score_field, k)

    def get_rank(self, key: Any) -> int:
        """Get rank of element. O(log n)."""
        return self.rank_tree.rank(key)

    def get_by_rank(self, k: int) -> Optional[dict]:
        """Get element by rank. O(log n)."""
        return self.rank_tree.select(k)
722
+
723
+
724
+ # ============================================
725
+ # TESTS AND DEMOS
726
+ # ============================================
727
+
728
def run_tests():
    """
    Run a demo of every algorithm, printing results and asserting known answers.

    Raises:
        AssertionError: if any algorithm returns an unexpected result, so the
            final "ALL TESTS PASSED!" banner is only printed on success.
    """
    print("=" * 60)
    print("ALGORITHM TESTS")
    print("=" * 60)

    # Test LCS
    print("\n--- LCS (Longest Common Subsequence) ---")
    s1 = "שלום לכולם מה קורה"
    s2 = "שלום לכולם מה נשמע"
    lcs = lcs_string(s1, s2)
    sim = lcs_similarity(s1, s2)
    print(f"String 1: {s1}")
    print(f"String 2: {s2}")
    print(f"LCS: '{lcs}'")
    print(f"Similarity: {sim:.2%}")
    # The strings share a prefix but differ, so the score is strictly inside (0, 1)
    assert 0.0 < sim < 1.0, sim
    assert lcs_similarity(s1, s1) == 1.0

    # Test similar message detection
    messages = [
        (1, "היי מה קורה איך אתה"),
        (2, "היי מה קורה איך את"),
        (3, "שלום לכולם"),
        (4, "היי מה קורה איך אתם"),
    ]
    similar = find_similar_messages(messages, threshold=0.7, min_length=5)
    print(f"\nSimilar message pairs (threshold 0.7):")
    for id1, id2, sim in similar:
        print(f"  Messages {id1} & {id2}: {sim:.2%}")
    # Messages 1, 2 and 4 differ only in the final letters; 3 is unrelated
    assert {(a, b) for a, b, _ in similar} == {(1, 2), (1, 4), (2, 4)}, similar

    # Test Top-K
    print("\n--- Heap-based Top-K ---")
    items = ['apple', 'banana', 'apple', 'cherry', 'banana', 'apple', 'date', 'banana']
    top = top_k_frequent(items, k=2)
    print(f"Items: {items}")
    print(f"Top 2 frequent: {top}")
    # apple and banana tie on 3, so compare without relying on tie order
    assert sorted(top) == [('apple', 3), ('banana', 3)], top

    # Test Selection (Median)
    print("\n--- Selection Algorithm (Median) ---")
    arr = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5]
    median = find_median(arr)
    p90 = find_percentile(arr, 90)
    print(f"Array: {arr}")
    print(f"Median: {median}")
    print(f"90th percentile: {p90}")
    assert median == 4.0, median
    assert p90 == 6.0, p90
    assert arr == [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5]  # input must not be mutated

    # Test Rank Tree
    print("\n--- Rank Tree (Order Statistics) ---")
    tree = RankTree()
    users = [
        (100, "Alice"),
        (250, "Bob"),
        (50, "Charlie"),
        (300, "Diana"),
        (150, "Eve"),
    ]
    for score, name in users:
        tree.insert(score, name)

    print(f"Users by score: {users}")
    print(f"3rd ranked (by score): {tree.select(3)}")
    print(f"Rank of score 150: {tree.rank(150)}")
    print(f"All in order: {list(tree.inorder())}")
    assert tree.select(3) == "Eve"
    assert tree.rank(150) == 3
    assert [key for key, _ in tree.inorder()] == [50, 100, 150, 250, 300]

    # Test Bucket Sort
    print("\n--- Bucket Sort (Time-based) ---")
    records = [
        {'id': 1, 'ts': 1000},
        {'id': 2, 'ts': 1500},
        {'id': 3, 'ts': 2500},
        {'id': 4, 'ts': 1200},
        {'id': 5, 'ts': 3000},
    ]
    hist = time_histogram(records, 'ts', bucket_size=1000)
    print(f"Records: {records}")
    print(f"Histogram (bucket=1000): {hist}")
    assert hist == [(1000, 3), (2000, 1), (3000, 1)], hist

    # Test Combined Structure
    print("\n--- Combined RankedTimeIndex ---")
    index = RankedTimeIndex(bucket_size=1000)
    for r in records:
        index.add(r, time_field='ts', rank_field='id')

    range_result = index.get_time_range(1000, 2000)
    print(f"Records in time range 1000-2000: {[r['id'] for r in range_result]}")
    assert all(r in records for r in range_result)

    print("\n" + "=" * 60)
    print("ALL TESTS PASSED!")
    print("=" * 60)


if __name__ == '__main__':
    run_tests()
dashboard.py ADDED
@@ -0,0 +1,2086 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Telegram Analytics Dashboard - Web Server
4
+
5
+ A Flask-based web dashboard for visualizing Telegram chat analytics.
6
+ Inspired by Combot and other Telegram statistics bots.
7
+
8
+ Usage:
9
+ python dashboard.py --db telegram.db --port 5000
10
+ Then open http://localhost:5000 in your browser
11
+
12
+ Requirements:
13
+ pip install flask
14
+ """
15
+
16
+ import sqlite3
17
+ import json
18
+ import csv
19
+ import io
20
+ import os
21
+ from datetime import datetime, timedelta
22
+ from flask import Flask, render_template, jsonify, request, Response
23
+ from typing import Optional
24
+ from collections import defaultdict
25
+
26
+ # ==========================================
27
+ # AI CONFIGURATION
28
+ # Set via environment variables (e.g. in .env or hosting platform settings)
29
+ # ==========================================
30
# Fall back to the Gemini provider when AI_PROVIDER is unset or empty.
os.environ['AI_PROVIDER'] = os.environ.get('AI_PROVIDER') or 'gemini'
32
+ # GEMINI_API_KEY should be set as an environment variable, not hardcoded
33
+
34
+ # Import our algorithms
35
+ from algorithms import (
36
+ TopK, find_median, find_percentile, top_k_frequent,
37
+ RankTree, lcs_similarity, find_similar_messages,
38
+ bucket_sort_by_time, time_histogram, RankedTimeIndex
39
+ )
40
+
41
+ # Import semantic search (uses pre-computed embeddings)
42
+ try:
43
+ from semantic_search import get_semantic_search
44
+ HAS_SEMANTIC_SEARCH = True
45
+ except ImportError:
46
+ HAS_SEMANTIC_SEARCH = False
47
+ get_semantic_search = None
48
+
49
+ app = Flask(__name__)
50
# Path of the SQLite database. Honours the DB_PATH environment variable
# (the Dockerfile sets it) and falls back to the bundled default file.
DB_PATH = os.environ.get('DB_PATH') or 'telegram.db'
51
+
52
+
53
def get_db():
    """
    Open a new SQLite connection to the file named by ``DB_PATH``.

    Rows come back as ``sqlite3.Row`` objects, so columns can be read by
    name or by position. The caller is responsible for closing the connection.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
58
+
59
+
60
def parse_timeframe(timeframe: str) -> tuple[int, int]:
    """
    Parse a timeframe string into a (start, end) pair of Unix timestamps.

    Supported values:
        'today'      - local midnight until now
        'yesterday'  - the previous local day (midnight to midnight)
        'week'/'month'/'year' - local midnight 7/30/365 days ago until now
        'all'        - 0 until now
        'start,end'  - custom range given as two integer Unix timestamps

    Anything else (including a malformed custom range or a non-string value)
    falls back to the 'all' range.
    """
    now = datetime.now()
    now_ts = int(now.timestamp())
    today_start = datetime(now.year, now.month, now.day)

    if not isinstance(timeframe, str) or timeframe == 'all':
        return 0, now_ts

    if timeframe == 'yesterday':
        day_before = today_start - timedelta(days=1)
        return int(day_before.timestamp()), int(today_start.timestamp())

    # Named windows that all end "now" and start N days before local midnight
    lookback_days = {'today': 0, 'week': 7, 'month': 30, 'year': 365}
    if timeframe in lookback_days:
        start = today_start - timedelta(days=lookback_days[timeframe])
        return int(start.timestamp()), now_ts

    # Custom range: "start,end" as Unix timestamps
    try:
        parts = timeframe.split(',')
        return int(parts[0]), int(parts[1])
    except (ValueError, IndexError):
        # Not two integers: treat like 'all' instead of failing the request.
        # Only parse errors are caught, so KeyboardInterrupt and SystemExit
        # are no longer swallowed.
        return 0, now_ts
91
+
92
+
93
+ # ==========================================
94
+ # CACHE INVALIDATION SYSTEM
95
+ # ==========================================
96
+
97
+ _cache_version = 0 # Incremented on DB updates to invalidate all caches
98
+
99
+
100
def invalidate_caches():
    """
    Drop every cached result. Call after DB updates (sync, import, etc.).

    Clears the cached user rank tree together with the timeframe it was
    built for, and bumps the cache version counter.
    """
    global _cache_version, _user_rank_tree, _user_rank_tree_timeframe
    _user_rank_tree = _user_rank_tree_timeframe = None
    _cache_version += 1
106
+
107
+
108
+ # ==========================================
109
+ # GLOBAL ALGORITHM CACHES
110
+ # ==========================================
111
+
112
+ # RankTree for O(log n) user ranking - rebuilt on demand
113
+ _user_rank_tree = None
114
+ _user_rank_tree_timeframe = None
115
+ _user_rank_tree_version = -1
116
+
117
def get_user_rank_tree(timeframe: str):
    """
    Get or rebuild the user rank tree for efficient O(log n) rank queries.

    The tree is cached and rebuilt only when the timeframe or the cache
    version changes. Keys are negated message counts, so rank 1 is the most
    active user; values are dicts with ``user_id``, ``name`` and ``messages``.

    Args:
        timeframe: Timeframe string understood by ``parse_timeframe``.

    Returns:
        The cached ``RankTree`` for that timeframe.
    """
    global _user_rank_tree, _user_rank_tree_timeframe, _user_rank_tree_version

    if (_user_rank_tree is not None
            and _user_rank_tree_timeframe == timeframe
            and _user_rank_tree_version == _cache_version):
        return _user_rank_tree

    start_ts, end_ts = parse_timeframe(timeframe)

    # Remember the version the data is read under: if the caches are
    # invalidated while we build, the result is stale and must not be
    # stamped with the newer version.
    version_at_build = _cache_version

    conn = get_db()
    try:
        rows = conn.execute('''
        SELECT from_id, from_name, COUNT(*) as message_count
        FROM messages
        WHERE date_unixtime BETWEEN ? AND ?
        AND from_id IS NOT NULL AND from_id != ''
        GROUP BY from_id
        ORDER BY message_count DESC
        ''', (start_ts, end_ts)).fetchall()
    finally:
        # Always release the connection, even when the query fails
        conn.close()

    # Build into a local first so a failure midway never leaves a half-built
    # tree cached under the previous timeframe/version.
    tree = RankTree()
    for row in rows:
        # NOTE(review): the key is the message count alone, and RankTree.insert
        # overwrites on equal keys - users with identical counts collapse into
        # one entry. Confirm with the callers before changing the key shape.
        tree.insert(
            -row['message_count'],
            {'user_id': row['from_id'], 'name': row['from_name'], 'messages': row['message_count']}
        )

    _user_rank_tree = tree
    _user_rank_tree_timeframe = timeframe
    _user_rank_tree_version = version_at_build
    return _user_rank_tree
152
+
153
+
154
+ # ==========================================
155
+ # PAGE ROUTES
156
+ # ==========================================
157
+
158
@app.route('/')
def index():
    """Serve the main dashboard page by rendering ``index.html``."""
    template_name = 'index.html'
    return render_template(template_name)
162
+
163
+
164
@app.route('/users')
def users_page():
    """Serve the user leaderboard page by rendering ``users.html``."""
    template_name = 'users.html'
    return render_template(template_name)
168
+
169
+
170
@app.route('/moderation')
def moderation_page():
    """Serve the moderation analytics page by rendering ``moderation.html``."""
    template_name = 'moderation.html'
    return render_template(template_name)
174
+
175
+
176
@app.route('/search')
def search_page():
    """Serve the search page by rendering ``search.html``."""
    template_name = 'search.html'
    return render_template(template_name)
180
+
181
+
182
@app.route('/chat')
def chat_page():
    """Serve the Telegram-like chat view by rendering ``chat.html``."""
    template_name = 'chat.html'
    return render_template(template_name)
186
+
187
+
188
@app.route('/user/<user_id>')
def user_profile_page(user_id):
    """Serve one user's statistics page; the ``user_id`` URL segment is passed to ``user_profile.html``."""
    template_context = {'user_id': user_id}
    return render_template('user_profile.html', **template_context)
192
+
193
+
194
@app.route('/settings')
def settings_page():
    """Serve the settings and data update page by rendering ``settings.html``."""
    template_name = 'settings.html'
    return render_template(template_name)
198
+
199
+
200
+ # ==========================================
201
+ # API ENDPOINTS - OVERVIEW STATS
202
+ # ==========================================
203
+
204
@app.route('/api/overview')
def api_overview():
    """
    Get overview statistics for a timeframe as JSON.

    Query parameters:
        timeframe: Any value accepted by ``parse_timeframe`` (default 'all').

    Response fields: total_messages, active_users, total_users (all time),
    messages_per_day, users_per_day, messages_per_user, media_count,
    links_count, mentions_count, replies_count, forwards_count, days_span,
    first_message and last_message (Unix timestamps).
    """
    timeframe = request.args.get('timeframe', 'all')
    start_ts, end_ts = parse_timeframe(timeframe)
    window = (start_ts, end_ts)

    conn = get_db()
    try:
        # All per-window aggregates in a single scan of `messages`.
        # COUNT(col) counts only non-NULL values, so it equals the former
        # "COUNT(*) ... AND col IS NOT NULL" queries.
        totals = conn.execute('''
        SELECT
        COUNT(*),
        COUNT(DISTINCT from_id),
        MIN(date_unixtime),
        MAX(date_unixtime),
        SUM(has_media),
        SUM(has_links),
        SUM(has_mentions),
        COUNT(reply_to_message_id),
        COUNT(forwarded_from)
        FROM messages
        WHERE date_unixtime BETWEEN ? AND ?
        ''', window).fetchone()

        # Total users (all time)
        total_users = conn.execute('SELECT COUNT(*) FROM users').fetchone()[0]

        # Unique users for each calendar day (UTC, per SQLite 'unixepoch')
        daily_users = [r[0] for r in conn.execute('''
        SELECT COUNT(DISTINCT from_id) as users,
        date(datetime(date_unixtime, 'unixepoch')) as day
        FROM messages
        WHERE date_unixtime BETWEEN ? AND ?
        GROUP BY day
        ''', window).fetchall()]
    finally:
        # Release the connection even if a query fails
        conn.close()

    total_messages = totals[0]
    active_users = totals[1]

    # MIN/MAX/SUM are NULL when the window holds no messages
    first_msg = totals[2] or start_ts
    last_msg = totals[3] or end_ts
    media_count = totals[4] or 0
    links_count = totals[5] or 0
    mentions_count = totals[6] or 0
    replies_count = totals[7]
    forwards_count = totals[8]

    # Whole days spanned by the messages, never less than one
    days = max(1, (last_msg - first_msg) // 86400)

    messages_per_day = total_messages / days
    users_per_day = sum(daily_users) / len(daily_users) if daily_users else 0

    return jsonify({
        'total_messages': total_messages,
        'active_users': active_users,
        'total_users': total_users,
        'messages_per_day': round(messages_per_day, 1),
        'users_per_day': round(users_per_day, 1),
        'messages_per_user': round(total_messages / active_users, 1) if active_users else 0,
        'media_count': media_count,
        'links_count': links_count,
        'mentions_count': mentions_count,
        'replies_count': replies_count,
        'forwards_count': forwards_count,
        'days_span': days,
        'first_message': first_msg,
        'last_message': last_msg
    })
304
+
305
+
306
+ # ==========================================
307
+ # API ENDPOINTS - CHARTS
308
+ # ==========================================
309
+
310
@app.route('/api/chart/messages')
def api_chart_messages():
    """Return message counts grouped into hour, day or week periods.

    Query parameters:
        timeframe: forwarded to ``parse_timeframe``; defaults to ``'month'``.
        granularity: ``'hour'``, ``'week'`` or anything else for daily
            buckets; defaults to ``'day'``.

    Returns:
        A JSON list of ``{'label': period, 'value': count}`` ordered by period.
    """
    timeframe = request.args.get('timeframe', 'month')
    granularity = request.args.get('granularity', 'day')  # hour, day, week
    start_ts, end_ts = parse_timeframe(timeframe)

    # Only these fixed strftime patterns are ever interpolated into the SQL;
    # unknown granularities fall back to the daily pattern.
    period_formats = {
        'hour': '%Y-%m-%d %H:00',
        'week': '%Y-W%W',
    }
    format_str = period_formats.get(granularity, '%Y-%m-%d')

    conn = get_db()
    rows = conn.execute(f'''
        SELECT
            strftime('{format_str}', datetime(date_unixtime, 'unixepoch')) as period,
            COUNT(*) as count
        FROM messages
        WHERE date_unixtime BETWEEN ? AND ?
        GROUP BY period
        ORDER BY period
    ''', (start_ts, end_ts)).fetchall()
    conn.close()

    points = []
    for period, total in rows:
        points.append({'label': period, 'value': total})
    return jsonify(points)
340
+
341
+
342
@app.route('/api/chart/users')
def api_chart_users():
    """Return distinct-sender counts grouped into hour, day or week periods.

    Query parameters:
        timeframe: forwarded to ``parse_timeframe``; defaults to ``'month'``.
        granularity: ``'hour'``, ``'week'`` or anything else for daily
            buckets; defaults to ``'day'``.

    Returns:
        A JSON list of ``{'label': period, 'value': count}`` ordered by period.
    """
    timeframe = request.args.get('timeframe', 'month')
    granularity = request.args.get('granularity', 'day')
    start_ts, end_ts = parse_timeframe(timeframe)

    # Fixed set of strftime patterns; anything unrecognised means "per day".
    period_formats = {
        'hour': '%Y-%m-%d %H:00',
        'week': '%Y-W%W',
    }
    format_str = period_formats.get(granularity, '%Y-%m-%d')

    conn = get_db()
    rows = conn.execute(f'''
        SELECT
            strftime('{format_str}', datetime(date_unixtime, 'unixepoch')) as period,
            COUNT(DISTINCT from_id) as count
        FROM messages
        WHERE date_unixtime BETWEEN ? AND ?
        GROUP BY period
        ORDER BY period
    ''', (start_ts, end_ts)).fetchall()
    conn.close()

    points = []
    for period, senders in rows:
        points.append({'label': period, 'value': senders})
    return jsonify(points)
372
+
373
+
374
@app.route('/api/chart/heatmap')
def api_chart_heatmap():
    """Return a 7x24 grid of message counts (weekday rows, hour columns).

    Query parameters:
        timeframe: forwarded to ``parse_timeframe``; defaults to ``'all'``.

    Returns:
        A JSON object with the grid under ``data`` plus the ``days`` and
        ``hours`` axis labels. Row 0 is Sunday, matching SQLite's ``%w``.
    """
    timeframe = request.args.get('timeframe', 'all')
    start_ts, end_ts = parse_timeframe(timeframe)

    conn = get_db()
    cells = conn.execute('''
        SELECT
            CAST(strftime('%w', datetime(date_unixtime, 'unixepoch')) AS INTEGER) as dow,
            CAST(strftime('%H', datetime(date_unixtime, 'unixepoch')) AS INTEGER) as hour,
            COUNT(*) as count
        FROM messages
        WHERE date_unixtime BETWEEN ? AND ?
        GROUP BY dow, hour
    ''', (start_ts, end_ts)).fetchall()

    # Start from an all-zero grid so empty weekday/hour slots are present.
    grid = [[0] * 24 for _ in range(7)]
    for weekday, hour_of_day, total in cells:
        grid[weekday][hour_of_day] = total

    conn.close()

    day_names = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
    return jsonify({
        'data': grid,
        'days': day_names,
        'hours': list(range(24))
    })
406
+
407
+
408
@app.route('/api/chart/daily')
def api_chart_daily():
    """Return message counts per day of the week, Sunday first.

    Query parameters:
        timeframe: forwarded to ``parse_timeframe``; defaults to ``'all'``.

    Returns:
        A JSON list of seven ``{'label': day_name, 'value': count}`` entries;
        weekdays without messages are reported as 0.
    """
    timeframe = request.args.get('timeframe', 'all')
    start_ts, end_ts = parse_timeframe(timeframe)

    conn = get_db()

    days = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']

    rows = conn.execute('''
        SELECT
            CAST(strftime('%w', datetime(date_unixtime, 'unixepoch')) AS INTEGER) as dow,
            COUNT(*) as count
        FROM messages
        WHERE date_unixtime BETWEEN ? AND ?
        GROUP BY dow
        ORDER BY dow
    ''', (start_ts, end_ts)).fetchall()

    # Index by weekday name (SQLite %w: 0 = Sunday).
    per_day = {}
    for weekday_index, total in rows:
        per_day[days[weekday_index]] = total
    conn.close()

    series = []
    for day in days:
        series.append({'label': day, 'value': per_day.get(day, 0)})
    return jsonify(series)
432
+
433
+
434
@app.route('/api/chart/hourly')
def api_chart_hourly():
    """Return message counts per hour of the day.

    Query parameters:
        timeframe: forwarded to ``parse_timeframe``; defaults to ``'all'``.

    Returns:
        A JSON list of 24 ``{'label': 'HH:00', 'value': count}`` entries;
        hours without messages are reported as 0.
    """
    timeframe = request.args.get('timeframe', 'all')
    start_ts, end_ts = parse_timeframe(timeframe)

    conn = get_db()
    rows = conn.execute('''
        SELECT
            CAST(strftime('%H', datetime(date_unixtime, 'unixepoch')) AS INTEGER) as hour,
            COUNT(*) as count
        FROM messages
        WHERE date_unixtime BETWEEN ? AND ?
        GROUP BY hour
        ORDER BY hour
    ''', (start_ts, end_ts)).fetchall()

    per_hour = {}
    for hour_of_day, total in rows:
        per_hour[hour_of_day] = total
    conn.close()

    series = []
    for h in range(24):
        series.append({'label': f'{h:02d}:00', 'value': per_hour.get(h, 0)})
    return jsonify(series)
456
+
457
+
458
+ # ==========================================
459
+ # API ENDPOINTS - USERS
460
+ # ==========================================
461
+
462
def _participant_role(participant):
    """Map a participant record's flags to a role label.

    Precedence is creator, then admin, then bot; returns ``None`` when no
    flag is set.
    """
    for flag, role in (('is_creator', 'creator'), ('is_admin', 'admin'), ('is_bot', 'bot')):
        if participant.get(flag):
            return role
    return None


@app.route('/api/users')
def api_users():
    """Get user leaderboard including participants who never sent messages.

    Query parameters:
        timeframe: forwarded to ``parse_timeframe``; defaults to ``'all'``.
        limit: page size, default 50. Non-numeric values fall back to the
            default and negative values are clamped to 0.
        offset: page start, default 0, validated like ``limit``.
        include_inactive: ``'1'`` (default) appends participants that have
            no messages in the window after the ranked users.

    Returns:
        A JSON object with the requested page under ``users`` plus
        ``total``, ``total_active``, ``total_participants``, ``limit`` and
        ``offset``.
    """
    timeframe = request.args.get('timeframe', 'all')
    start_ts, end_ts = parse_timeframe(timeframe)
    # type=int makes a malformed value fall back to the default instead of
    # raising; clamping keeps a negative value from slicing from the end.
    limit = max(0, request.args.get('limit', 50, type=int))
    offset = max(0, request.args.get('offset', 0, type=int))
    include_inactive = request.args.get('include_inactive', '1') == '1'

    conn = get_db()
    try:
        # Total messages in the window, used for each user's percentage.
        total_messages = conn.execute('''
            SELECT COUNT(*) FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
        ''', (start_ts, end_ts)).fetchone()[0]

        # Per-user stats, already ordered by message count.
        stat_rows = conn.execute('''
            SELECT
                from_id,
                from_name,
                COUNT(*) as message_count,
                SUM(LENGTH(text_plain)) as char_count,
                SUM(has_links) as links,
                SUM(has_media) as media,
                MIN(date_unixtime) as first_seen,
                MAX(date_unixtime) as last_seen,
                COUNT(DISTINCT date(datetime(date_unixtime, 'unixepoch'))) as active_days
            FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
              AND from_id IS NOT NULL AND from_id != ''
            GROUP BY from_id
            ORDER BY message_count DESC
        ''', (start_ts, end_ts)).fetchall()

        # Participant data is optional; the table may not exist, so a failed
        # lookup is treated as "no participants" (deliberate best effort).
        participant_map = {}
        try:
            for p_row in conn.execute('SELECT * FROM participants').fetchall():
                participant_map[p_row['user_id']] = dict(p_row)
        except Exception:
            participant_map = {}
    finally:
        # Release the connection even when a query fails.
        conn.close()

    has_participants = bool(participant_map)

    leaderboard = []
    active_user_ids = set()
    for row in stat_rows:
        active_user_ids.add(row['from_id'])
        leaderboard.append({
            'user_id': row['from_id'],
            'name': row['from_name'] or 'Unknown',
            'messages': row['message_count'],
            'characters': row['char_count'] or 0,
            'percentage': round(100 * row['message_count'] / total_messages, 2) if total_messages else 0,
            'links': row['links'] or 0,
            'media': row['media'] or 0,
            'first_seen': row['first_seen'],
            'last_seen': row['last_seen'],
            'active_days': row['active_days'],
            'daily_average': round(row['message_count'] / max(1, row['active_days']), 1),
            'is_participant': False,
            'role': None,
        })

    if has_participants:
        # Enrich users who posted with their participant record.
        for user in leaderboard:
            p = participant_map.get(user['user_id'])
            if p:
                user['is_participant'] = True
                user['username'] = p.get('username', '')
                user['role'] = _participant_role(p)

        # Append participants with no messages in the window.
        if include_inactive:
            for uid, p in participant_map.items():
                if uid in active_user_ids:
                    continue
                # Skip NULL/empty name parts so a missing last name does not
                # render as the literal string "None".
                name = ' '.join(
                    part for part in (p.get('first_name'), p.get('last_name')) if part
                ).strip()
                leaderboard.append({
                    'user_id': uid,
                    'name': name or 'Unknown',
                    'messages': 0,
                    'characters': 0,
                    'percentage': 0,
                    'links': 0,
                    'media': 0,
                    'first_seen': None,
                    'last_seen': None,
                    'active_days': 0,
                    'daily_average': 0,
                    'is_participant': True,
                    'username': p.get('username', ''),
                    'role': _participant_role(p),
                })

    # Ranked users come first (query order); users without messages get no rank.
    for position, user in enumerate(leaderboard, start=1):
        user['rank'] = position if user['messages'] > 0 else None

    total_users = len(leaderboard)
    total_active = len(active_user_ids)

    return jsonify({
        'users': leaderboard[offset:offset + limit],
        'total': total_users,
        'total_active': total_active,
        'total_participants': total_users - total_active if has_participants else 0,
        'limit': limit,
        'offset': offset
    })
598
+
599
+
600
@app.route('/api/user/<user_id>')
def api_user_detail(user_id):
    """Get detailed stats for a specific user within a timeframe.

    Path parameters:
        user_id: value matched against ``messages.from_id``.

    Query parameters:
        timeframe: forwarded to ``parse_timeframe``; defaults to ``'all'``.

    Returns:
        A JSON object with totals, reply counts, rank, a 24-slot hourly
        activity list and up to 30 of the most recent active days. Responds
        with 404 when the user has no messages in the window.
    """
    timeframe = request.args.get('timeframe', 'all')
    start_ts, end_ts = parse_timeframe(timeframe)
    user_window = (user_id, start_ts, end_ts)

    conn = get_db()
    try:
        # Basic stats
        stats = conn.execute('''
            SELECT
                from_name,
                COUNT(*) as messages,
                SUM(LENGTH(text_plain)) as characters,
                SUM(has_links) as links,
                SUM(has_media) as media,
                SUM(has_mentions) as mentions,
                MIN(date_unixtime) as first_seen,
                MAX(date_unixtime) as last_seen,
                COUNT(DISTINCT date(datetime(date_unixtime, 'unixepoch'))) as active_days
            FROM messages
            WHERE from_id = ?
              AND date_unixtime BETWEEN ? AND ?
        ''', user_window).fetchone()

        if not stats or not stats['messages']:
            return jsonify({'error': 'User not found'}), 404

        # Replies sent
        replies_sent = conn.execute('''
            SELECT COUNT(*) FROM messages
            WHERE from_id = ? AND reply_to_message_id IS NOT NULL
              AND date_unixtime BETWEEN ? AND ?
        ''', user_window).fetchone()[0]

        # Replies received: messages in the window replying to this user
        replies_received = conn.execute('''
            SELECT COUNT(*) FROM messages m1
            JOIN messages m2 ON m1.reply_to_message_id = m2.id
            WHERE m2.from_id = ?
              AND m1.date_unixtime BETWEEN ? AND ?
        ''', user_window).fetchone()[0]

        # Activity by hour
        hourly = {r[0]: r[1] for r in conn.execute('''
            SELECT
                CAST(strftime('%H', datetime(date_unixtime, 'unixepoch')) AS INTEGER) as hour,
                COUNT(*) as count
            FROM messages
            WHERE from_id = ?
              AND date_unixtime BETWEEN ? AND ?
            GROUP BY hour
        ''', user_window).fetchall()}

        # Activity over time (most recent 30 active days)
        daily = [{'date': r[0], 'count': r[1]} for r in conn.execute('''
            SELECT
                date(datetime(date_unixtime, 'unixepoch')) as day,
                COUNT(*) as count
            FROM messages
            WHERE from_id = ?
              AND date_unixtime BETWEEN ? AND ?
            GROUP BY day
            ORDER BY day DESC
            LIMIT 30
        ''', user_window).fetchall()]

        # Rank = 1 + number of senders with strictly more messages. Rows
        # without a sender are excluded so this agrees with the /api/users
        # leaderboard, which applies the same filter.
        rank = conn.execute('''
            SELECT COUNT(*) + 1 FROM (
                SELECT from_id, COUNT(*) as cnt FROM messages
                WHERE date_unixtime BETWEEN ? AND ?
                  AND from_id IS NOT NULL AND from_id != ''
                GROUP BY from_id
            ) WHERE cnt > ?
        ''', (start_ts, end_ts, stats['messages'])).fetchone()[0]
    finally:
        # Release the connection on every path, including errors and the 404.
        conn.close()

    return jsonify({
        'user_id': user_id,
        'name': stats['from_name'] or 'Unknown',
        'messages': stats['messages'],
        'characters': stats['characters'] or 0,
        'links': stats['links'] or 0,
        'media': stats['media'] or 0,
        'mentions': stats['mentions'] or 0,
        'first_seen': stats['first_seen'],
        'last_seen': stats['last_seen'],
        'active_days': stats['active_days'],
        'daily_average': round(stats['messages'] / max(1, stats['active_days']), 1),
        'replies_sent': replies_sent,
        'replies_received': replies_received,
        'rank': rank,
        'hourly_activity': [hourly.get(h, 0) for h in range(24)],
        'daily_activity': daily
    })
703
+
704
+
705
@app.route('/api/user/<user_id>/profile')
def api_user_profile(user_id):
    """Get comprehensive all-time profile statistics for one user.

    Path parameters:
        user_id: value matched against ``messages.from_id`` and
            ``participants.user_id``.

    Returns:
        A JSON object with core totals, time stats, reply and forward stats,
        ranking, activity patterns and top shared links. A participant with
        no messages gets a reduced object with ``has_messages: False``.
        Responds with 404 when the user is neither a sender nor a participant.
    """
    conn = get_db()
    try:
        # ---- Participant info (from Telethon sync) ----
        participant = None
        try:
            found = conn.execute(
                'SELECT * FROM participants WHERE user_id = ?', (user_id,)
            ).fetchone()
            if found:
                participant = dict(found)
        except Exception:
            pass  # Table might not exist yet; participant data is optional

        # ---- Basic message stats ----
        stats = conn.execute('''
            SELECT
                from_name,
                COUNT(*) as total_messages,
                SUM(text_length) as total_chars,
                AVG(text_length) as avg_length,
                MAX(text_length) as max_length,
                SUM(has_links) as links_shared,
                SUM(has_media) as media_sent,
                SUM(has_photo) as photos_sent,
                SUM(has_mentions) as mentions_made,
                SUM(is_edited) as edits,
                MIN(date_unixtime) as first_message,
                MAX(date_unixtime) as last_message,
                COUNT(DISTINCT date(datetime(date_unixtime, 'unixepoch'))) as active_days
            FROM messages WHERE from_id = ?
        ''', (user_id,)).fetchone()

        if not stats or not stats['total_messages']:
            # User might be a participant who never sent a message
            if participant:
                # Skip NULL/empty parts so a missing last name is not rendered
                # as "None"; fall back to 'Unknown' like the leaderboard does.
                full_name = ' '.join(
                    part for part in (participant.get('first_name'), participant.get('last_name'))
                    if part
                ).strip()
                return jsonify({
                    'user_id': user_id,
                    'participant': participant,
                    'has_messages': False,
                    'name': full_name or 'Unknown'
                })
            return jsonify({'error': 'User not found'}), 404

        stats = dict(stats)

        # ---- Replies sent (who does this user reply to most) ----
        replies_to = [{'name': r[0], 'user_id': r[1], 'count': r[2]} for r in conn.execute('''
            SELECT r.from_name, r.from_id, COUNT(*) as cnt
            FROM messages m
            JOIN messages r ON m.reply_to_message_id = r.id
            WHERE m.from_id = ? AND r.from_id != ?
            GROUP BY r.from_id
            ORDER BY cnt DESC
            LIMIT 10
        ''', (user_id, user_id)).fetchall()]

        # ---- Replies received (who replies to this user most) ----
        replies_from = [{'name': r[0], 'user_id': r[1], 'count': r[2]} for r in conn.execute('''
            SELECT m.from_name, m.from_id, COUNT(*) as cnt
            FROM messages m
            JOIN messages r ON m.reply_to_message_id = r.id
            WHERE r.from_id = ? AND m.from_id != ?
            GROUP BY m.from_id
            ORDER BY cnt DESC
            LIMIT 10
        ''', (user_id, user_id)).fetchall()]

        # ---- Total replies sent/received ----
        total_replies_sent = conn.execute('''
            SELECT COUNT(*) FROM messages
            WHERE from_id = ? AND reply_to_message_id IS NOT NULL
        ''', (user_id,)).fetchone()[0]

        total_replies_received = conn.execute('''
            SELECT COUNT(*) FROM messages m
            JOIN messages r ON m.reply_to_message_id = r.id
            WHERE r.from_id = ? AND m.from_id != ?
        ''', (user_id, user_id)).fetchone()[0]

        # ---- Forwarded messages ----
        forwards_sent = conn.execute('''
            SELECT COUNT(*) FROM messages
            WHERE from_id = ? AND forwarded_from IS NOT NULL
        ''', (user_id,)).fetchone()[0]

        # ---- Top forwarded sources ----
        top_forward_sources = [{'name': r[0], 'count': r[1]} for r in conn.execute('''
            SELECT forwarded_from, COUNT(*) as cnt
            FROM messages
            WHERE from_id = ? AND forwarded_from IS NOT NULL
            GROUP BY forwarded_from
            ORDER BY cnt DESC
            LIMIT 5
        ''', (user_id,)).fetchall()]

        # ---- Activity by hour ----
        hourly = {r[0]: r[1] for r in conn.execute('''
            SELECT
                CAST(strftime('%H', datetime(date_unixtime, 'unixepoch')) AS INTEGER) as hour,
                COUNT(*) as count
            FROM messages WHERE from_id = ?
            GROUP BY hour
        ''', (user_id,)).fetchall()}

        # ---- Activity by weekday (SQLite %w: 0 = Sunday) ----
        weekday_data = {r[0]: r[1] for r in conn.execute('''
            SELECT
                CAST(strftime('%w', datetime(date_unixtime, 'unixepoch')) AS INTEGER) as weekday,
                COUNT(*) as count
            FROM messages WHERE from_id = ?
            GROUP BY weekday
        ''', (user_id,)).fetchall()}
        weekday_names = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
        weekday_activity = [
            {'day': weekday_names[d], 'count': weekday_data.get(d, 0)} for d in range(7)
        ]

        # ---- Activity trend (the 90 most recent active days) ----
        daily_activity = [{'date': r[0], 'count': r[1]} for r in conn.execute('''
            SELECT
                date(datetime(date_unixtime, 'unixepoch')) as day,
                COUNT(*) as count
            FROM messages WHERE from_id = ?
            GROUP BY day
            ORDER BY day DESC
            LIMIT 90
        ''', (user_id,)).fetchall()]

        # ---- Monthly trend ----
        monthly_activity = [{'month': r[0], 'count': r[1]} for r in conn.execute('''
            SELECT
                strftime('%Y-%m', datetime(date_unixtime, 'unixepoch')) as month,
                COUNT(*) as count
            FROM messages WHERE from_id = ?
            GROUP BY month
            ORDER BY month
        ''', (user_id,)).fetchall()]

        # ---- Top links shared ----
        top_links = [{'url': r[0], 'count': r[1]} for r in conn.execute('''
            SELECT e.value, COUNT(*) as cnt
            FROM entities e
            JOIN messages m ON e.message_id = m.id
            WHERE m.from_id = ? AND e.type = 'link'
            GROUP BY e.value
            ORDER BY cnt DESC
            LIMIT 10
        ''', (user_id,)).fetchall()]

        # ---- Rank among all users ----
        # Rows without a sender are excluded so the rank and the user total
        # agree with the /api/users leaderboard, which applies the same filter.
        rank = conn.execute('''
            SELECT COUNT(*) + 1 FROM (
                SELECT from_id, COUNT(*) as cnt FROM messages
                WHERE from_id IS NOT NULL AND from_id != ''
                GROUP BY from_id
            ) WHERE cnt > ?
        ''', (stats['total_messages'],)).fetchone()[0]

        total_users = conn.execute('''
            SELECT COUNT(DISTINCT from_id) FROM messages
            WHERE from_id IS NOT NULL AND from_id != ''
        ''').fetchone()[0]

        # ---- Average reply time (when replying to someone) ----
        # Only positive gaps shorter than one day (86400 s) are averaged.
        avg_reply_time = conn.execute('''
            SELECT AVG(m.date_unixtime - r.date_unixtime)
            FROM messages m
            JOIN messages r ON m.reply_to_message_id = r.id
            WHERE m.from_id = ?
              AND m.date_unixtime - r.date_unixtime > 0
              AND m.date_unixtime - r.date_unixtime < 86400
        ''', (user_id,)).fetchone()[0]
    finally:
        # Release the connection on every path, including errors and early returns.
        conn.close()

    # ---- Build response ----
    total_msgs = stats['total_messages']
    active_days = stats['active_days'] or 1
    first_msg = stats['first_message']
    last_msg = stats['last_message']
    # Calendar span between first and last message, never below one day.
    span_days = max(1, (last_msg - first_msg) / 86400) if first_msg and last_msg else 1

    return jsonify({
        'user_id': user_id,
        'name': stats['from_name'] or 'Unknown',
        'has_messages': True,
        'participant': participant,

        # Core stats
        'total_messages': total_msgs,
        'total_characters': stats['total_chars'] or 0,
        'avg_message_length': round(stats['avg_length'] or 0, 1),
        'max_message_length': stats['max_length'] or 0,
        'links_shared': stats['links_shared'] or 0,
        'media_sent': stats['media_sent'] or 0,
        'photos_sent': stats['photos_sent'] or 0,
        'mentions_made': stats['mentions_made'] or 0,
        'edits': stats['edits'] or 0,
        'forwards_sent': forwards_sent,

        # Time stats
        'first_message': first_msg,
        'last_message': last_msg,
        'active_days': active_days,
        'daily_average': round(total_msgs / active_days, 1),
        'messages_per_calendar_day': round(total_msgs / span_days, 1),

        # Reply stats
        'total_replies_sent': total_replies_sent,
        'total_replies_received': total_replies_received,
        'reply_ratio': round(total_replies_sent / max(1, total_msgs) * 100, 1),
        'avg_reply_time_seconds': round(avg_reply_time) if avg_reply_time else None,
        'replies_to': replies_to,
        'replies_from': replies_from,

        # Forward stats
        'top_forward_sources': top_forward_sources,

        # Ranking
        'rank': rank,
        'total_active_users': total_users,

        # Activity patterns
        'hourly_activity': [hourly.get(h, 0) for h in range(24)],
        'weekday_activity': weekday_activity,
        'daily_activity': daily_activity,
        'monthly_activity': monthly_activity,

        # Content
        'top_links': top_links,
    })
948
+
949
+
950
+ # ==========================================
951
+ # API ENDPOINTS - CONTENT ANALYTICS
952
+ # ==========================================
953
+
954
@app.route('/api/top/words')
def api_top_words():
    """Return the most frequent words in the selected timeframe.

    Words are runs of three or more Hebrew or ASCII letters taken from the
    lower-cased message text; counting is delegated to ``top_k_frequent``.

    Query parameters:
        timeframe: forwarded to ``parse_timeframe``; defaults to ``'all'``.
        limit: number of words to return, default 30.

    Returns:
        A JSON list of ``{'word': w, 'count': c}`` objects.
    """
    import re

    timeframe = request.args.get('timeframe', 'all')
    start_ts, end_ts = parse_timeframe(timeframe)
    limit = int(request.args.get('limit', 30))

    conn = get_db()
    text_rows = conn.execute('''
        SELECT text_plain FROM messages
        WHERE date_unixtime BETWEEN ? AND ?
          AND text_plain IS NOT NULL
    ''', (start_ts, end_ts)).fetchall()

    word_pattern = re.compile(r'[\u0590-\u05FFa-zA-Z]{3,}')
    words = [
        token
        for text_row in text_rows
        for token in word_pattern.findall(text_row[0].lower())
    ]

    conn.close()

    ranked = top_k_frequent(words, limit)
    return jsonify([{'word': w, 'count': c} for w, c in ranked])
980
+
981
+
982
@app.route('/api/top/domains')
def api_top_domains():
    """Get the most frequently shared link domains in the timeframe.

    Query parameters:
        timeframe: forwarded to ``parse_timeframe``; defaults to ``'all'``.
        limit: number of domains to return, default 20.

    Returns:
        A JSON list of ``{'domain': d, 'count': c}`` objects as ranked by
        ``top_k_frequent``.
    """
    import re

    timeframe = request.args.get('timeframe', 'all')
    start_ts, end_ts = parse_timeframe(timeframe)
    limit = int(request.args.get('limit', 20))

    conn = get_db()
    try:
        link_rows = conn.execute('''
            SELECT e.value FROM entities e
            JOIN messages m ON e.message_id = m.id
            WHERE e.type = 'link'
              AND m.date_unixtime BETWEEN ? AND ?
        ''', (start_ts, end_ts)).fetchall()
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # Scheme and host are case-insensitive, so match "HTTP://WWW.Example.com"
    # too and fold the captured host to lower case; otherwise the same site
    # is counted under several spellings.
    domain_pattern = re.compile(r'https?://(?:www\.)?([^/]+)', re.IGNORECASE)
    domains = []

    for link_row in link_rows:
        url = link_row[0]
        if not url:
            # A NULL/empty entity value would make pattern.match() raise.
            continue
        match = domain_pattern.match(url)
        if match:
            domains.append(match.group(1).lower())

    top_domains = top_k_frequent(domains, limit)
    return jsonify([{'domain': d, 'count': c} for d, c in top_domains])
1011
+
1012
+
1013
@app.route('/api/top/mentions')
def api_top_mentions():
    """Return the most frequently mentioned values in the timeframe.

    Query parameters:
        timeframe: forwarded to ``parse_timeframe``; defaults to ``'all'``.
        limit: number of mentions to return, default 20.

    Returns:
        A JSON list of ``{'mention': value, 'count': n}`` objects, most
        mentioned first.
    """
    timeframe = request.args.get('timeframe', 'all')
    start_ts, end_ts = parse_timeframe(timeframe)
    limit = int(request.args.get('limit', 20))

    conn = get_db()
    mention_rows = conn.execute('''
        SELECT e.value, COUNT(*) as count FROM entities e
        JOIN messages m ON e.message_id = m.id
        WHERE e.type = 'mention'
          AND m.date_unixtime BETWEEN ? AND ?
        GROUP BY e.value
        ORDER BY count DESC
        LIMIT ?
    ''', (start_ts, end_ts, limit)).fetchall()

    payload = []
    for handle, times in mention_rows:
        payload.append({'mention': handle, 'count': times})
    conn.close()

    return jsonify(payload)
1036
+
1037
+
1038
+ # ==========================================
1039
+ # API ENDPOINTS - ADVANCED ANALYTICS (Course Algorithms)
1040
+ # ==========================================
1041
+
1042
@app.route('/api/similar/<int:message_id>')
def api_similar_messages(message_id):
    """
    Find messages similar to a given message.

    A random sample of other messages longer than 20 characters is scored
    against the target with ``lcs_similarity``; matches at or above the
    threshold are returned, best first.

    Query parameters:
        threshold: minimum similarity to report, default 0.7.
        limit: maximum number of matches, default 10.
        sample: number of random messages to compare against, default 1000.

    Responds with 404 when the target message is missing or has no text.
    """
    threshold = float(request.args.get('threshold', 0.7))
    limit = int(request.args.get('limit', 10))
    sample_size = int(request.args.get('sample', 1000))

    def preview(body):
        # Previews are capped at 200 characters plus an ellipsis.
        if len(body) > 200:
            return body[:200] + '...'
        return body

    conn = get_db()

    # Look up the message we are comparing against.
    target = conn.execute('''
        SELECT text_plain, from_name, date FROM messages WHERE id = ?
    ''', (message_id,)).fetchone()

    if not target or not target['text_plain']:
        conn.close()
        return jsonify({'error': 'Message not found or empty'}), 404

    target_text = target['text_plain']

    # Random comparison sample, never including the target itself.
    sample_rows = conn.execute('''
        SELECT id, text_plain, from_name, date FROM messages
        WHERE id != ? AND text_plain IS NOT NULL AND LENGTH(text_plain) > 20
        ORDER BY RANDOM()
        LIMIT ?
    ''', (message_id, sample_size)).fetchall()
    candidates = [(r['id'], r['text_plain']) for r in sample_rows]
    conn.close()

    # Score every candidate and keep those that clear the threshold.
    matches = []
    for candidate_id, candidate_text in candidates:
        score = lcs_similarity(target_text, candidate_text)
        if score < threshold:
            continue
        matches.append({
            'id': candidate_id,
            'similarity': round(score * 100, 1),
            'text': preview(candidate_text)
        })

    # Highest similarity first, then truncate to the requested size.
    matches = sorted(matches, key=lambda m: m['similarity'], reverse=True)[:limit]

    return jsonify({
        'target': {
            'id': message_id,
            'text': preview(target_text),
            'from': target['from_name'],
            'date': target['date']
        },
        'similar': matches,
        'algorithm': 'LCS (Longest Common Subsequence)',
        'threshold': threshold
    })
1106
+
1107
+
1108
@app.route('/api/analytics/similar')
def api_find_all_similar():
    """
    Find similar message pairs within a random sample of the timeframe.

    Messages longer than 30 characters are sampled at random and handed to
    ``find_similar_messages``; at most the first 50 returned pairs are
    included in the response, while ``total_found`` reports all of them.

    Query parameters:
        timeframe: forwarded to ``parse_timeframe``; defaults to ``'all'``.
        threshold: minimum similarity, default 0.8.
        sample: number of messages to sample, default 500.
    """
    timeframe = request.args.get('timeframe', 'all')
    threshold = float(request.args.get('threshold', 0.8))
    sample_size = int(request.args.get('sample', 500))
    start_ts, end_ts = parse_timeframe(timeframe)

    conn = get_db()
    sample_rows = conn.execute('''
        SELECT id, text_plain, from_name, from_id FROM messages
        WHERE date_unixtime BETWEEN ? AND ?
          AND text_plain IS NOT NULL AND LENGTH(text_plain) > 30
        ORDER BY RANDOM()
        LIMIT ?
    ''', (start_ts, end_ts, sample_size)).fetchall()

    # Lookup tables and the (id, text) list for the pair finder.
    message_pairs = []
    text_by_id = {}
    author_by_id = {}
    for sample_row in sample_rows:
        msg_id = sample_row['id']
        body = sample_row['text_plain']
        message_pairs.append((msg_id, body))
        text_by_id[msg_id] = body
        author_by_id[msg_id] = sample_row['from_name']
    conn.close()

    similar_pairs = find_similar_messages(message_pairs, threshold=threshold, min_length=30)

    def describe(msg_id):
        # Text previews are cut to 150 characters.
        return {
            'id': msg_id,
            'text': text_by_id[msg_id][:150],
            'from': author_by_id[msg_id]
        }

    # Only the first 50 pairs are serialised.
    results = [
        {
            'message1': describe(first_id),
            'message2': describe(second_id),
            'similarity': round(score * 100, 1)
        }
        for first_id, second_id, score in similar_pairs[:50]
    ]

    return jsonify({
        'pairs': results,
        'total_found': len(similar_pairs),
        'algorithm': 'LCS (Longest Common Subsequence)',
        'threshold': threshold,
        'sample_size': sample_size
    })
1167
+
1168
+
1169
@app.route('/api/user/rank/<user_id>')
def api_user_rank_efficient(user_id):
    """
    Get a user's rank from the tree built by ``get_user_rank_tree``.

    The user's message count for the timeframe is read from the database
    and then looked up in the tree with ``tree.rank``.

    Query parameters:
        timeframe: forwarded to ``get_user_rank_tree`` and
            ``parse_timeframe``; defaults to ``'all'``.

    Responds with 404 when the user has no messages in the timeframe.
    """
    timeframe = request.args.get('timeframe', 'all')
    tree = get_user_rank_tree(timeframe)

    # The tree is not searched by user id; the count comes from SQL and
    # only the rank lookup goes through the tree.
    start_ts, end_ts = parse_timeframe(timeframe)
    conn = get_db()

    count_row = conn.execute('''
        SELECT COUNT(*) as count FROM messages
        WHERE from_id = ? AND date_unixtime BETWEEN ? AND ?
    ''', (user_id, start_ts, end_ts)).fetchone()
    user_count = count_row['count']

    if user_count == 0:
        conn.close()
        return jsonify({'error': 'User not found'}), 404

    # The count is negated for the lookup; per the original note the tree
    # stores negative counts (presumably so larger counts rank first).
    rank = tree.rank(-user_count)
    total = len(tree)

    conn.close()

    if total > 0:
        percentile = round(100 * (total - rank + 1) / total, 1)
    else:
        percentile = 0

    return jsonify({
        'user_id': user_id,
        'messages': user_count,
        'rank': rank,
        'total_users': total,
        'percentile': percentile,
        'algorithm': 'RankTree (Order Statistics Tree)',
        'complexity': 'O(log n)'
    })
1213
+
1214
+
1215
@app.route('/api/user/by-rank/<int:rank>')
def api_user_by_rank(rank):
    """
    Get the user at a specific rank via ``tree.select``.

    Query parameters:
        timeframe: forwarded to ``get_user_rank_tree``; defaults to ``'all'``.

    Responds with 400 when ``rank`` is outside ``1..len(tree)`` and with 404
    when the tree returns no user for that rank.
    """
    timeframe = request.args.get('timeframe', 'all')
    tree = get_user_rank_tree(timeframe)

    # Ranks are 1-based and bounded by the number of entries in the tree.
    if not (1 <= rank <= len(tree)):
        return jsonify({'error': f'Rank must be between 1 and {len(tree)}'}), 400

    user = tree.select(rank)
    if not user:
        return jsonify({'error': 'User not found'}), 404

    payload = {
        'rank': rank,
        'user': user,
        'total_users': len(tree),
        'algorithm': 'RankTree select(k)',
        'complexity': 'O(log n)'
    }
    return jsonify(payload)
1242
+
1243
+
1244
@app.route('/api/analytics/histogram')
def api_activity_histogram():
    """
    Get activity histogram using Bucket Sort.

    Algorithm: Bucket Sort
    Time: O(n + k) where k = number of buckets
    Use case: Efficient time-based grouping without SQL GROUP BY

    Query params:
        timeframe: window understood by parse_timeframe() (default 'month').
        bucket: bucket width in seconds; must be a positive integer
            (default 86400 = 1 day).

    Returns:
        JSON with the histogram (timestamp, formatted date, count per
        bucket), the bucket size and the number of records scanned.
        Responds 400 when ``bucket`` is not a positive integer.
    """
    timeframe = request.args.get('timeframe', 'month')

    # Reject malformed bucket sizes up front instead of failing with a 500.
    try:
        bucket_seconds = int(request.args.get('bucket', 86400))
    except (TypeError, ValueError):
        return jsonify({'error': 'bucket must be an integer number of seconds'}), 400
    if bucket_seconds <= 0:
        return jsonify({'error': 'bucket must be a positive number of seconds'}), 400

    start_ts, end_ts = parse_timeframe(timeframe)

    conn = get_db()
    try:
        cursor = conn.execute('''
            SELECT date_unixtime FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
        ''', (start_ts, end_ts))
        records = [{'date_unixtime': row[0]} for row in cursor.fetchall()]
    finally:
        # Release the connection even if the query raises.
        conn.close()

    # Use bucket sort algorithm
    histogram = time_histogram(records, 'date_unixtime', bucket_size=bucket_seconds)

    # Format for frontend
    from datetime import datetime
    result = [
        {
            'timestamp': bucket_time,
            'date': datetime.fromtimestamp(bucket_time).strftime('%Y-%m-%d %H:%M'),
            'count': count
        }
        for bucket_time, count in histogram
    ]

    return jsonify({
        'histogram': result,
        'bucket_size_seconds': bucket_seconds,
        'total_records': len(records),
        'algorithm': 'Bucket Sort',
        'complexity': 'O(n + k)'
    })
1287
+
1288
+
1289
@app.route('/api/analytics/percentiles')
def api_message_percentiles():
    """
    Report message-length percentiles via the selection helpers.

    Algorithm: Quickselect with Median of Medians
    Use case: Analyze message length distribution without sorting

    Query params:
        timeframe: window understood by parse_timeframe() (default 'all').

    Returns:
        JSON with count, min, max, median and the 25/75/90/95/99th
        percentiles of non-empty message lengths; 404 when no message
        qualifies.
    """
    start_ts, end_ts = parse_timeframe(request.args.get('timeframe', 'all'))

    conn = get_db()
    rows = conn.execute('''
        SELECT LENGTH(text_plain) as length FROM messages
        WHERE date_unixtime BETWEEN ? AND ?
        AND text_plain IS NOT NULL
    ''', (start_ts, end_ts)).fetchall()

    # Keep only truthy lengths (drops NULL and zero-length texts).
    lengths = []
    for row in rows:
        if row[0]:
            lengths.append(row[0])
    conn.close()

    if len(lengths) == 0:
        return jsonify({'error': 'No messages found'}), 404

    # Statistics are evaluated in the same order as the response keys.
    result = {'count': len(lengths)}
    result['min'] = min(lengths)
    result['max'] = max(lengths)
    result['median'] = find_median(lengths)
    result['p25'] = find_percentile(lengths, 25)
    result['p75'] = find_percentile(lengths, 75)
    result['p90'] = find_percentile(lengths, 90)
    result['p95'] = find_percentile(lengths, 95)
    result['p99'] = find_percentile(lengths, 99)
    result['algorithm'] = 'Quickselect with Median of Medians'
    result['complexity'] = 'O(n) guaranteed'

    return jsonify(result)
1331
+
1332
+
1333
+ # ==========================================
1334
+ # API ENDPOINTS - SEARCH
1335
+ # ==========================================
1336
+
1337
@app.route('/api/search')
def api_search():
    """
    Full-text search over messages.

    Query params:
        q: FTS query string; an empty value yields an empty result set.
        timeframe: window understood by parse_timeframe() (default 'all').
        limit: page size (default 50); non-numeric or negative values
            fall back to the default.
        offset: rows to skip (default 0); non-numeric or negative values
            fall back to 0.

    Returns:
        JSON with results (text truncated to 300 chars), query, limit and
        offset. Responds 400 when the database rejects the query, e.g.
        because ``q`` is not valid FTS syntax.
    """
    import sqlite3  # local import: only needed for the error class below

    query = request.args.get('q', '')
    timeframe = request.args.get('timeframe', 'all')
    start_ts, end_ts = parse_timeframe(timeframe)

    # type=int makes a malformed value fall back to the default instead of
    # raising ValueError (HTTP 500).
    limit = request.args.get('limit', 50, type=int)
    offset = request.args.get('offset', 0, type=int)
    # A negative LIMIT removes the upper bound in SQLite; never allow it.
    if limit < 0:
        limit = 50
    if offset < 0:
        offset = 0

    if not query:
        return jsonify({'results': [], 'total': 0})

    conn = get_db()
    try:
        cursor = conn.execute('''
            SELECT
                m.id,
                m.date,
                m.from_name,
                m.from_id,
                m.text_plain,
                m.has_links,
                m.has_media
            FROM messages_fts
            JOIN messages m ON messages_fts.rowid = m.id
            WHERE messages_fts MATCH ?
            AND m.date_unixtime BETWEEN ? AND ?
            ORDER BY m.date_unixtime DESC
            LIMIT ? OFFSET ?
        ''', (query, start_ts, end_ts, limit, offset))

        results = [{
            'id': row['id'],
            'date': row['date'],
            'from_name': row['from_name'],
            'from_id': row['from_id'],
            'text': row['text_plain'][:300] if row['text_plain'] else '',
            'has_links': bool(row['has_links']),
            'has_media': bool(row['has_media'])
        } for row in cursor.fetchall()]
    except sqlite3.Error as e:
        # Typically a user-supplied string that is not valid FTS syntax.
        return jsonify({'error': f'Invalid search query: {e}', 'query': query}), 400
    finally:
        conn.close()

    return jsonify({
        'results': results,
        'query': query,
        'limit': limit,
        'offset': offset
    })
1386
+
1387
+
1388
+ # ==========================================
1389
+ # API ENDPOINTS - CHAT VIEW
1390
+ # ==========================================
1391
+
1392
@app.route('/api/chat/messages')
def api_chat_messages():
    """
    Get messages for the chat view with optional filters.

    Query params:
        offset / limit: pagination (defaults 0 / 50); non-numeric or
            negative values fall back to the defaults.
        user_id: restrict to one sender (matched against from_id).
        search: FTS query matched against messages_fts.
        date_from / date_to: inclusive bounds compared against m.date.
        has_media: '1' or '0' to require / exclude media.
        has_link: '1' to require links.

    Returns:
        JSON with messages (each carrying reply preview columns and an
        ``entities`` list), total, offset, limit and has_more.
    """
    # type=int makes malformed values fall back to the default instead of
    # raising ValueError (HTTP 500).
    offset = request.args.get('offset', 0, type=int)
    limit = request.args.get('limit', 50, type=int)
    if offset < 0:
        offset = 0
    # A negative LIMIT removes the upper bound in SQLite; never allow it.
    if limit < 0:
        limit = 50

    user_id = request.args.get('user_id')
    search = request.args.get('search')
    date_from = request.args.get('date_from')
    date_to = request.args.get('date_to')
    has_media = request.args.get('has_media')
    has_link = request.args.get('has_link')

    # Build the WHERE clause; "1=1" keeps the join valid with no filters.
    conditions = ["1=1"]
    params = []

    if user_id:
        conditions.append("m.from_id = ?")
        params.append(user_id)

    if date_from:
        conditions.append("m.date >= ?")
        params.append(date_from)

    if date_to:
        conditions.append("m.date <= ?")
        params.append(date_to)

    if has_media == '1':
        conditions.append("m.has_media = 1")
    elif has_media == '0':
        conditions.append("m.has_media = 0")

    if has_link == '1':
        conditions.append("m.has_links = 1")

    # Handle FTS search
    if search:
        conditions.append("""m.id IN (
            SELECT rowid FROM messages_fts WHERE messages_fts MATCH ?
        )""")
        params.append(search)

    # Only fixed SQL fragments are joined here; all values stay bound.
    where_clause = " AND ".join(conditions)

    conn = get_db()
    try:
        # Get total count
        cursor = conn.execute(f"SELECT COUNT(*) FROM messages m WHERE {where_clause}", params)
        total = cursor.fetchone()[0]

        # Get messages with reply info
        query = f"""
            SELECT
                m.id,
                m.id as message_id,
                m.date,
                m.from_id,
                m.from_name,
                m.text_plain as text,
                m.reply_to_message_id,
                m.forwarded_from,
                m.forwarded_from_id,
                m.has_media,
                m.has_photo,
                m.has_links as has_link,
                m.has_mentions,
                m.is_edited,
                r.from_name as reply_to_name,
                substr(r.text_plain, 1, 100) as reply_to_text
            FROM messages m
            LEFT JOIN messages r ON m.reply_to_message_id = r.id
            WHERE {where_clause}
            ORDER BY m.date ASC
            LIMIT ? OFFSET ?
        """
        cursor = conn.execute(query, params + [limit, offset])
        messages = [dict(row) for row in cursor.fetchall()]

        # Fetch entities (links, mentions) for these messages
        if messages:
            msg_ids = [m['id'] for m in messages]
            placeholders = ','.join('?' * len(msg_ids))
            ent_cursor = conn.execute(f"""
                SELECT message_id, type, value
                FROM entities
                WHERE message_id IN ({placeholders})
            """, msg_ids)

            # Group entities by message_id
            entities_map = {}
            for row in ent_cursor.fetchall():
                entities_map.setdefault(row[0], []).append({'type': row[1], 'value': row[2]})

            # Attach entities to messages
            for msg in messages:
                msg['entities'] = entities_map.get(msg['id'], [])
    finally:
        # Release the connection even if one of the queries raises
        # (e.g. malformed FTS syntax in ``search``).
        conn.close()

    return jsonify({
        'messages': messages,
        'total': total,
        'offset': offset,
        'limit': limit,
        'has_more': offset + limit < total
    })
1504
+
1505
+
1506
@app.route('/api/chat/thread/<int:message_id>')
def api_chat_thread(message_id):
    """
    Get the conversation thread for a message.

    The thread consists of the message itself, the chain of messages it
    (transitively) replies to, and every (transitive) reply to it. Both
    walks are iterative so very long reply chains cannot hit Python's
    recursion limit.

    Returns:
        JSON list of messages (message_id, date, from_name, text,
        reply_to_message_id) sorted by date.
    """
    columns = "id as message_id, date, from_name, text_plain as text, reply_to_message_id"
    visited = set()

    conn = get_db()
    try:
        # --- Walk upwards: the message and its ancestors ---
        chain = []
        current = message_id
        while current not in visited:  # visited check also breaks reply cycles
            visited.add(current)
            row = conn.execute(
                f"SELECT {columns} FROM messages WHERE id = ?", (current,)
            ).fetchone()
            if row is None:
                break
            chain.append(dict(row))
            if not row['reply_to_message_id']:
                break
            current = row['reply_to_message_id']

        # Oldest ancestor first, requested message last.
        thread = chain[::-1]

        def fetch_replies(parent_id):
            """Return the direct replies to parent_id ordered by date."""
            return conn.execute(
                f"SELECT {columns} FROM messages WHERE reply_to_message_id = ? ORDER BY date",
                (parent_id,)
            ).fetchall()

        # --- Walk downwards: depth-first over replies using an explicit
        # stack of iterators (same visiting order as a recursive walk) ---
        pending = [iter(fetch_replies(message_id))]
        while pending:
            row = next(pending[-1], None)
            if row is None:
                pending.pop()
                continue
            reply_id = row['message_id']
            if reply_id in visited:
                continue
            visited.add(reply_id)
            thread.append(dict(row))
            pending.append(iter(fetch_replies(reply_id)))
    finally:
        # Release the connection even if a query raises.
        conn.close()

    # Sort by date
    thread.sort(key=lambda x: x['date'])

    return jsonify(thread)
1556
+
1557
+
1558
@app.route('/api/chat/context/<int:message_id>')
def api_chat_context(message_id):
    """
    Get messages around a specific message.

    Messages are ordered by (date, id), so messages that share the
    target's exact ``date`` value are included in the surrounding window
    instead of being dropped from both sides.

    Query params:
        before: number of earlier messages to include (default 20).
        after: number of later messages to include (default 20).
        Non-numeric values fall back to the default; negatives become 0.

    Returns:
        JSON with ``messages`` (before + target + after, chronological)
        and ``target_id``. ``messages`` is empty when the id is unknown.
    """
    # type=int makes malformed values fall back to the default; clamping
    # prevents a negative LIMIT, which removes the upper bound in SQLite.
    before = max(0, request.args.get('before', 20, type=int))
    after = max(0, request.args.get('after', 20, type=int))

    columns = """id as message_id, date, from_id, from_name, text_plain as text,
                reply_to_message_id, has_media, has_links as has_link"""

    conn = get_db()
    try:
        # Get target message (also provides the pivot date)
        row = conn.execute(
            f"SELECT {columns} FROM messages WHERE id = ?", (message_id,)
        ).fetchone()

        if not row:
            return jsonify({'messages': [], 'target_id': message_id})

        target_msg = dict(row)
        target_date = target_msg['date']

        # Get messages before; id breaks ties between equal dates
        cursor = conn.execute(f"""
            SELECT {columns}
            FROM messages
            WHERE date < ? OR (date = ? AND id < ?)
            ORDER BY date DESC, id DESC
            LIMIT ?
        """, (target_date, target_date, message_id, before))
        before_msgs = [dict(r) for r in cursor.fetchall()]
        before_msgs.reverse()  # back to chronological order

        # Get messages after
        cursor = conn.execute(f"""
            SELECT {columns}
            FROM messages
            WHERE date > ? OR (date = ? AND id > ?)
            ORDER BY date ASC, id ASC
            LIMIT ?
        """, (target_date, target_date, message_id, after))
        after_msgs = [dict(r) for r in cursor.fetchall()]
    finally:
        # Release the connection on every path, including errors.
        conn.close()

    return jsonify({
        'messages': before_msgs + [target_msg] + after_msgs,
        'target_id': message_id
    })
1613
+
1614
+
1615
+ # ==========================================
1616
+ # API ENDPOINTS - AI SEARCH
1617
+ # ==========================================
1618
+
1619
# Lazily created AI engine shared by the AI endpoints.
_ai_engine = None
# Set once construction has been tried, so a failure is not retried on
# every request (api_ai_reset clears it).
_ai_engine_init_attempted = False


def get_ai_engine():
    """
    Return the shared AISearchEngine, building it on first use.

    Returns:
        The engine instance, or None when construction failed (now or on
        an earlier call that has not been reset).
    """
    global _ai_engine, _ai_engine_init_attempted

    # Either already built, or already tried and failed (engine is None).
    if _ai_engine is not None or _ai_engine_init_attempted:
        return _ai_engine

    _ai_engine_init_attempted = True

    try:
        import os
        from ai_search import AISearchEngine

        provider = os.getenv('AI_PROVIDER', 'ollama')
        # Generic key wins; otherwise try the provider-specific variable.
        api_key = os.getenv('AI_API_KEY')
        if not api_key:
            api_key = os.getenv(f'{provider.upper()}_API_KEY')

        print(f"Initializing AI engine with provider: {provider}")
        _ai_engine = AISearchEngine(DB_PATH, provider, api_key)
        print("AI engine initialized successfully")
        return _ai_engine
    except Exception as e:
        print(f"AI Search not available: {e}")
        import traceback
        traceback.print_exc()
        return None
1652
+
1653
+
1654
@app.route('/api/ai/status')
def api_ai_status():
    """
    Report AI engine and semantic-search status for debugging.

    NOTE(review): the response includes the first 8 characters of the
    configured API key; confirm this endpoint is not publicly reachable.
    """
    provider = os.getenv('AI_PROVIDER', 'ollama')
    api_key = os.getenv('AI_API_KEY') or os.getenv(f'{provider.upper()}_API_KEY')

    # Only keys longer than 8 characters get a preview.
    if api_key and len(api_key) > 8:
        key_preview = f"{api_key[:8]}..."
    else:
        key_preview = None

    status = {
        'provider': provider,
        'api_key_set': bool(api_key),
        'api_key_preview': key_preview,
        'ai_engine_initialized': _ai_engine is not None,
        'init_attempted': _ai_engine_init_attempted,
        'semantic_search_available': HAS_SEMANTIC_SEARCH,
    }

    # First status call may trigger the lazy initialization itself.
    never_tried = _ai_engine is None and not _ai_engine_init_attempted
    if never_tried:
        status['ai_engine_initialized'] = get_ai_engine() is not None

    # Embedding details are best-effort; failures are reported, not raised.
    if HAS_SEMANTIC_SEARCH:
        try:
            semantic = get_semantic_search()
            status['embeddings_available'] = semantic.is_available()
            status['embeddings_stats'] = semantic.stats()
        except Exception as e:
            status['embeddings_error'] = str(e)

    return jsonify(status)
1684
+
1685
+
1686
@app.route('/api/ai/reset')
def api_ai_reset():
    """Drop the cached AI engine so the next request rebuilds it."""
    global _ai_engine, _ai_engine_init_attempted

    # Clearing both values makes get_ai_engine() attempt construction again.
    _ai_engine, _ai_engine_init_attempted = None, False

    response = {
        'status': 'reset',
        'message': 'AI engine will be reinitialized on next request'
    }
    return jsonify(response)
1693
+
1694
+
1695
@app.route('/api/cache/invalidate')
def api_cache_invalidate():
    """Invalidate all caches. Call after DB updates (daily sync, import, etc.)."""
    invalidate_caches()
    # Read the version only after invalidation so the new value is reported.
    response = {'status': 'invalidated', 'new_version': _cache_version}
    return jsonify(response)
1700
+
1701
+
1702
@app.route('/api/embeddings/reload')
def api_embeddings_reload():
    """
    Reload embeddings from the DB (call after daily sync adds new ones).

    Returns:
        JSON with the previous and new embedding counts and their
        difference, or an ``error`` entry when semantic search is
        unavailable or the reload fails.
    """
    if not HAS_SEMANTIC_SEARCH:
        return jsonify({'error': 'Semantic search not available'})

    try:
        semantic = get_semantic_search()

        # Nothing counts as "previous" until embeddings were loaded once.
        if semantic.embeddings_loaded:
            previous = len(semantic.message_ids)
        else:
            previous = 0

        semantic.reload_embeddings()
        current = len(semantic.message_ids)

        return jsonify({
            'status': 'reloaded',
            'previous_count': previous,
            'new_count': current,
            'added': current - previous
        })
    except Exception as e:
        return jsonify({'error': str(e)})
1720
+
1721
+
1722
@app.route('/api/ai/search', methods=['POST'])
def api_ai_search():
    """
    AI-powered natural language search.

    JSON body:
        query: the natural-language question (required).
        mode: 'auto' (default), 'sql', 'context' or 'semantic'.

    A missing, unparsable or non-object body is treated like an empty
    request and answered with the regular 'Query required' error instead
    of raising.

    Returns:
        JSON produced by the selected search path; failures are reported
        through an ``error`` entry.
    """
    # silent=True: return None instead of aborting on a bad/missing body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    query = data.get('query', '')
    mode = data.get('mode', 'auto')  # 'auto', 'sql', 'context', or 'semantic'

    if not query:
        return jsonify({'error': 'Query required'})

    # Semantic mode: Use pre-computed embeddings + AI reasoning
    if mode == 'semantic':
        if not HAS_SEMANTIC_SEARCH:
            return jsonify({'error': 'Semantic search not available. Install sentence-transformers.'})
        try:
            ss = get_semantic_search()
            if not ss.is_available():
                return jsonify({'error': 'embeddings.db not found. Run the Colab notebook first.'})

            # Get AI engine for reasoning
            ai_engine = get_ai_engine()

            if ai_engine:
                # Semantic search + AI reasoning
                result = ss.search_with_ai_answer(query, ai_engine, limit=30)
                return jsonify(result)
            else:
                # Just semantic search without AI reasoning
                results = ss.search_with_full_text(query, limit=30)
                provider = os.getenv('AI_PROVIDER', 'ollama')
                api_key_set = bool(os.getenv('AI_API_KEY') or os.getenv(f'{provider.upper()}_API_KEY'))
                return jsonify({
                    'query': query,
                    'mode': 'semantic',
                    'results': results,
                    'count': len(results),
                    'answer': f"נמצאו {len(results)} הודעות דומות סמנטית לשאילתה.\n\n⚠️ AI לא זמין - בדוק שה-API key מוגדר (provider: {provider}, key set: {api_key_set})"
                })
        except Exception as e:
            return jsonify({'error': f'Semantic search error: {str(e)}'})

    engine = get_ai_engine()

    if engine is None:
        # Fallback: Use basic SQL search
        return fallback_ai_search(query)

    try:
        # Context mode: AI reads messages and reasons over them
        if mode == 'context':
            result = engine.context_search(query)
        # SQL mode: Generate SQL and execute
        elif mode == 'sql':
            result = engine.search(query, generate_answer=True)
        # Auto mode: Try SQL first, fall back to context if no results
        else:
            result = engine.search(query, generate_answer=True)
            # If no results or error, try context search
            if result.get('count', 0) == 0 or 'error' in result:
                result = engine.context_search(query)

        return jsonify(result)
    except Exception as e:
        return jsonify({'error': str(e), 'query': query})
1786
+
1787
+
1788
def fallback_ai_search(query: str):
    """
    Keyword-based fallback search used when no AI engine is available.

    Words longer than two characters are taken from ``query``, each one is
    quoted as an FTS string (so operators such as AND/OR/NOT or
    punctuation typed by the user are treated as literal text), and the
    terms are OR-ed together in a MATCH against messages_fts.

    Args:
        query: raw user query.

    Returns:
        JSON response with up to 20 most recent matches, the FTS
        expression used, a short answer string and ``fallback: True``;
        or a JSON ``error`` entry when no keyword remains or the query
        fails.
    """
    # Simple keyword extraction; double quotes are removed because they
    # delimit FTS strings and would break the quoting below.
    words = [w.replace('"', '') for w in query.split()]
    keywords = [w for w in words if len(w) > 2]

    if not keywords:
        # Checked before opening a connection so nothing is left unclosed.
        return jsonify({'error': 'No valid keywords', 'query': query})

    # Build FTS query from quoted terms
    fts_query = ' OR '.join(f'"{w}"' for w in keywords)

    conn = get_db()
    try:
        cursor = conn.execute('''
            SELECT
                m.id as message_id, m.date, m.from_name, m.text_plain as text
            FROM messages_fts
            JOIN messages m ON messages_fts.rowid = m.id
            WHERE messages_fts MATCH ?
            ORDER BY m.date DESC
            LIMIT 20
        ''', (fts_query,))
        results = [dict(row) for row in cursor.fetchall()]
    except Exception as e:
        return jsonify({'error': str(e), 'query': query})
    finally:
        conn.close()

    # Generate simple answer
    if results:
        answer = f"נמצאו {len(results)} הודעות עם המילים: {', '.join(keywords)}"
    else:
        answer = f"לא נמצאו הודעות עם המילים: {', '.join(keywords)}"

    return jsonify({
        'query': query,
        'sql': f"FTS MATCH: {fts_query}",
        'results': results,
        'count': len(results),
        'answer': answer,
        'fallback': True
    })
1833
+
1834
+
1835
@app.route('/api/ai/thread/<int:message_id>')
def api_ai_thread(message_id):
    """
    Return the thread for a message, via the AI engine when available.

    Falls back to the plain api_chat_thread() lookup when no engine could
    be created; engine failures are reported as a JSON ``error`` entry.
    """
    engine = get_ai_engine()

    if engine is not None:
        try:
            return jsonify(engine.get_thread(message_id))
        except Exception as e:
            return jsonify({'error': str(e)})

    # No engine: use basic thread retrieval.
    return api_chat_thread(message_id)
1849
+
1850
+
1851
@app.route('/api/ai/similar/<int:message_id>')
def api_ai_similar(message_id):
    """
    Find messages similar to the given one using the AI engine.

    Query params:
        limit: maximum number of similar messages (default 10).

    Returns:
        JSON from engine.find_similar_messages(), or an ``error`` entry
        when the engine is unavailable or the lookup fails.
    """
    max_results = int(request.args.get('limit', 10))
    engine = get_ai_engine()

    if engine is None:
        return jsonify({'error': 'AI not available'})

    try:
        matches = engine.find_similar_messages(message_id, max_results)
    except Exception as e:
        return jsonify({'error': str(e)})
    return jsonify(matches)
1866
+
1867
+
1868
+ # ==========================================
1869
+ # API ENDPOINTS - DATABASE UPDATE
1870
+ # ==========================================
1871
+
1872
@app.route('/api/update', methods=['POST'])
def api_update_database():
    """
    Update database with new JSON data.

    Disabled in production - updates are done locally via daily_sync.py.
    The endpoint always answers 403. The former upload/import
    implementation sat after this unconditional return and could never
    run, so it has been removed.
    """
    return jsonify({'error': 'Database updates are disabled on this server. Run daily_sync.py locally.'}), 403
1921
+
1922
+
1923
@app.route('/api/db/stats')
def api_db_stats():
    """
    Get database statistics.

    Returns:
        JSON with total_messages, total_users, first_message,
        last_message and, when the database file exists, db_size_mb.
    """
    import os

    conn = get_db()

    def scalar_row(sql):
        """Run a query and return its first row."""
        return conn.execute(sql).fetchone()

    total_messages = scalar_row('SELECT COUNT(*) FROM messages')[0]
    total_users = scalar_row(
        'SELECT COUNT(DISTINCT from_id) FROM messages WHERE from_id IS NOT NULL'
    )[0]
    date_range = scalar_row('SELECT MIN(date), MAX(date) FROM messages')

    stats = {
        'total_messages': total_messages,
        'total_users': total_users,
        'first_message': date_range[0],
        'last_message': date_range[1],
    }

    # Size is only reported when the file is present on disk.
    if os.path.exists(DB_PATH):
        size_bytes = os.path.getsize(DB_PATH)
        stats['db_size_mb'] = round(size_bytes / (1024 * 1024), 2)

    conn.close()

    return jsonify(stats)
1952
+
1953
+
1954
+ # ==========================================
1955
+ # API ENDPOINTS - EXPORT
1956
+ # ==========================================
1957
+
1958
@app.route('/api/export/users')
def api_export_users():
    """
    Export per-user aggregates as a CSV download.

    Query params:
        timeframe: window understood by parse_timeframe() (default 'all').

    Returns:
        text/csv attachment ``users_export.csv`` with one row per sender,
        ordered by message count (descending).
    """
    start_ts, end_ts = parse_timeframe(request.args.get('timeframe', 'all'))

    conn = get_db()
    rows = conn.execute('''
        SELECT
            from_id,
            from_name,
            COUNT(*) as message_count,
            SUM(LENGTH(text_plain)) as char_count,
            SUM(has_links) as links,
            SUM(has_media) as media,
            MIN(date_unixtime) as first_seen,
            MAX(date_unixtime) as last_seen
        FROM messages
        WHERE date_unixtime BETWEEN ? AND ?
        AND from_id IS NOT NULL
        GROUP BY from_id
        ORDER BY message_count DESC
    ''', (start_ts, end_ts)).fetchall()

    def iso_or_blank(ts):
        """Format a unix timestamp as ISO 8601, or '' when it is falsy."""
        if ts:
            return datetime.fromtimestamp(ts).isoformat()
        return ''

    buffer = io.StringIO()
    writer = csv.writer(buffer)
    writer.writerow(['User ID', 'Name', 'Messages', 'Characters', 'Links', 'Media', 'First Seen', 'Last Seen'])

    for record in rows:
        # SUM() yields NULL for groups without values; export those as 0.
        writer.writerow([
            record['from_id'],
            record['from_name'],
            record['message_count'],
            record['char_count'] or 0,
            record['links'] or 0,
            record['media'] or 0,
            iso_or_blank(record['first_seen']),
            iso_or_blank(record['last_seen'])
        ])

    conn.close()

    buffer.seek(0)
    return Response(
        buffer.getvalue(),
        mimetype='text/csv',
        headers={'Content-Disposition': 'attachment; filename=users_export.csv'}
    )
2007
+
2008
+
2009
@app.route('/api/export/messages')
def api_export_messages():
    """
    Export messages as a CSV download.

    Query params:
        timeframe: window understood by parse_timeframe() (default 'all').
        limit: maximum number of rows (default 10000).

    Returns:
        text/csv attachment ``messages_export.csv``, newest first, with
        message text truncated to 500 characters.
    """
    start_ts, end_ts = parse_timeframe(request.args.get('timeframe', 'all'))
    max_rows = int(request.args.get('limit', 10000))

    conn = get_db()
    rows = conn.execute('''
        SELECT
            id, date, from_id, from_name, text_plain,
            has_links, has_media, has_mentions,
            reply_to_message_id
        FROM messages
        WHERE date_unixtime BETWEEN ? AND ?
        ORDER BY date_unixtime DESC
        LIMIT ?
    ''', (start_ts, end_ts, max_rows)).fetchall()

    buffer = io.StringIO()
    writer = csv.writer(buffer)
    writer.writerow(['ID', 'Date', 'User ID', 'User Name', 'Text', 'Has Links', 'Has Media', 'Has Mentions', 'Reply To'])

    for record in rows:
        body = record['text_plain']
        # Empty/NULL text is exported as an empty cell.
        snippet = body[:500] if body else ''
        writer.writerow([
            record['id'],
            record['date'],
            record['from_id'],
            record['from_name'],
            snippet,
            record['has_links'],
            record['has_media'],
            record['has_mentions'],
            record['reply_to_message_id']
        ])

    conn.close()

    buffer.seek(0)
    return Response(
        buffer.getvalue(),
        mimetype='text/csv',
        headers={'Content-Disposition': 'attachment; filename=messages_export.csv'}
    )
2054
+
2055
+
2056
+ # ==========================================
2057
+ # MAIN
2058
+ # ==========================================
2059
+
2060
def main():
    """
    Command-line entry point: parse options, set DB_PATH, start the server.

    Defaults for --db, --port and --host come from the DB_PATH, PORT and
    HOST environment variables.
    """
    global DB_PATH

    import argparse

    parser = argparse.ArgumentParser(description='Telegram Analytics Dashboard')
    env = os.environ
    parser.add_argument('--db', default=env.get('DB_PATH', 'telegram.db'), help='Database path')
    parser.add_argument('--port', type=int, default=int(env.get('PORT', 5000)), help='Server port')
    parser.add_argument('--host', default=env.get('HOST', '127.0.0.1'), help='Server host')
    parser.add_argument('--debug', action='store_true', help='Debug mode')
    args = parser.parse_args()

    # Request handlers read the module-level DB_PATH.
    DB_PATH = args.db

    banner = f"""
╔══════════════════════════════════════════════════════════════╗
║ TELEGRAM ANALYTICS DASHBOARD ║
╠══════════════════════════════════════════════════════════════╣
║ Database: {args.db:47} ║
║ Server: http://{args.host}:{args.port:<37} ║
╚══════════════════════════════════════════════════════════════╝
"""
    print(banner)

    app.run(host=args.host, port=args.port, debug=args.debug)


if __name__ == '__main__':
    main()
data_structures.py ADDED
@@ -0,0 +1,773 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Advanced Data Structures for Efficient Search and Traversal
4
+
5
+ Includes:
6
+ - Bloom Filter: O(1) "definitely not in set" checks
7
+ - Trie: O(k) prefix search and autocomplete
8
+ - LRU Cache: O(1) cached query results
9
+ - Graph algorithms: DFS, BFS for thread traversal
10
+ """
11
+
12
+ import hashlib
13
+ import math
14
+ from collections import OrderedDict, defaultdict, deque
15
+ from typing import Any, Callable, Generator, Iterator, Optional
16
+ from functools import wraps
17
+
18
+
19
+ # ============================================
20
+ # BLOOM FILTER
21
+ # ============================================
22
+
23
+ class BloomFilter:
24
+ """
25
+ Space-efficient probabilistic data structure for set membership testing.
26
+
27
+ - O(k) insert and lookup where k is number of hash functions
28
+ - False positives possible, false negatives impossible
29
+ - Use case: Quick "message ID exists?" check before DB query
30
+
31
+ Example:
32
+ bf = BloomFilter(expected_items=100000, fp_rate=0.01)
33
+ bf.add("message_123")
34
+ if "message_123" in bf: # O(1) check
35
+ # Might exist, check DB
36
+ else:
37
+ # Definitely doesn't exist, skip DB
38
+ """
39
+
40
+ def __init__(self, expected_items: int = 100000, fp_rate: float = 0.01):
41
+ """
42
+ Initialize Bloom filter.
43
+
44
+ Args:
45
+ expected_items: Expected number of items to store
46
+ fp_rate: Desired false positive rate (0.01 = 1%)
47
+ """
48
+ # Calculate optimal size and hash count
49
+ self.size = self._optimal_size(expected_items, fp_rate)
50
+ self.hash_count = self._optimal_hash_count(self.size, expected_items)
51
+ self.bit_array = bytearray(math.ceil(self.size / 8))
52
+ self.count = 0
53
+
54
+ @staticmethod
55
+ def _optimal_size(n: int, p: float) -> int:
56
+ """Calculate optimal bit array size: m = -n*ln(p) / (ln2)^2"""
57
+ return int(-n * math.log(p) / (math.log(2) ** 2))
58
+
59
+ @staticmethod
60
+ def _optimal_hash_count(m: int, n: int) -> int:
61
+ """Calculate optimal hash count: k = (m/n) * ln2"""
62
+ return max(1, int((m / n) * math.log(2)))
63
+
64
+ def _get_hash_values(self, item: str) -> Generator[int, None, None]:
65
+ """Generate k hash values using double hashing technique."""
66
+ h1 = int(hashlib.md5(item.encode()).hexdigest(), 16)
67
+ h2 = int(hashlib.sha1(item.encode()).hexdigest(), 16)
68
+ for i in range(self.hash_count):
69
+ yield (h1 + i * h2) % self.size
70
+
71
+ def add(self, item: str) -> None:
72
+ """Add an item to the filter. O(k) where k is hash count."""
73
+ for pos in self._get_hash_values(item):
74
+ byte_idx, bit_idx = divmod(pos, 8)
75
+ self.bit_array[byte_idx] |= (1 << bit_idx)
76
+ self.count += 1
77
+
78
+ def __contains__(self, item: str) -> bool:
79
+ """Check if item might be in the filter. O(k)."""
80
+ for pos in self._get_hash_values(item):
81
+ byte_idx, bit_idx = divmod(pos, 8)
82
+ if not (self.bit_array[byte_idx] & (1 << bit_idx)):
83
+ return False # Definitely not in set
84
+ return True # Might be in set
85
+
86
+ def __len__(self) -> int:
87
+ return self.count
88
+
89
@property
def memory_usage(self) -> int:
    """Size of the backing bit array, in bytes."""
    byte_total = len(self.bit_array)
    return byte_total
93
+
94
+
95
+ # ============================================
96
+ # TRIE (PREFIX TREE)
97
+ # ============================================
98
+
99
class TrieNode:
    """A single character position inside a Trie."""
    __slots__ = ['children', 'is_end', 'data', 'count']

    def __init__(self):
        # Bookkeeping filled in by the owning trie.
        self.count: int = 0        # frequency counter
        self.data: Any = None      # payload attached to a key (e.g. message IDs)
        self.is_end: bool = False  # True when a stored key ends at this node
        # Outgoing edges, keyed by the next character.
        self.children: dict[str, TrieNode] = {}
108
+
109
+
110
class Trie:
    """
    Trie (Prefix Tree) for fast prefix-based search and autocomplete.

    - O(k) insert/search where k is key length
    - O(p + n) prefix search where p is prefix length, n is the size of the
      subtree under the prefix
    - Use case: Autocomplete usernames, find all messages starting with prefix

    Keys are case-insensitive: they are lower-cased on insert and lookup.

    Example:
        trie = Trie()
        trie.insert("@username1", data=[1, 2, 3])
        trie.insert("@username2", data=[4, 5])

        results = trie.search_prefix("@user")  # Returns both
        completions = trie.autocomplete("@user", limit=5)
    """

    def __init__(self):
        self.root = TrieNode()
        self.size = 0  # number of distinct keys stored

    def insert(self, key: str, data: Any = None) -> None:
        """
        Insert a key with optional associated data. O(k).

        Inserting the same key again raises its frequency and appends any
        new data to what is already stored. A list is merged element-wise,
        any other value is appended as a single entry.
        """
        node = self.root
        for char in key.lower():
            if char not in node.children:
                node.children[char] = TrieNode()
            node = node.children[char]
            # Every node on the path counts the insertions passing through it.
            node.count += 1

        if not node.is_end:
            self.size += 1
            node.is_end = True

        # Store or append data
        if data is not None:
            if node.data is None:
                node.data = []
            if isinstance(data, list):
                node.data.extend(data)
            else:
                node.data.append(data)

    def search(self, key: str) -> Optional[Any]:
        """Search for exact key. O(k). Returns associated data or None."""
        node = self._find_node(key.lower())
        return node.data if node and node.is_end else None

    def __contains__(self, key: str) -> bool:
        """Check if key exists. O(k)."""
        node = self._find_node(key.lower())
        return node is not None and node.is_end

    def _find_node(self, prefix: str) -> "Optional[TrieNode]":
        """Return the node reached by following ``prefix``, or None."""
        node = self.root
        for char in prefix:
            if char not in node.children:
                return None
            node = node.children[char]
        return node

    def _walk(self, node: "TrieNode", prefix: str):
        """
        Yield (key, node) for every stored key at or below ``node``.

        Uses an explicit stack so that very long keys cannot exhaust the
        interpreter's recursion limit. Keys come out in pre-order, with
        children visited in insertion order.
        """
        stack = [(node, prefix)]
        while stack:
            current, key = stack.pop()
            if current.is_end:
                yield key, current
            # Push in reverse so the first-inserted child is popped first.
            for char, child in reversed(list(current.children.items())):
                stack.append((child, key + char))

    @staticmethod
    def _key_frequency(node: "TrieNode") -> int:
        """
        Number of times the key ending at ``node`` was inserted.

        ``node.count`` counts every insertion passing through the node, so
        insertions that continued into a child are subtracted. The result is
        clamped at 0 because the root (the empty key) is never counted.
        """
        passed_on = sum(child.count for child in node.children.values())
        return max(0, node.count - passed_on)

    def search_prefix(self, prefix: str) -> list[tuple[str, Any]]:
        """
        Find all keys with given prefix.
        Returns list of (key, data) tuples.
        """
        results: list[tuple[str, Any]] = []
        node = self._find_node(prefix.lower())
        if node:
            self._collect_all(node, prefix.lower(), results)
        return results

    def _collect_all(
        self,
        node: "TrieNode",
        prefix: str,
        results: list[tuple[str, Any]]
    ) -> None:
        """Append (key, data) for every key under ``node`` to ``results``."""
        for key, terminal in self._walk(node, prefix):
            results.append((key, terminal.data))

    def autocomplete(self, prefix: str, limit: int = 10) -> list[str]:
        """
        Get autocomplete suggestions for prefix.
        Returns the most frequently inserted completions, up to ``limit``.
        Ties keep traversal order.
        """
        node = self._find_node(prefix.lower())
        if not node:
            return []

        suggestions: list[tuple[str, int]] = []
        self._collect_suggestions(node, prefix.lower(), suggestions)

        # Sort by frequency and return top results
        suggestions.sort(key=lambda x: x[1], reverse=True)
        return [s[0] for s in suggestions[:limit]]

    def _collect_suggestions(
        self,
        node: "TrieNode",
        prefix: str,
        suggestions: list[tuple[str, int]]
    ) -> None:
        """Append (key, insert frequency) for every key under ``node``."""
        for key, terminal in self._walk(node, prefix):
            suggestions.append((key, self._key_frequency(terminal)))

    def __len__(self) -> int:
        """Number of distinct keys stored."""
        return self.size
225
+
226
+
227
+ # ============================================
228
+ # LRU CACHE
229
+ # ============================================
230
+
231
class LRUCache:
    """
    Least Recently Used (LRU) Cache for query results.

    - O(1) get/put operations
    - Automatically evicts least recently used items when full
    - Use case: Cache expensive query results

    A ``maxsize`` of 0 or less disables storage: ``put`` becomes a no-op.

    Example:
        cache = LRUCache(maxsize=1000)
        cache.put("query:hello", results)
        results = cache.get("query:hello")  # O(1)
    """

    def __init__(self, maxsize: int = 1000):
        self.maxsize = maxsize
        # Ordered oldest -> newest; the front is the next eviction victim.
        self.cache: OrderedDict[str, Any] = OrderedDict()
        self.hits = 0
        self.misses = 0

    def get(self, key: str) -> Optional[Any]:
        """
        Get item from cache. O(1). Returns None if not found.

        A hit marks the entry as most recently used. Because None is also
        the miss value, use ``key in cache`` to tell a stored None from a miss.
        """
        if key in self.cache:
            self.cache.move_to_end(key)
            self.hits += 1
            return self.cache[key]
        self.misses += 1
        return None

    def put(self, key: str, value: Any) -> None:
        """Put item in cache. O(1). Evicts LRU item if full."""
        if self.maxsize <= 0:
            # Nothing can be stored, and popitem() on an empty dict would
            # raise KeyError.
            return
        if key in self.cache:
            self.cache.move_to_end(key)
        elif len(self.cache) >= self.maxsize:
            self.cache.popitem(last=False)
        self.cache[key] = value

    def __contains__(self, key: str) -> bool:
        """Membership test; does not touch recency or hit/miss counters."""
        return key in self.cache

    def __len__(self) -> int:
        return len(self.cache)

    def clear(self) -> None:
        """Clear the cache and reset the hit/miss counters."""
        self.cache.clear()
        self.hits = 0
        self.misses = 0

    @property
    def hit_rate(self) -> float:
        """Return cache hit rate (0.0 when no lookups have happened)."""
        total = self.hits + self.misses
        return self.hits / total if total > 0 else 0.0

    @property
    def stats(self) -> dict:
        """Return cache statistics."""
        return {
            'size': len(self.cache),
            'maxsize': self.maxsize,
            'hits': self.hits,
            'misses': self.misses,
            'hit_rate': self.hit_rate
        }
297
+
298
+
299
def lru_cached(cache: "LRUCache", key_func: Optional[Callable[..., str]] = None):
    """
    Decorator to cache function results using LRUCache.

    Args:
        cache: Cache object providing ``get``, ``put`` and ``in`` support.
        key_func: Optional callable receiving the wrapped function's
            arguments and returning the cache key. When omitted, the key is
            built from the function name, the positional arguments and the
            keyword arguments (sorted by name, so keyword order is irrelevant).

    A stored result of None is treated as a hit, so functions that
    legitimately return None are not recomputed on every call.

    Example:
        cache = LRUCache(1000)

        @lru_cached(cache, key_func=lambda q, **kw: f"search:{q}")
        def search(query, limit=100):
            return expensive_search(query, limit)
    """
    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def wrapper(*args, **kwargs):
            if key_func:
                key = key_func(*args, **kwargs)
            else:
                key = f"{func.__name__}:{args}:{dict(sorted(kwargs.items()))}"

            # Check membership first: get() alone cannot tell a cached None
            # from a miss. get() is still called either way so the cache's
            # hit/miss counters and recency order stay accurate.
            present = key in cache
            result = cache.get(key)
            if present:
                return result

            result = func(*args, **kwargs)
            cache.put(key, result)
            return result
        return wrapper
    return decorator
327
+
328
+
329
+ # ============================================
330
+ # GRAPH ALGORITHMS FOR REPLY THREADS
331
+ # ============================================
332
+
333
class ReplyGraph:
    """
    Graph structure for message reply relationships.

    Supports:
    - DFS: Depth-first traversal for finding all descendants
    - BFS: Breadth-first traversal for level-order exploration
    - Connected components: Find isolated conversation threads
    - Thread reconstruction: roots, ancestors and root-to-message paths

    Each message has at most one parent. All traversals are iterative and
    guard against cycles (e.g. a message recorded as replying to itself),
    so malformed input cannot cause infinite loops or recursion errors.

    Time complexity: O(V + E) for traversals
    Space complexity: O(V) for visited set
    """

    def __init__(self):
        # Adjacency lists
        self.children: dict[int, list[int]] = defaultdict(list)  # parent -> [children]
        self.parents: dict[int, int] = {}  # child -> parent
        self.nodes: set[int] = set()

    def add_edge(self, parent_id: int, child_id: int) -> None:
        """
        Add a reply relationship.

        Re-adding an existing edge is a no-op. If the child already had a
        different parent, it is moved to the new one so that ``children``
        and ``parents`` stay consistent.
        """
        self.nodes.add(parent_id)
        self.nodes.add(child_id)

        if child_id in self.parents:
            previous = self.parents[child_id]
            if previous == parent_id:
                return  # edge already recorded; avoid duplicate children
            former_siblings = self.children.get(previous)
            if former_siblings and child_id in former_siblings:
                former_siblings.remove(child_id)

        self.children[parent_id].append(child_id)
        self.parents[child_id] = parent_id

    def add_message(self, message_id: int, reply_to: Optional[int] = None) -> None:
        """Add a message, optionally with its reply relationship."""
        self.nodes.add(message_id)
        if reply_to is not None:
            self.add_edge(reply_to, message_id)

    def get_children(self, message_id: int) -> list[int]:
        """Get direct replies to a message. O(1)."""
        # .get() avoids creating an empty entry in the defaultdict.
        return self.children.get(message_id, [])

    def get_parent(self, message_id: int) -> Optional[int]:
        """Get the message this is a reply to. O(1)."""
        return self.parents.get(message_id)

    # ==================
    # DFS - Depth First Search
    # ==================

    def dfs_descendants(self, start_id: int) -> list[int]:
        """
        DFS: Get all descendants of a message (entire sub-thread).

        Time: O(V + E)
        Space: O(V)

        Returns messages in DFS pre-order (deep before wide), starting with
        ``start_id`` itself. Implemented on top of ``dfs_iterative`` so deep
        reply chains cannot hit the recursion limit.
        """
        return list(self.dfs_iterative(start_id))

    def dfs_iterative(self, start_id: int) -> Iterator[int]:
        """
        Iterative DFS using explicit stack (avoids recursion limit).

        Yields message IDs in DFS order.
        """
        stack = [start_id]
        visited = set()

        while stack:
            node_id = stack.pop()
            if node_id in visited:
                continue
            visited.add(node_id)
            yield node_id

            # Add children in reverse order for correct DFS order
            for child_id in reversed(self.children.get(node_id, [])):
                if child_id not in visited:
                    stack.append(child_id)

    # ==================
    # BFS - Breadth First Search
    # ==================

    def bfs_descendants(self, start_id: int) -> list[int]:
        """
        BFS: Get all descendants level by level.

        Time: O(V + E)
        Space: O(V)

        Returns messages in BFS order (level by level), starting with
        ``start_id`` itself.
        """
        return [node_id for node_id, _ in self.bfs_with_depth(start_id)]

    def bfs_with_depth(self, start_id: int) -> list[tuple[int, int]]:
        """
        BFS with depth information.

        Returns list of (message_id, depth) tuples; ``start_id`` has depth 0.
        """
        result = []
        visited = set()
        queue = deque([(start_id, 0)])

        while queue:
            node_id, depth = queue.popleft()
            if node_id in visited:
                continue
            visited.add(node_id)
            result.append((node_id, depth))

            for child_id in self.children.get(node_id, []):
                if child_id not in visited:
                    queue.append((child_id, depth + 1))

        return result

    # ==================
    # THREAD RECONSTRUCTION
    # ==================

    def get_thread_root(self, message_id: int) -> int:
        """
        Find the root message of a thread. O(d) where d is depth.

        If the parent links form a cycle, the walk stops at the last node
        reached before the cycle would repeat.
        """
        ancestors = self.get_ancestors(message_id)
        return ancestors[-1] if ancestors else message_id

    def get_full_thread(self, message_id: int) -> list[int]:
        """
        Get the complete thread containing a message.

        1. Find root via parent traversal
        2. BFS from root to get all descendants
        """
        root = self.get_thread_root(message_id)
        return self.bfs_descendants(root)

    def get_ancestors(self, message_id: int) -> list[int]:
        """
        Get all ancestors (path to root). O(d).

        Returns in order from message to root. The message itself is not
        included. Stops early if a parent link would revisit a node.
        """
        ancestors = []
        seen = {message_id}
        current = message_id
        while current in self.parents:
            parent = self.parents[current]
            if parent in seen:
                break  # cycle in the parent links; stop instead of looping
            seen.add(parent)
            ancestors.append(parent)
            current = parent
        return ancestors

    def get_thread_path(self, message_id: int) -> list[int]:
        """
        Get path from root to message (both inclusive). O(d).
        """
        path = [message_id]
        path.extend(self.get_ancestors(message_id))
        path.reverse()
        return path

    # ==================
    # CONNECTED COMPONENTS
    # ==================

    def find_connected_components(self) -> list[set[int]]:
        """
        Find all isolated conversation threads.

        Time: O(V + E)

        Returns list of sets, each set is a connected thread.
        """
        visited = set()
        components = []

        for node in self.nodes:
            if node not in visited:
                component = set()
                # Use BFS over both directions (parent and children)
                queue = deque([node])
                while queue:
                    current = queue.popleft()
                    if current in visited:
                        continue
                    visited.add(current)
                    component.add(current)

                    # Add parent
                    if current in self.parents:
                        parent = self.parents[current]
                        if parent not in visited:
                            queue.append(parent)

                    # Add children
                    for child in self.children.get(current, []):
                        if child not in visited:
                            queue.append(child)

                components.append(component)

        return components

    def get_thread_roots(self) -> list[int]:
        """Get all thread root messages (messages with no parent)."""
        return [node for node in self.nodes if node not in self.parents]

    # ==================
    # STATISTICS
    # ==================

    def get_thread_depth(self, root_id: int) -> int:
        """Get maximum depth of a thread from root."""
        return max(depth for _, depth in self.bfs_with_depth(root_id))

    def get_subtree_size(self, message_id: int) -> int:
        """Get number of messages in subtree including root."""
        return len(self.dfs_descendants(message_id))

    @property
    def stats(self) -> dict:
        """Get graph statistics."""
        return {
            'total_nodes': len(self.nodes),
            'total_edges': sum(len(children) for children in self.children.values()),
            'root_messages': len(self.get_thread_roots()),
            'connected_components': len(self.find_connected_components())
        }
594
+
595
+
596
+ # ============================================
597
+ # TRIGRAM SIMILARITY
598
+ # ============================================
599
+
600
def generate_trigrams(text: str) -> set[str]:
    """
    Build the set of 3-character substrings used for fuzzy matching.

    The text is lower-cased and stripped of surrounding whitespace first.
    Text shorter than three characters yields a set holding the text itself;
    empty text yields an empty set.

    Example: "hello" -> {"hel", "ell", "llo"}
    """
    normalized = text.lower().strip()
    length = len(normalized)
    if length >= 3:
        return set(normalized[start:start + 3] for start in range(length - 2))
    if normalized:
        return {normalized}
    return set()
610
+
611
+
612
def trigram_similarity(text1: str, text2: str) -> float:
    """
    Jaccard similarity of the two texts' trigram sets.

    Returns a value from 0 (nothing shared) to 1 (identical trigram sets).
    If either text produces no trigrams the result is 0.0.
    """
    first = generate_trigrams(text1)
    second = generate_trigrams(text2)

    if not (first and second):
        return 0.0

    shared = len(first & second)
    combined = len(first | second)

    if combined > 0:
        return shared / combined
    return 0.0
628
+
629
+
630
class TrigramIndex:
    """
    Inverted index of trigrams for fuzzy search.

    Time complexity:
    - Insert: O(k) where k is text length
    - Search: O(t * m) where t is trigrams in query, m is avg matches

    Adding a document under an existing ``doc_id`` replaces the earlier
    text, including its index postings.

    Example:
        index = TrigramIndex()
        index.add(1, "שלום עולם")
        index.add(2, "שלום לכולם")

        results = index.search("שלום", threshold=0.3)
    """

    def __init__(self):
        self.index: dict[str, set[int]] = defaultdict(set)  # trigram -> doc ids
        self.texts: dict[int, str] = {}                     # doc id -> original text

    def add(self, doc_id: int, text: str) -> None:
        """Add a document to the index, replacing any earlier version."""
        if doc_id in self.texts:
            # Drop the previous version's postings; otherwise stale trigrams
            # would keep matching a document that no longer contains them.
            for stale in generate_trigrams(self.texts[doc_id]):
                postings = self.index.get(stale)
                if postings is not None:
                    postings.discard(doc_id)
                    if not postings:
                        del self.index[stale]

        self.texts[doc_id] = text
        for trigram in generate_trigrams(text):
            self.index[trigram].add(doc_id)

    def search(self, query: str, threshold: float = 0.3, limit: int = 100) -> list[tuple[int, float]]:
        """
        Search for documents similar to query.

        Args:
            query: Text to match against the indexed documents.
            threshold: Minimum Jaccard similarity (inclusive) to report.
            limit: Maximum number of results to return.

        Returns list of (doc_id, similarity) tuples, sorted by similarity
        (highest first).
        """
        query_trigrams = generate_trigrams(query)
        if not query_trigrams:
            return []

        # Candidates: doc id -> number of query trigrams it shares.
        candidates: dict[int, int] = defaultdict(int)
        for trigram in query_trigrams:
            for doc_id in self.index.get(trigram, ()):
                candidates[doc_id] += 1

        # Calculate similarity for candidates
        results = []
        query_len = len(query_trigrams)

        for doc_id, match_count in candidates.items():
            doc_len = len(generate_trigrams(self.texts[doc_id]))

            # Jaccard: |intersection| / |union|, where
            # |union| = |query| + |doc| - |intersection|.
            similarity = match_count / (query_len + doc_len - match_count)

            if similarity >= threshold:
                results.append((doc_id, similarity))

        # Sort by similarity descending
        results.sort(key=lambda x: x[1], reverse=True)
        return results[:limit]

    def __len__(self) -> int:
        """Number of indexed documents."""
        return len(self.texts)
692
+
693
+
694
+ # ============================================
695
+ # INVERTED INDEX
696
+ # ============================================
697
+
698
class InvertedIndex:
    """
    Simple inverted index for fast word-to-document lookup.

    Words are runs of Hebrew (U+0590-U+05FF) or ASCII letters, lower-cased.

    Time complexity:
    - Insert: O(w) where w is word count
    - Search: O(n) for a single word, n being the size of its posting set
      (results are returned as copies)
    - AND/OR queries: proportional to the posting sets involved

    All search methods return fresh sets, so callers may modify the results
    without affecting the index.
    """

    def __init__(self):
        self.index: dict[str, set[int]] = defaultdict(set)  # word -> doc ids
        self.doc_count = 0  # number of add() calls

    def add(self, doc_id: int, text: str) -> None:
        """Add document to index."""
        for word in self._tokenize(text):
            self.index[word].add(doc_id)
        self.doc_count += 1

    def _tokenize(self, text: str) -> list[str]:
        """Simple tokenization: lower-case runs of Hebrew or ASCII letters."""
        import re
        return re.findall(r'[\u0590-\u05FFa-zA-Z]+', text.lower())

    def _postings(self, word: str) -> set[int]:
        """Return the live posting set for ``word`` (read-only use only)."""
        # .get() keeps the defaultdict from creating empty entries on lookup.
        return self.index.get(word.lower(), set())

    def search(self, word: str) -> set[int]:
        """Find all documents containing word."""
        return set(self._postings(word))

    def search_and(self, words: list[str]) -> set[int]:
        """Find documents containing ALL words."""
        if not words:
            return set()
        # Intersect starting from the smallest posting set. Building a new
        # set matters: an in-place `&=` on a live posting set would silently
        # delete entries from the index.
        postings = sorted((self._postings(word) for word in words), key=len)
        return set(postings[0]).intersection(*postings[1:])

    def search_or(self, words: list[str]) -> set[int]:
        """Find documents containing ANY word."""
        result: set[int] = set()
        for word in words:
            result.update(self._postings(word))
        return result
743
+
744
+
745
if __name__ == '__main__':
    # Small smoke-test walkthrough of the structures defined above.
    print("=== Bloom Filter Demo ===")
    bf = BloomFilter(expected_items=1000, fp_rate=0.01)
    for key in ("message_1", "message_2"):
        bf.add(key)
    print(f"message_1 in filter: {'message_1' in bf}")
    print(f"message_999 in filter: {'message_999' in bf}")
    print(f"Memory usage: {bf.memory_usage} bytes")

    print("\n=== Trie Demo ===")
    trie = Trie()
    for handle, payload in (("@username1", 1), ("@username2", 2), ("@user_test", 3)):
        trie.insert(handle, data=payload)
    print(f"Autocomplete '@user': {trie.autocomplete('@user')}")

    print("\n=== Reply Graph Demo ===")
    graph = ReplyGraph()
    # (message id, id of the message it replies to)
    for message_id, parent_id in ((1, None), (2, 1), (3, 1), (4, 2), (5, 2)):
        graph.add_message(message_id, reply_to=parent_id)

    print(f"DFS from 1: {graph.dfs_descendants(1)}")
    print(f"BFS from 1: {graph.bfs_descendants(1)}")
    print(f"Thread path for 4: {graph.get_thread_path(4)}")
    print(f"Stats: {graph.stats}")
indexer.py ADDED
@@ -0,0 +1,817 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Telegram JSON Chat Indexer (Optimized)
4
+
5
+ Features:
6
+ - Batch processing for faster indexing
7
+ - Graph building for reply threads
8
+ - Trigram index for fuzzy search
9
+ - Progress tracking
10
+ - Memory-efficient streaming
11
+
12
+ Usage:
13
+ python indexer.py <json_file> [--db <database_file>]
14
+ python indexer.py result.json --db telegram.db
15
+ python indexer.py result.json --batch-size 5000 --build-trigrams
16
+ """
17
+
18
+ import json
19
+ import sqlite3
20
+ import argparse
21
+
22
+ try:
23
+ import ijson
24
+ HAS_IJSON = True
25
+ except ImportError:
26
+ HAS_IJSON = False
27
+ import os
28
+ import time
29
+ from pathlib import Path
30
+ from typing import Any, Generator
31
+ from collections import defaultdict
32
+
33
+ from data_structures import BloomFilter, ReplyGraph, generate_trigrams
34
+
35
+
36
def flatten_text(text_field: Any) -> str:
    """
    Collapse a message's text field into one plain string.

    The field is either a plain string (returned unchanged) or a list that
    mixes strings with dicts carrying a 'text' key. List items of any other
    shape are skipped. Any other input type yields an empty string.
    """
    if isinstance(text_field, str):
        return text_field

    if not isinstance(text_field, list):
        return ''

    return ''.join(
        piece if isinstance(piece, str) else piece['text']
        for piece in text_field
        if isinstance(piece, str) or (isinstance(piece, dict) and 'text' in piece)
    )
53
+
54
+
55
def extract_entities(text_entities: list) -> list[dict]:
    """
    Pull the typed entities (links, mentions, etc.) out of a text_entities array.

    Non-dict items and entities whose type is 'plain' (or missing) are
    dropped. Each result has the form {'type': ..., 'value': ...}, where
    'value' is the entity's 'text' or '' when absent. None is treated as
    an empty list.
    """
    return [
        {'type': candidate.get('type', 'plain'), 'value': candidate.get('text', '')}
        for candidate in (text_entities or [])
        if isinstance(candidate, dict) and candidate.get('type', 'plain') != 'plain'
    ]
67
+
68
+
69
+ def parse_message(msg: dict) -> dict | None:
70
+ """Parse a single message from Telegram JSON format."""
71
+ if msg.get('type') != 'message':
72
+ return None
73
+
74
+ text_plain = flatten_text(msg.get('text', ''))
75
+ entities = extract_entities(msg.get('text_entities', []))
76
+
77
+ has_links = any(e['type'] == 'link' for e in entities)
78
+ has_mentions = any(e['type'] == 'mention' for e in entities)
79
+
80
+ return {
81
+ 'id': msg.get('id'),
82
+ 'type': msg.get('type', 'message'),
83
+ 'date': msg.get('date'),
84
+ 'date_unixtime': int(msg.get('date_unixtime', 0)) if msg.get('date_unixtime') else 0,
85
+ 'from_name': msg.get('from', ''),
86
+ 'from_id': msg.get('from_id', ''),
87
+ 'reply_to_message_id': msg.get('reply_to_message_id'),
88
+ 'forwarded_from': msg.get('forwarded_from'),
89
+ 'forwarded_from_id': msg.get('forwarded_from_id'),
90
+ 'text_plain': text_plain,
91
+ 'text_length': len(text_plain),
92
+ 'has_media': 1 if msg.get('photo') or msg.get('file') or msg.get('media_type') else 0,
93
+ 'has_photo': 1 if msg.get('photo') else 0,
94
+ 'has_links': 1 if has_links else 0,
95
+ 'has_mentions': 1 if has_mentions else 0,
96
+ 'is_edited': 1 if msg.get('edited') else 0,
97
+ 'edited_unixtime': int(msg.get('edited_unixtime', 0)) if msg.get('edited_unixtime') else None,
98
+ 'photo_file_size': msg.get('photo_file_size'),
99
+ 'photo_width': msg.get('width'),
100
+ 'photo_height': msg.get('height'),
101
+ 'raw_json': json.dumps(msg, ensure_ascii=False),
102
+ 'entities': entities
103
+ }
104
+
105
+
106
+ def _detect_json_structure(json_path: str) -> str:
107
+ """Peek at JSON to determine if root is a list or object with 'messages' key."""
108
+ with open(json_path, 'r', encoding='utf-8') as f:
109
+ for char in iter(lambda: f.read(1), ''):
110
+ if char in ' \t\n\r':
111
+ continue
112
+ if char == '[':
113
+ return 'list'
114
+ return 'object'
115
+ return 'object'
116
+
117
+
118
def load_json_messages(json_path: str) -> Generator[dict, None, None]:
    """
    Yield parsed messages from a Telegram export JSON file.

    When ijson is importable the file is streamed item by item; otherwise
    the whole document is read with json.load(). Entries for which
    parse_message() returns a falsy value are skipped.
    """
    if not HAS_IJSON:
        # Fallback: read the entire document, then walk the message list.
        with open(json_path, 'r', encoding='utf-8') as handle:
            payload = json.load(handle)
        records = payload if isinstance(payload, list) else payload.get('messages', [])
        yield from (parsed for parsed in map(parse_message, records) if parsed)
        return

    # Streaming path: the ijson prefix depends on whether the root is a
    # list or an object holding a 'messages' array.
    item_path = 'item' if _detect_json_structure(json_path) == 'list' else 'messages.item'
    with open(json_path, 'rb') as stream:
        for raw in ijson.items(stream, item_path):
            record = parse_message(raw)
            if record:
                yield record
141
+
142
+
143
def count_messages(json_path: str) -> int:
    """
    Count the entries whose type is 'message' in a JSON export.

    Streams the file when ijson is importable, otherwise loads it whole.
    """
    def _is_message(entry) -> bool:
        return entry.get('type') == 'message'

    if not HAS_IJSON:
        with open(json_path, 'r', encoding='utf-8') as handle:
            payload = json.load(handle)
        entries = payload if isinstance(payload, list) else payload.get('messages', [])
        return sum(1 for entry in entries if _is_message(entry))

    # Streaming path: pick the ijson prefix that matches the root type.
    item_path = 'item' if _detect_json_structure(json_path) == 'list' else 'messages.item'
    total = 0
    with open(json_path, 'rb') as stream:
        for entry in ijson.items(stream, item_path):
            if _is_message(entry):
                total += 1
    return total
159
+
160
+
161
def init_database(db_path: str) -> sqlite3.Connection:
    """
    Initialize SQLite database with optimized schema.

    Executes the 'schema.sql' file located next to this module against the
    database at ``db_path``.

    Args:
        db_path: Path of the SQLite database (created if missing).

    Returns:
        An open connection whose rows are ``sqlite3.Row`` objects.

    Raises:
        FileNotFoundError: If 'schema.sql' does not exist. This is checked
            before connecting, so no empty database file is left behind.
    """
    schema_path = Path(__file__).parent / 'schema.sql'
    if not schema_path.exists():
        raise FileNotFoundError(f"Schema file not found: {schema_path}")

    # Read with an explicit encoding so the result does not depend on the
    # platform's default locale.
    schema_sql = schema_path.read_text(encoding='utf-8')

    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row
        conn.executescript(schema_sql)
    except Exception:
        # Do not leak the connection if the schema fails to apply.
        conn.close()
        raise

    return conn
175
+
176
+
177
+ class OptimizedIndexer:
178
+ """
179
+ High-performance indexer with batch processing and graph building.
180
+
181
+ Features:
182
+ - Batch inserts (100x faster than individual inserts)
183
+ - Bloom filter for duplicate detection
184
+ - Reply graph construction
185
+ - Trigram index building
186
+ - Progress tracking
187
+ """
188
+
189
+ def __init__(
190
+ self,
191
+ db_path: str,
192
+ batch_size: int = 1000,
193
+ build_trigrams: bool = False,
194
+ build_graph: bool = True
195
+ ):
196
+ self.db_path = db_path
197
+ self.batch_size = batch_size
198
+ self.build_trigrams = build_trigrams
199
+ self.build_graph = build_graph
200
+
201
+ self.conn = init_database(db_path)
202
+ self.bloom = BloomFilter(expected_items=1000000, fp_rate=0.01)
203
+ self.graph = ReplyGraph() if build_graph else None
204
+
205
+ # Batch buffers
206
+ self.message_batch: list[tuple] = []
207
+ self.entity_batch: list[tuple] = []
208
+ self.trigram_batch: list[tuple] = []
209
+
210
+ # Stats
211
+ self.stats = {
212
+ 'messages': 0,
213
+ 'entities': 0,
214
+ 'trigrams': 0,
215
+ 'users': {},
216
+ 'skipped': 0,
217
+ 'duplicates': 0
218
+ }
219
+
220
def index_file(self, json_path: str, show_progress: bool = True) -> dict:
    """
    Index a JSON file into the database.

    All rows are written inside a single transaction, which is rolled back
    if anything fails. After the commit the FTS index is optimized.

    Args:
        json_path: Path of the export to index.
        show_progress: When True, print progress to stdout. This costs an
            extra counting pass over the file. When False nothing is printed.

    Returns:
        The statistics dict, extended with 'elapsed_seconds' and
        'messages_per_second'.
    """
    start_time = time.time()

    # Count total for progress
    if show_progress:
        print(f"Counting messages in {json_path}...")
        total = count_messages(json_path)
        print(f"Found {total:,} messages to index")
    else:
        total = 0

    # One explicit transaction for the whole batch run
    self.conn.execute('BEGIN TRANSACTION')

    try:
        for i, msg in enumerate(load_json_messages(json_path)):
            self._index_message(msg)

            # Progress update
            done = i + 1
            if show_progress and done % 10000 == 0:
                elapsed = time.time() - start_time
                # Guard every division: elapsed and total can both be 0.
                rate = done / elapsed if elapsed > 0 else 0
                eta = (total - done) / rate if rate > 0 else 0
                percent = 100 * done / total if total else 0.0
                print(f" Indexed {done:,}/{total:,} ({percent:.1f}%) "
                      f"- {rate:.0f} msg/s - ETA: {eta:.0f}s")

        # Flush remaining batches
        self._flush_batches()

        # Build reply graph in database
        if self.build_graph:
            self._build_graph_tables()

        # Update users table
        self._update_users()

        # Commit transaction
        self.conn.commit()

        # Optimize FTS index
        if show_progress:
            print("Optimizing FTS index...")
        self.conn.execute("INSERT INTO messages_fts(messages_fts) VALUES('optimize')")
        self.conn.commit()

    except Exception:
        self.conn.rollback()
        raise  # bare raise keeps the original traceback

    elapsed = time.time() - start_time
    self.stats['elapsed_seconds'] = elapsed
    self.stats['messages_per_second'] = self.stats['messages'] / elapsed if elapsed > 0 else 0

    return self.stats
278
+
279
+ def _index_message(self, msg: dict) -> None:
280
+ """Index a single message into batch buffers."""
281
+ msg_id = msg['id']
282
+
283
+ # Duplicate check with Bloom filter
284
+ msg_key = f"msg_{msg_id}"
285
+ if msg_key in self.bloom:
286
+ self.stats['duplicates'] += 1
287
+ return
288
+ self.bloom.add(msg_key)
289
+
290
+ # Add to message batch
291
+ self.message_batch.append((
292
+ msg['id'], msg['type'], msg['date'], msg['date_unixtime'],
293
+ msg['from_name'], msg['from_id'], msg['reply_to_message_id'],
294
+ msg['forwarded_from'], msg['forwarded_from_id'], msg['text_plain'],
295
+ msg['text_length'], msg['has_media'], msg['has_photo'],
296
+ msg['has_links'], msg['has_mentions'], msg['is_edited'],
297
+ msg['edited_unixtime'], msg['photo_file_size'],
298
+ msg['photo_width'], msg['photo_height'], msg['raw_json']
299
+ ))
300
+
301
+ # Add entities to batch
302
+ for entity in msg['entities']:
303
+ self.entity_batch.append((msg_id, entity['type'], entity['value']))
304
+
305
+ # Add trigrams if enabled
306
+ if self.build_trigrams and msg['text_plain']:
307
+ for i, trigram in enumerate(generate_trigrams(msg['text_plain'])):
308
+ self.trigram_batch.append((trigram, msg_id, i))
309
+
310
+ # Build graph
311
+ if self.graph:
312
+ self.graph.add_message(msg_id, msg['reply_to_message_id'])
313
+
314
+ # Track users
315
+ user_id = msg['from_id']
316
+ if user_id:
317
+ if user_id not in self.stats['users']:
318
+ self.stats['users'][user_id] = {
319
+ 'display_name': msg['from_name'],
320
+ 'first_seen': msg['date_unixtime'],
321
+ 'last_seen': msg['date_unixtime'],
322
+ 'count': 0
323
+ }
324
+ self.stats['users'][user_id]['count'] += 1
325
+ ts = msg['date_unixtime']
326
+ if ts and ts < self.stats['users'][user_id]['first_seen']:
327
+ self.stats['users'][user_id]['first_seen'] = ts
328
+ if ts and ts > self.stats['users'][user_id]['last_seen']:
329
+ self.stats['users'][user_id]['last_seen'] = ts
330
+
331
+ self.stats['messages'] += 1
332
+
333
+ # Flush if batch is full
334
+ if len(self.message_batch) >= self.batch_size:
335
+ self._flush_batches()
336
+
337
+ def _flush_batches(self) -> None:
338
+ """Flush all batch buffers to database."""
339
+ cursor = self.conn.cursor()
340
+
341
+ # Insert messages
342
+ if self.message_batch:
343
+ cursor.executemany('''
344
+ INSERT OR REPLACE INTO messages (
345
+ id, type, date, date_unixtime, from_name, from_id,
346
+ reply_to_message_id, forwarded_from, forwarded_from_id,
347
+ text_plain, text_length, has_media, has_photo, has_links,
348
+ has_mentions, is_edited, edited_unixtime, photo_file_size,
349
+ photo_width, photo_height, raw_json
350
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
351
+ ''', self.message_batch)
352
+ self.message_batch = []
353
+
354
+ # Insert entities
355
+ if self.entity_batch:
356
+ cursor.executemany('''
357
+ INSERT INTO entities (message_id, type, value)
358
+ VALUES (?, ?, ?)
359
+ ''', self.entity_batch)
360
+ self.stats['entities'] += len(self.entity_batch)
361
+ self.entity_batch = []
362
+
363
+ # Insert trigrams
364
+ if self.trigram_batch:
365
+ cursor.executemany('''
366
+ INSERT OR IGNORE INTO trigrams (trigram, message_id, position)
367
+ VALUES (?, ?, ?)
368
+ ''', self.trigram_batch)
369
+ self.stats['trigrams'] += len(self.trigram_batch)
370
+ self.trigram_batch = []
371
+
372
+ def _build_graph_tables(self) -> None:
373
+ """Build reply graph tables from in-memory graph."""
374
+ if not self.graph:
375
+ return
376
+
377
+ print("Building reply graph tables...")
378
+ cursor = self.conn.cursor()
379
+
380
+ # Insert edges into reply_graph
381
+ edges = []
382
+ for parent_id, children in self.graph.children.items():
383
+ for child_id in children:
384
+ edges.append((parent_id, child_id, 1))
385
+
386
+ if edges:
387
+ cursor.executemany('''
388
+ INSERT OR IGNORE INTO reply_graph (parent_id, child_id, depth)
389
+ VALUES (?, ?, ?)
390
+ ''', edges)
391
+
392
+ # Find connected components (threads)
393
+ print("Finding conversation threads...")
394
+ components = self.graph.find_connected_components()
395
+
396
+ thread_data = []
397
+ message_thread_data = []
398
+
399
+ for thread_id, component in enumerate(components):
400
+ if not component:
401
+ continue
402
+
403
+ # Find root (message with no parent in this component)
404
+ root_id = None
405
+ for msg_id in component:
406
+ if msg_id not in self.graph.parents:
407
+ root_id = msg_id
408
+ break
409
+ if root_id is None:
410
+ root_id = min(component)
411
+
412
+ # Get thread stats
413
+ cursor.execute('''
414
+ SELECT MIN(date_unixtime), MAX(date_unixtime), COUNT(DISTINCT from_id)
415
+ FROM messages WHERE id IN ({})
416
+ '''.format(','.join('?' * len(component))), list(component))
417
+ row = cursor.fetchone()
418
+
419
+ thread_data.append((
420
+ root_id,
421
+ len(component),
422
+ row[0], # first_message_time
423
+ row[1], # last_message_time
424
+ row[2] # participant_count
425
+ ))
426
+
427
+ # Map messages to threads with depth
428
+ for msg_id in component:
429
+ depth = len(self.graph.get_ancestors(msg_id))
430
+ message_thread_data.append((msg_id, len(thread_data), depth))
431
+
432
+ # Insert thread data
433
+ cursor.executemany('''
434
+ INSERT INTO threads (root_message_id, message_count, first_message_time,
435
+ last_message_time, participant_count)
436
+ VALUES (?, ?, ?, ?, ?)
437
+ ''', thread_data)
438
+
439
+ cursor.executemany('''
440
+ INSERT OR REPLACE INTO message_threads (message_id, thread_id, depth)
441
+ VALUES (?, ?, ?)
442
+ ''', message_thread_data)
443
+
444
+ print(f" Created {len(thread_data)} conversation threads")
445
+
446
+ def _update_users(self) -> None:
447
+ """Update users table from tracked data."""
448
+ cursor = self.conn.cursor()
449
+ user_data = [
450
+ (user_id, data['display_name'], data['first_seen'],
451
+ data['last_seen'], data['count'])
452
+ for user_id, data in self.stats['users'].items()
453
+ ]
454
+
455
+ cursor.executemany('''
456
+ INSERT OR REPLACE INTO users (user_id, display_name, first_seen, last_seen, message_count)
457
+ VALUES (?, ?, ?, ?, ?)
458
+ ''', user_data)
459
+
460
def close(self) -> None:
    """Release the underlying SQLite connection held in ``self.conn``."""
    connection = self.conn
    connection.close()
463
+
464
+
465
+ class IncrementalIndexer:
466
+ """
467
+ Incremental indexer for adding new JSON data to existing database.
468
+
469
+ Features:
470
+ - Loads existing message IDs into Bloom filter
471
+ - Only processes new messages
472
+ - Updates FTS index automatically
473
+ - Fast duplicate detection O(1)
474
+ """
475
+
476
+ def __init__(self, db_path: str, batch_size: int = 1000):
477
+ self.db_path = db_path
478
+ self.batch_size = batch_size
479
+
480
+ if not os.path.exists(db_path):
481
+ raise FileNotFoundError(f"Database not found: {db_path}. Use OptimizedIndexer for initial import.")
482
+
483
+ self.conn = sqlite3.connect(db_path)
484
+ self.conn.row_factory = sqlite3.Row
485
+
486
+ # Batch buffers
487
+ self.message_batch: list[tuple] = []
488
+ self.entity_batch: list[tuple] = []
489
+
490
+ # Stats (must be initialized before _load_existing_ids)
491
+ self.stats = {
492
+ 'total_in_file': 0,
493
+ 'new_messages': 0,
494
+ 'duplicates': 0,
495
+ 'entities': 0,
496
+ 'users_updated': 0
497
+ }
498
+
499
+ # Load existing message IDs into Bloom filter
500
+ self.bloom = BloomFilter(expected_items=2000000, fp_rate=0.001)
501
+ self._load_existing_ids()
502
+
503
+ def _load_existing_ids(self) -> None:
504
+ """Load existing message IDs into Bloom filter for O(1) duplicate detection."""
505
+ cursor = self.conn.cursor()
506
+ cursor.execute("SELECT id FROM messages")
507
+
508
+ count = 0
509
+ for row in cursor:
510
+ self.bloom.add(f"msg_{row[0]}")
511
+ count += 1
512
+
513
+ print(f"Loaded {count:,} existing message IDs into Bloom filter")
514
+ self.stats['existing_count'] = count
515
+
516
+ def update_from_json(self, json_path: str, show_progress: bool = True) -> dict:
517
+ """
518
+ Add new messages from JSON file to existing database.
519
+
520
+ Only messages that don't exist in the database will be added.
521
+ FTS5 index is updated automatically.
522
+ Uses streaming JSON parser (ijson) when available for constant memory usage.
523
+ """
524
+ start_time = time.time()
525
+
526
+ # Count total for progress (streaming-aware)
527
+ total_hint = 0
528
+ if show_progress:
529
+ total_hint = count_messages(json_path)
530
+ print(f"Processing ~{total_hint:,} messages from {json_path}")
531
+
532
+ self.stats['total_in_file'] = total_hint
533
+
534
+ # Start transaction
535
+ self.conn.execute('BEGIN TRANSACTION')
536
+
537
+ try:
538
+ if HAS_IJSON:
539
+ structure = _detect_json_structure(json_path)
540
+ prefix = 'item' if structure == 'list' else 'messages.item'
541
+ with open(json_path, 'rb') as f:
542
+ for i, msg in enumerate(ijson.items(f, prefix)):
543
+ if msg.get('type') != 'message':
544
+ continue
545
+ parsed = parse_message(msg)
546
+ if parsed:
547
+ self._process_message(parsed)
548
+ if show_progress and (i + 1) % 10000 == 0:
549
+ print(f" Processed {i+1:,} - "
550
+ f"New: {self.stats['new_messages']:,}, "
551
+ f"Duplicates: {self.stats['duplicates']:,}")
552
+ else:
553
+ with open(json_path, 'r', encoding='utf-8') as f:
554
+ data = json.load(f)
555
+ messages = data if isinstance(data, list) else data.get('messages', [])
556
+ self.stats['total_in_file'] = len(messages)
557
+ for i, msg in enumerate(messages):
558
+ if msg.get('type') != 'message':
559
+ continue
560
+ parsed = parse_message(msg)
561
+ if parsed:
562
+ self._process_message(parsed)
563
+ if show_progress and (i + 1) % 10000 == 0:
564
+ print(f" Processed {i+1:,}/{len(messages):,} - "
565
+ f"New: {self.stats['new_messages']:,}, "
566
+ f"Duplicates: {self.stats['duplicates']:,}")
567
+
568
+ # Flush remaining
569
+ self._flush_batches()
570
+
571
+ # Update user stats
572
+ self._update_user_stats()
573
+
574
+ # Commit
575
+ self.conn.commit()
576
+
577
+ # Optimize FTS if we added new data
578
+ if self.stats['new_messages'] > 0:
579
+ print("Optimizing FTS index...")
580
+ self.conn.execute("INSERT INTO messages_fts(messages_fts) VALUES('optimize')")
581
+ self.conn.commit()
582
+
583
+ except Exception as e:
584
+ self.conn.rollback()
585
+ raise e
586
+
587
+ elapsed = time.time() - start_time
588
+ self.stats['elapsed_seconds'] = elapsed
589
+
590
+ return self.stats
591
+
592
+ def update_from_json_data(self, json_data: dict | list, show_progress: bool = False) -> dict:
593
+ """
594
+ Add new messages from JSON data (already parsed, not from file).
595
+
596
+ Useful for API uploads.
597
+ """
598
+ start_time = time.time()
599
+
600
+ messages = json_data if isinstance(json_data, list) else json_data.get('messages', [])
601
+ self.stats['total_in_file'] = len(messages)
602
+
603
+ # Start transaction
604
+ self.conn.execute('BEGIN TRANSACTION')
605
+
606
+ try:
607
+ for msg in messages:
608
+ if msg.get('type') != 'message':
609
+ continue
610
+
611
+ parsed = parse_message(msg)
612
+ if parsed:
613
+ self._process_message(parsed)
614
+
615
+ # Flush remaining
616
+ self._flush_batches()
617
+
618
+ # Update user stats
619
+ self._update_user_stats()
620
+
621
+ # Commit
622
+ self.conn.commit()
623
+
624
+ # Optimize FTS if we added new data
625
+ if self.stats['new_messages'] > 0:
626
+ self.conn.execute("INSERT INTO messages_fts(messages_fts) VALUES('optimize')")
627
+ self.conn.commit()
628
+
629
+ except Exception as e:
630
+ self.conn.rollback()
631
+ raise e
632
+
633
+ elapsed = time.time() - start_time
634
+ self.stats['elapsed_seconds'] = elapsed
635
+
636
+ return self.stats
637
+
638
+ def _process_message(self, msg: dict) -> None:
639
+ """Process a single message, adding to batch if new."""
640
+ msg_id = msg['id']
641
+ msg_key = f"msg_{msg_id}"
642
+
643
+ # Check if already exists (Bloom filter first, then DB if needed)
644
+ if msg_key in self.bloom:
645
+ self.stats['duplicates'] += 1
646
+ return
647
+
648
+ # Add to Bloom filter
649
+ self.bloom.add(msg_key)
650
+
651
+ # Add to message batch
652
+ self.message_batch.append((
653
+ msg['id'], msg['type'], msg['date'], msg['date_unixtime'],
654
+ msg['from_name'], msg['from_id'], msg['reply_to_message_id'],
655
+ msg['forwarded_from'], msg['forwarded_from_id'], msg['text_plain'],
656
+ msg['text_length'], msg['has_media'], msg['has_photo'],
657
+ msg['has_links'], msg['has_mentions'], msg['is_edited'],
658
+ msg['edited_unixtime'], msg['photo_file_size'],
659
+ msg['photo_width'], msg['photo_height'], msg['raw_json']
660
+ ))
661
+
662
+ # Add entities to batch
663
+ for entity in msg['entities']:
664
+ self.entity_batch.append((msg_id, entity['type'], entity['value']))
665
+
666
+ self.stats['new_messages'] += 1
667
+
668
+ # Flush if batch is full
669
+ if len(self.message_batch) >= self.batch_size:
670
+ self._flush_batches()
671
+
672
+ def _flush_batches(self) -> None:
673
+ """Flush batch buffers to database."""
674
+ cursor = self.conn.cursor()
675
+
676
+ # Insert messages (FTS5 trigger will update automatically)
677
+ if self.message_batch:
678
+ cursor.executemany('''
679
+ INSERT OR IGNORE INTO messages (
680
+ id, type, date, date_unixtime, from_name, from_id,
681
+ reply_to_message_id, forwarded_from, forwarded_from_id,
682
+ text_plain, text_length, has_media, has_photo, has_links,
683
+ has_mentions, is_edited, edited_unixtime, photo_file_size,
684
+ photo_width, photo_height, raw_json
685
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
686
+ ''', self.message_batch)
687
+ self.message_batch = []
688
+
689
+ # Insert entities
690
+ if self.entity_batch:
691
+ cursor.executemany('''
692
+ INSERT OR IGNORE INTO entities (message_id, type, value)
693
+ VALUES (?, ?, ?)
694
+ ''', self.entity_batch)
695
+ self.stats['entities'] += len(self.entity_batch)
696
+ self.entity_batch = []
697
+
698
+ def _update_user_stats(self) -> None:
699
+ """Update users table with aggregated stats."""
700
+ cursor = self.conn.cursor()
701
+
702
+ # Upsert users from messages
703
+ cursor.execute('''
704
+ INSERT OR REPLACE INTO users (user_id, display_name, first_seen, last_seen, message_count)
705
+ SELECT
706
+ from_id,
707
+ from_name,
708
+ MIN(date_unixtime),
709
+ MAX(date_unixtime),
710
+ COUNT(*)
711
+ FROM messages
712
+ WHERE from_id IS NOT NULL AND from_id != ''
713
+ GROUP BY from_id
714
+ ''')
715
+ self.stats['users_updated'] = cursor.rowcount
716
+
717
+ def close(self) -> None:
718
+ """Close database connection."""
719
+ self.conn.close()
720
+
721
+
722
def update_database(db_path: str, json_path: str) -> dict:
    """
    One-shot helper: merge the messages of a JSON export into a database.

    Args:
        db_path: Path to existing SQLite database
        json_path: Path to new JSON file

    Returns:
        Statistics dict produced by the incremental update
    """
    updater = IncrementalIndexer(db_path)
    try:
        return updater.update_from_json(json_path)
    finally:
        # Always release the connection, even when the update fails.
        updater.close()
739
+
740
+
741
def main():
    """Command-line entry point for initial indexing and incremental updates.

    Returns:
        Process exit status: 0 on success, 1 when the JSON file (or, in
        ``--update`` mode, the database) does not exist.
    """
    parser = argparse.ArgumentParser(description='Index Telegram JSON export to SQLite (Optimized)')
    parser.add_argument('json_file', help='Path to Telegram export JSON file')
    parser.add_argument('--db', default='telegram.db', help='SQLite database path')
    parser.add_argument('--batch-size', type=int, default=1000, help='Batch size for inserts')
    parser.add_argument('--build-trigrams', action='store_true', help='Build trigram index for fuzzy search')
    parser.add_argument('--no-graph', action='store_true', help='Skip building reply graph')
    parser.add_argument('--quiet', action='store_true', help='Suppress progress output')
    parser.add_argument('--update', action='store_true',
                        help='Update existing database (add only new messages)')

    args = parser.parse_args()

    if not os.path.exists(args.json_file):
        print(f"Error: JSON file not found: {args.json_file}")
        return 1

    # Update mode: add new messages to existing database
    if args.update:
        if not os.path.exists(args.db):
            print(f"Error: Database not found: {args.db}")
            print("Use without --update flag for initial import")
            return 1

        print(f"{'='*50}")
        print("INCREMENTAL UPDATE MODE")
        print(f"{'='*50}")
        print(f"Database: {args.db}")
        print(f"New JSON: {args.json_file}")
        print()

        indexer = IncrementalIndexer(args.db, args.batch_size)
        try:
            stats = indexer.update_from_json(args.json_file, show_progress=not args.quiet)
        finally:
            # Close the connection even when the update raises.
            indexer.close()

        print(f"\n{'='*50}")
        print("Update complete!")
        print(f"{'='*50}")
        print(f" Messages in file: {stats['total_in_file']:,}")
        print(f" Already existed: {stats['duplicates']:,}")
        print(f" New messages added: {stats['new_messages']:,}")
        print(f" New entities: {stats['entities']:,}")
        print(f" Time elapsed: {stats['elapsed_seconds']:.1f}s")
        return 0

    # Initial import mode
    print(f"Initializing database: {args.db}")
    indexer = OptimizedIndexer(
        db_path=args.db,
        batch_size=args.batch_size,
        build_trigrams=args.build_trigrams,
        build_graph=not args.no_graph
    )

    try:
        print(f"Indexing: {args.json_file}")
        stats = indexer.index_file(args.json_file, show_progress=not args.quiet)
    finally:
        # Close the connection even when indexing raises.
        indexer.close()

    print(f"\n{'='*50}")
    print("Indexing complete!")
    print(f"{'='*50}")
    print(f" Messages indexed: {stats['messages']:,}")
    print(f" Entities extracted: {stats['entities']:,}")
    print(f" Unique users: {len(stats['users']):,}")
    print(f" Duplicates skipped: {stats['duplicates']:,}")
    if stats.get('trigrams'):
        print(f" Trigrams indexed: {stats['trigrams']:,}")
    print(f" Time elapsed: {stats['elapsed_seconds']:.1f}s")
    print(f" Speed: {stats['messages_per_second']:.0f} msg/s")
    print(f"\nDatabase saved to: {args.db}")
    return 0
814
+
815
+
816
if __name__ == '__main__':
    # The bare exit() helper is injected by the `site` module and is not
    # guaranteed to exist (e.g. under `python -S`); SystemExit always is.
    raise SystemExit(main())
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ flask>=3.0
2
+ gunicorn>=21.2
3
+ requests>=2.31
4
+ ijson>=3.2
schema.sql ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
-- Telegram Chat Indexing Schema (Optimized)
-- SQLite with FTS5 for full-text search + performance optimizations

-- ============================================
-- PRAGMA OPTIMIZATIONS
-- ============================================
-- NOTE(review): apart from journal_mode, these settings are believed to be
-- per-connection in SQLite - confirm that runtime connections set them too.
PRAGMA journal_mode = WAL;      -- Write-Ahead Logging for better concurrency
PRAGMA synchronous = NORMAL;    -- Balance between safety and speed
PRAGMA cache_size = -64000;     -- 64MB cache
PRAGMA temp_store = MEMORY;     -- Store temp tables in memory
PRAGMA mmap_size = 268435456;   -- 256MB memory-mapped I/O

-- ============================================
-- MAIN TABLES
-- ============================================

-- Main messages table: one row per exported message
CREATE TABLE IF NOT EXISTS messages (
    id                  INTEGER PRIMARY KEY,
    type                TEXT DEFAULT 'message',
    date                TEXT,
    date_unixtime       INTEGER NOT NULL,
    from_name           TEXT,
    from_id             TEXT NOT NULL,
    reply_to_message_id INTEGER,
    forwarded_from      TEXT,
    forwarded_from_id   TEXT,
    text_plain          TEXT,
    text_length         INTEGER DEFAULT 0,
    has_media           INTEGER DEFAULT 0,
    has_photo           INTEGER DEFAULT 0,
    has_links           INTEGER DEFAULT 0,
    has_mentions        INTEGER DEFAULT 0,
    is_edited           INTEGER DEFAULT 0,
    edited_unixtime     INTEGER,
    photo_file_size     INTEGER,
    photo_width         INTEGER,
    photo_height        INTEGER,
    raw_json            TEXT
);

-- Users table (extracted from messages)
CREATE TABLE IF NOT EXISTS users (
    user_id       TEXT PRIMARY KEY,
    display_name  TEXT,
    first_seen    INTEGER,
    last_seen     INTEGER,
    message_count INTEGER DEFAULT 0
);

-- Entities table (links, mentions, etc.), many rows per message
CREATE TABLE IF NOT EXISTS entities (
    id         INTEGER PRIMARY KEY AUTOINCREMENT,
    message_id INTEGER NOT NULL,
    type       TEXT NOT NULL,
    value      TEXT NOT NULL,
    FOREIGN KEY (message_id) REFERENCES messages(id) ON DELETE CASCADE
);

-- ============================================
-- GRAPH STRUCTURE FOR REPLY THREADS
-- ============================================

-- Pre-computed reply graph edges for fast traversal
CREATE TABLE IF NOT EXISTS reply_graph (
    parent_id INTEGER NOT NULL,
    child_id  INTEGER NOT NULL,
    depth     INTEGER DEFAULT 1,
    PRIMARY KEY (parent_id, child_id)
);

-- Conversation threads (connected components)
CREATE TABLE IF NOT EXISTS threads (
    thread_id          INTEGER PRIMARY KEY AUTOINCREMENT,
    root_message_id    INTEGER UNIQUE,
    message_count      INTEGER DEFAULT 0,
    first_message_time INTEGER,
    last_message_time  INTEGER,
    participant_count  INTEGER DEFAULT 0
);

-- Message to thread mapping
CREATE TABLE IF NOT EXISTS message_threads (
    message_id INTEGER PRIMARY KEY,
    thread_id  INTEGER NOT NULL,
    depth      INTEGER DEFAULT 0,
    FOREIGN KEY (thread_id) REFERENCES threads(thread_id)
);

-- ============================================
-- TRIGRAM INDEX FOR FUZZY SEARCH
-- ============================================

-- Trigrams for fuzzy/approximate string matching
CREATE TABLE IF NOT EXISTS trigrams (
    trigram    TEXT NOT NULL,
    message_id INTEGER NOT NULL,
    position   INTEGER NOT NULL,
    PRIMARY KEY (trigram, message_id, position)
);

-- ============================================
-- FTS5 FULL-TEXT SEARCH (OPTIMIZED)
-- ============================================

-- Full-text search over messages(text_plain, from_name).
-- The prefix option enables prefix queries for autocomplete.
CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
    text_plain,
    from_name,
    content='messages',
    content_rowid='id',
    tokenize='unicode61 remove_diacritics 2',
    prefix='2 3 4'
);

-- Triggers to keep FTS in sync with the messages table
CREATE TRIGGER IF NOT EXISTS messages_ai AFTER INSERT ON messages BEGIN
    INSERT INTO messages_fts(rowid, text_plain, from_name)
    VALUES (new.id, new.text_plain, new.from_name);
END;

CREATE TRIGGER IF NOT EXISTS messages_ad AFTER DELETE ON messages BEGIN
    INSERT INTO messages_fts(messages_fts, rowid, text_plain, from_name)
    VALUES ('delete', old.id, old.text_plain, old.from_name);
END;

CREATE TRIGGER IF NOT EXISTS messages_au AFTER UPDATE ON messages BEGIN
    INSERT INTO messages_fts(messages_fts, rowid, text_plain, from_name)
    VALUES ('delete', old.id, old.text_plain, old.from_name);
    INSERT INTO messages_fts(rowid, text_plain, from_name)
    VALUES (new.id, new.text_plain, new.from_name);
END;
133
+
134
+ -- ============================================
135
+ -- OPTIMIZED INDEXES
136
+ -- ============================================
137
+
138
-- Composite indexes for common query patterns
CREATE INDEX IF NOT EXISTS idx_messages_date ON messages(date_unixtime);
-- No separate index on messages(from_id): idx_messages_from_date below has
-- from_id as its leading column and serves the same lookups, so a second
-- index would only add write and storage cost during bulk indexing.
CREATE INDEX IF NOT EXISTS idx_messages_from_date ON messages(from_id, date_unixtime);
CREATE INDEX IF NOT EXISTS idx_messages_reply ON messages(reply_to_message_id) WHERE reply_to_message_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_messages_forwarded ON messages(forwarded_from_id) WHERE forwarded_from_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_messages_has_links ON messages(has_links) WHERE has_links = 1;
CREATE INDEX IF NOT EXISTS idx_messages_has_media ON messages(has_media) WHERE has_media = 1;

-- Entity indexes
CREATE INDEX IF NOT EXISTS idx_entities_message ON entities(message_id);
CREATE INDEX IF NOT EXISTS idx_entities_type_value ON entities(type, value);
CREATE INDEX IF NOT EXISTS idx_entities_value ON entities(value);

-- Graph indexes
CREATE INDEX IF NOT EXISTS idx_reply_graph_child ON reply_graph(child_id);
CREATE INDEX IF NOT EXISTS idx_message_threads_thread ON message_threads(thread_id);

-- Trigram lookups by trigram are served by the table's
-- PRIMARY KEY (trigram, message_id, position), whose leading column is
-- trigram; an additional index on trigrams(trigram) would be redundant.
158
+
159
+ -- ============================================
160
+ -- PARTICIPANTS TABLE (from Telethon API)
161
+ -- ============================================
162
+
163
-- One row per chat participant
CREATE TABLE IF NOT EXISTS participants (
    user_id     TEXT PRIMARY KEY,
    first_name  TEXT,
    last_name   TEXT,
    username    TEXT,
    phone       TEXT,
    is_bot      INTEGER DEFAULT 0,
    is_admin    INTEGER DEFAULT 0,
    is_creator  INTEGER DEFAULT 0,
    is_premium  INTEGER DEFAULT 0,
    join_date   INTEGER,
    last_status TEXT DEFAULT 'unknown',
    last_online INTEGER,
    about       TEXT,
    updated_at  INTEGER
);

-- ============================================
-- STATISTICS TABLE FOR FAST AGGREGATIONS
-- ============================================

-- Key/value store for cached aggregate results
CREATE TABLE IF NOT EXISTS stats_cache (
    key        TEXT PRIMARY KEY,
    value      TEXT,
    updated_at INTEGER
);

-- ============================================
-- VECTOR EMBEDDINGS TABLE (OPTIONAL)
-- ============================================

-- For semantic search with FAISS; one embedding per message
CREATE TABLE IF NOT EXISTS embeddings (
    message_id INTEGER PRIMARY KEY,
    embedding  BLOB,                    -- Serialized numpy array
    model_name TEXT DEFAULT 'default',
    FOREIGN KEY (message_id) REFERENCES messages(id) ON DELETE CASCADE
);
search.py ADDED
@@ -0,0 +1,564 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Telegram Chat Search Utilities (Optimized)
4
+
5
+ Features:
6
+ - Full-text search with BM25 ranking
7
+ - LRU caching for repeated queries
8
+ - Fuzzy search with trigram similarity
9
+ - Thread traversal with DFS/BFS
10
+ - Autocomplete suggestions
11
+
12
+ Usage:
13
+ python search.py <query> [options]
14
+ python search.py "שלום" --db telegram.db
15
+ python search.py "link" --user user123 --fuzzy
16
+ """
17
+
18
+ import sqlite3
19
+ import argparse
20
+ from datetime import datetime
21
+ from typing import Optional
22
+ from functools import lru_cache
23
+
24
+ from data_structures import LRUCache, Trie, TrigramIndex, ReplyGraph, lru_cached
25
+
26
+
27
+ class TelegramSearch:
28
+ """
29
+ High-performance search interface for indexed Telegram messages.
30
+
31
+ Features:
32
+ - Full-text search with FTS5 and BM25 ranking
33
+ - Query result caching (LRU)
34
+ - Fuzzy/approximate search with trigrams
35
+ - Thread reconstruction with graph traversal
36
+ - Autocomplete for usernames and common terms
37
+ """
38
+
39
def __init__(self, db_path: str = 'telegram.db', cache_size: int = 1000):
    """Open the database and prepare the (lazily built) search helpers.

    Args:
        db_path: Path of the SQLite database to open.
        cache_size: Maximum number of entries kept in the query cache.
    """
    connection = sqlite3.connect(db_path)
    connection.row_factory = sqlite3.Row  # rows convertible with dict(row)

    self.db_path = db_path
    self.conn = connection

    # Query cache is created eagerly; the other structures start out unset.
    self.query_cache = LRUCache(maxsize=cache_size)
    self.user_trie: Optional[Trie] = None
    self.trigram_index: Optional[TrigramIndex] = None
    self.reply_graph: Optional[ReplyGraph] = None
49
+
50
def close(self):
    """Close the SQLite connection held in ``self.conn``."""
    connection = self.conn
    connection.close()
52
+
53
+ def __enter__(self):
54
+ return self
55
+
56
+ def __exit__(self, *args):
57
+ self.close()
58
+
59
+ # ==========================================
60
+ # FULL-TEXT SEARCH
61
+ # ==========================================
62
+
63
def search(
    self,
    query: str,
    user_id: Optional[str] = None,
    from_date: Optional[int] = None,
    to_date: Optional[int] = None,
    has_links: Optional[bool] = None,
    has_mentions: Optional[bool] = None,
    has_media: Optional[bool] = None,
    limit: int = 100,
    offset: int = 0,
    use_cache: bool = True
) -> list[dict]:
    """
    Run an FTS5 query ordered by bm25 relevance, with optional filters.

    Args:
        query: FTS5 MATCH expression
        user_id: Restrict to this sender id (ignored when empty)
        from_date: Lower bound on date_unixtime (ignored when falsy)
        to_date: Upper bound on date_unixtime (ignored when falsy)
        has_links/has_mentions/has_media: Tri-state filters; None = no filter
        limit: Max results
        offset: Pagination offset
        use_cache: Read from / write to the query cache

    Returns:
        List of message dicts, each with a ``relevance`` value
    """
    cache_key = f"search:{query}:{user_id}:{from_date}:{to_date}:{has_links}:{has_mentions}:{has_media}:{limit}:{offset}"

    if use_cache:
        hit = self.query_cache.get(cache_key)
        if hit is not None:
            return hit

    # Collect (SQL fragment, bound value) pairs in a fixed order.
    filters = []
    if user_id:
        filters.append(("m.from_id = ?", user_id))
    if from_date:
        filters.append(("m.date_unixtime >= ?", from_date))
    if to_date:
        filters.append(("m.date_unixtime <= ?", to_date))
    for fragment, flag in (
        ("m.has_links = ?", has_links),
        ("m.has_mentions = ?", has_mentions),
        ("m.has_media = ?", has_media),
    ):
        if flag is not None:
            filters.append((fragment, 1 if flag else 0))

    if filters:
        where_clause = " AND ".join(fragment for fragment, _ in filters)
    else:
        where_clause = "1=1"

    sql = f'''
        SELECT
            m.id,
            m.date,
            m.date_unixtime,
            m.from_name,
            m.from_id,
            m.text_plain,
            m.reply_to_message_id,
            m.forwarded_from,
            m.has_links,
            m.has_mentions,
            m.has_media,
            bm25(messages_fts, 1.0, 0.5) as relevance
        FROM messages_fts
        JOIN messages m ON messages_fts.rowid = m.id
        WHERE messages_fts MATCH ?
        AND {where_clause}
        ORDER BY relevance
        LIMIT ? OFFSET ?
    '''

    # Bind order: MATCH term, filter values, then LIMIT/OFFSET.
    bound = [query]
    bound.extend(value for _, value in filters)
    bound.extend((limit, offset))

    rows = self.conn.execute(sql, bound).fetchall()
    results = [dict(row) for row in rows]

    if use_cache:
        self.query_cache.put(cache_key, results)

    return results
161
+
162
def search_prefix(self, prefix: str, limit: int = 100) -> list[dict]:
    """
    Search using prefix matching (autocomplete-style).

    The prefix is passed to FTS5 as a quoted string followed by ``*`` so
    that characters with a meaning in FTS5 query syntax (``-``, ``:``,
    ``"``, parentheses, ...) are treated as text instead of raising a
    syntax error. For multi-word input the words are matched as a phrase
    whose last word is a prefix.

    Args:
        prefix: Text typed so far; empty or whitespace-only input yields
            no results instead of an invalid ``*`` query.
        limit: Max results

    Returns:
        List of message dicts as returned by ``search``
    """
    term = prefix.strip() if prefix else ''
    if not term:
        return []

    # Inside an FTS5 string, a literal double quote is written as "".
    escaped = term.replace('"', '""')
    query = f'"{escaped}"*'
    return self.search(query, limit=limit, use_cache=True)
171
+
172
+ # ==========================================
173
+ # FUZZY SEARCH
174
+ # ==========================================
175
+
176
def fuzzy_search(
    self,
    query: str,
    threshold: float = 0.3,
    limit: int = 50
) -> list[dict]:
    """
    Approximate search backed by the in-memory trigram index.

    The index is built on first use. Candidate ids reported by the index
    are then looked up in ``messages``; ids without a row are skipped.

    Args:
        query: Search query
        threshold: Minimum similarity passed to the trigram index
        limit: Max number of candidates requested from the index

    Returns:
        List of message dicts (all columns of ``messages``), each with an
        added ``similarity`` key, in the order the index returned them
    """
    # Lazily build the trigram index
    if self.trigram_index is None:
        self._build_trigram_index()

    candidates = self.trigram_index.search(query, threshold=threshold, limit=limit)

    found = []
    for candidate_id, score in candidates:
        record = self.conn.execute(
            'SELECT * FROM messages WHERE id = ?',
            (candidate_id,)
        ).fetchone()
        if not record:
            continue
        message = dict(record)
        message['similarity'] = score
        found.append(message)

    return found
216
+
217
def _build_trigram_index(self) -> None:
    """Populate ``self.trigram_index`` from every message that has text."""
    print("Building trigram index (first time only)...")
    index = TrigramIndex()
    self.trigram_index = index

    rows = self.conn.execute(
        'SELECT id, text_plain FROM messages WHERE text_plain IS NOT NULL'
    ).fetchall()
    for row in rows:
        # Column 0 is the message id, column 1 its plain text.
        index.add(row[0], row[1])

    print(f"Trigram index built: {len(self.trigram_index)} documents")
229
+
230
+ # ==========================================
231
+ # THREAD TRAVERSAL
232
+ # ==========================================
233
+
234
def get_thread_dfs(self, message_id: int) -> list[dict]:
    """
    Return the conversation thread containing ``message_id`` in the order
    produced by the reply graph's ``dfs_descendants`` traversal.

    The reply graph is built on first use. The traversal starts at the
    thread root of ``message_id``, not at the message itself.
    """
    if self.reply_graph is None:
        self._build_reply_graph()
    graph = self.reply_graph

    thread_root = graph.get_thread_root(message_id)
    ordered_ids = graph.dfs_descendants(thread_root)
    return self._fetch_messages_ordered(ordered_ids)
251
+
252
def get_thread_bfs(self, message_id: int) -> list[dict]:
    """
    Return the conversation thread containing ``message_id`` in the order
    produced by the reply graph's ``bfs_descendants`` traversal.

    The reply graph is built on first use. The traversal starts at the
    thread root of ``message_id``.
    """
    if self.reply_graph is None:
        self._build_reply_graph()
    graph = self.reply_graph

    ordered_ids = graph.bfs_descendants(graph.get_thread_root(message_id))
    return self._fetch_messages_ordered(ordered_ids)
265
+
266
def get_thread_with_depth(self, message_id: int) -> list[tuple[dict, int]]:
    """
    Get the thread containing ``message_id`` with a depth for each message.

    The reply graph is built on first use. The (id, depth) pairs come from
    the graph's ``bfs_with_depth`` starting at the thread root. All message
    rows are loaded through one batched lookup rather than one query per
    message.

    Returns:
        List of (message dict, depth) tuples in traversal order. Ids that
        have no row in ``messages`` are skipped.
    """
    if self.reply_graph is None:
        self._build_reply_graph()

    root_id = self.reply_graph.get_thread_root(message_id)
    # Materialise once: the pairs are walked twice below.
    items = list(self.reply_graph.bfs_with_depth(root_id))
    if not items:
        return []

    fetched = self._fetch_messages_ordered([msg_id for msg_id, _ in items])
    by_id = {msg['id']: msg for msg in fetched}

    return [(by_id[msg_id], depth) for msg_id, depth in items if msg_id in by_id]
289
+
290
def get_replies(self, message_id: int) -> list[dict]:
    """Return the messages the reply graph lists as children of ``message_id``."""
    if self.reply_graph is None:
        self._build_reply_graph()

    reply_ids = self.reply_graph.get_children(message_id)
    return self._fetch_messages_ordered(reply_ids)
297
+
298
def get_conversation_path(self, message_id: int) -> list[dict]:
    """Return the messages on the reply graph's thread path for ``message_id``."""
    if self.reply_graph is None:
        self._build_reply_graph()

    ids_on_path = self.reply_graph.get_thread_path(message_id)
    return self._fetch_messages_ordered(ids_on_path)
305
+
306
def _build_reply_graph(self) -> None:
    """Populate ``self.reply_graph`` with every (id, reply_to_message_id) pair."""
    print("Building reply graph (first time only)...")
    graph = ReplyGraph()
    self.reply_graph = graph

    rows = self.conn.execute(
        'SELECT id, reply_to_message_id FROM messages'
    ).fetchall()
    for row in rows:
        # Column 0 is the message id, column 1 the id it replies to (may be NULL).
        graph.add_message(row[0], row[1])

    print(f"Reply graph built: {self.reply_graph.stats}")
318
+
319
+ def _fetch_messages_ordered(self, msg_ids: list[int]) -> list[dict]:
320
+ """Fetch messages preserving the order of IDs."""
321
+ if not msg_ids:
322
+ return []
323
+
324
+ placeholders = ','.join('?' * len(msg_ids))
325
+ cursor = self.conn.execute(
326
+ f'SELECT * FROM messages WHERE id IN ({placeholders})',
327
+ msg_ids
328
+ )
329
+
330
+ # Create lookup dict
331
+ msg_map = {row['id']: dict(row) for row in cursor.fetchall()}
332
+
333
+ # Return in original order
334
+ return [msg_map[mid] for mid in msg_ids if mid in msg_map]
335
+
336
+ # ==========================================
337
+ # AUTOCOMPLETE
338
+ # ==========================================
339
+
340
def autocomplete_user(self, prefix: str, limit: int = 10) -> list[str]:
    """
    Suggest user names starting with ``prefix``.

    The user trie is built on first use; the lookup itself is delegated to
    the trie's ``autocomplete`` method.
    """
    if self.user_trie is None:
        self._build_user_trie()

    trie = self.user_trie
    return trie.autocomplete(prefix, limit=limit)
350
+
351
def _build_user_trie(self) -> None:
    """
    Populate ``self.user_trie`` from the ``users`` table.

    Each user is inserted under its display name and under its user id
    (whichever are non-empty), both carrying the user id as payload.
    """
    trie = Trie()
    self.user_trie = trie

    rows = self.conn.execute('SELECT user_id, display_name FROM users').fetchall()
    for row in rows:
        user_id = row['user_id']
        display_name = row['display_name']
        if display_name:
            trie.insert(display_name, data=user_id)
        if user_id:
            trie.insert(user_id, data=user_id)
361
+
362
+ # ==========================================
363
+ # CONVENIENCE METHODS
364
+ # ==========================================
365
+
366
def search_by_user(self, user_id: str, limit: int = 100) -> list[dict]:
    """Return up to ``limit`` messages whose ``from_id`` is ``user_id``, newest first."""
    rows = self.conn.execute(
        '''
        SELECT * FROM messages
        WHERE from_id = ?
        ORDER BY date_unixtime DESC
        LIMIT ?
        ''',
        (user_id, limit),
    ).fetchall()
    return list(map(dict, rows))
376
+
377
def search_by_date_range(
    self,
    from_date: int,
    to_date: int,
    limit: int = 1000
) -> list[dict]:
    """
    Return up to ``limit`` messages with ``date_unixtime`` between
    ``from_date`` and ``to_date`` (SQL ``BETWEEN``, so both ends are
    inclusive), oldest first.
    """
    query = '''
        SELECT * FROM messages
        WHERE date_unixtime BETWEEN ? AND ?
        ORDER BY date_unixtime ASC
        LIMIT ?
    '''
    bounds = (from_date, to_date, limit)
    return [dict(record) for record in self.conn.execute(query, bounds).fetchall()]
392
+
393
def get_links(self, limit: int = 100) -> list[dict]:
    """
    Return up to ``limit`` extracted links, newest message first.

    Each dict has the keys ``url``, ``message_id``, ``from_name`` and ``date``.
    """
    rows = self.conn.execute(
        '''
        SELECT e.value as url, e.message_id, m.from_name, m.date
        FROM entities e
        JOIN messages m ON e.message_id = m.id
        WHERE e.type = 'link'
        ORDER BY m.date_unixtime DESC
        LIMIT ?
        ''',
        (limit,),
    ).fetchall()
    return list(map(dict, rows))
405
+
406
def get_mentions(self, username: Optional[str] = None, limit: int = 100) -> list[dict]:
    """
    Get mentions, optionally filtered by username.

    The username is matched as a literal substring of the mention value:
    LIKE wildcards (``%`` and ``_``) inside it are escaped, so a name such
    as ``john_doe`` no longer also matches ``johnxdoe``.

    Args:
        username: Substring the mention must contain; ``None`` or empty
            returns all mentions.
        limit: Max rows to return.

    Returns:
        Dicts with keys ``mention``, ``message_id``, ``from_name``,
        ``text_plain`` and ``date``, newest message first.
    """
    conditions = ["e.type = 'mention'"]
    params: list = []

    if username:
        # Escape the escape character first, then the two LIKE wildcards.
        literal = (
            username.replace('\\', '\\\\')
                    .replace('%', '\\%')
                    .replace('_', '\\_')
        )
        conditions.append("e.value LIKE ? ESCAPE '\\'")
        params.append(f'%{literal}%')

    where_clause = ' AND '.join(conditions)
    sql = f'''
        SELECT e.value as mention, e.message_id, m.from_name, m.text_plain, m.date
        FROM entities e
        JOIN messages m ON e.message_id = m.id
        WHERE {where_clause}
        ORDER BY m.date_unixtime DESC
        LIMIT ?
    '''
    params.append(limit)

    cursor = self.conn.execute(sql, params)
    return [dict(row) for row in cursor.fetchall()]
430
+
431
@property
def cache_stats(self) -> dict:
    """Expose the ``stats`` attribute of the query cache."""
    cache = self.query_cache
    return cache.stats
435
+
436
+
437
def format_result(msg: dict, show_depth: bool = False, depth: int = 0) -> str:
    """
    Format a message for display.

    Missing *and* NULL fields are tolerated: a message without text (for
    example a media-only row where ``text_plain`` is None) is rendered with
    an empty body instead of raising ``TypeError``, and a None date or
    sender falls back to the same placeholder used for a missing key.

    Args:
        msg: Message dict; reads ``date``, ``from_name``, ``text_plain``,
            ``has_links``, ``has_mentions``, ``has_media``, ``similarity``
            and ``relevance`` when present.
        show_depth: Indent the line according to ``depth``.
        depth: Nesting level used for indentation when ``show_depth`` is set.

    Returns:
        A single display line: ``[date] sender: text flags``.
    """
    date_str = msg.get('date') or 'Unknown date'
    from_name = msg.get('from_name') or 'Unknown'

    full_text = msg.get('text_plain') or ''
    text = full_text[:200]
    if len(full_text) > 200:
        text += '...'

    flags = []
    if msg.get('has_links'):
        flags.append('[link]')
    if msg.get('has_mentions'):
        flags.append('[mention]')
    if msg.get('has_media'):
        flags.append('[media]')
    # Compare against None so a legitimate score of 0.0 is still shown.
    if msg.get('similarity') is not None:
        flags.append(f'[sim:{msg["similarity"]:.2f}]')
    if msg.get('relevance') is not None:
        # The score is shown as a magnitude, hence abs().
        flags.append(f'[rel:{abs(msg["relevance"]):.2f}]')

    flags_str = ' '.join(flags)
    indent = ' ' * depth if show_depth else ''
    return f"{indent}[{date_str}] {from_name}: {text} {flags_str}"
460
+
461
+
462
def main():
    """
    Command-line entry point for searching an indexed Telegram database.

    Exactly one action runs per invocation, checked in this order:
    ``--thread``, ``--autocomplete``, ``--list-links``, ``--list-mentions``,
    ``--cache-stats``, then a fuzzy or regular search for ``query``. With
    no action and no query the help text is printed.

    ``--from-date`` / ``--to-date`` are calendar days in local time. The
    upper bound is moved to 23:59:59 of that day so the named day itself is
    part of the range. A malformed date ends the program with a usage
    error instead of a traceback.
    """
    parser = argparse.ArgumentParser(description='Search indexed Telegram messages')
    parser.add_argument('query', nargs='?', help='Search query')
    parser.add_argument('--db', default='telegram.db', help='Database path')
    parser.add_argument('--user', help='Filter by user ID')
    parser.add_argument('--from-date', help='From date (YYYY-MM-DD)')
    parser.add_argument('--to-date', help='To date (YYYY-MM-DD)')
    parser.add_argument('--links', action='store_true', help='Show only messages with links')
    parser.add_argument('--mentions', action='store_true', help='Show only messages with mentions')
    parser.add_argument('--media', action='store_true', help='Show only messages with media')
    parser.add_argument('--limit', type=int, default=50, help='Max results')
    parser.add_argument('--fuzzy', action='store_true', help='Use fuzzy search')
    parser.add_argument('--threshold', type=float, default=0.3, help='Fuzzy match threshold')
    parser.add_argument('--thread', type=int, help='Show thread for message ID')
    parser.add_argument('--list-links', action='store_true', help='List all extracted links')
    parser.add_argument('--list-mentions', action='store_true', help='List all mentions')
    parser.add_argument('--autocomplete', help='Autocomplete username')
    parser.add_argument('--cache-stats', action='store_true', help='Show cache statistics')

    args = parser.parse_args()

    def day_to_timestamp(value, flag, end_of_day=False):
        """Convert a YYYY-MM-DD argument to a Unix timestamp or exit with a usage error."""
        try:
            moment = datetime.strptime(value, '%Y-%m-%d')
        except ValueError:
            # parser.error() prints the usage line and exits with status 2.
            parser.error(f"{flag} expects YYYY-MM-DD, got '{value}'")
        if end_of_day:
            moment = moment.replace(hour=23, minute=59, second=59)
        return int(moment.timestamp())

    with TelegramSearch(args.db) as search:
        # Show thread ("is not None" so that message id 0 is not ignored)
        if args.thread is not None:
            print(f"Thread containing message {args.thread}:\n")
            thread = search.get_thread_with_depth(args.thread)
            for msg, depth in thread:
                print(format_result(msg, show_depth=True, depth=depth))
            return

        # Autocomplete
        if args.autocomplete:
            suggestions = search.autocomplete_user(args.autocomplete)
            print(f"Suggestions for '{args.autocomplete}':")
            for s in suggestions:
                print(f" {s}")
            return

        # List links
        if args.list_links:
            links = search.get_links(args.limit)
            print(f"Found {len(links)} links:\n")
            for link in links:
                print(f" {link['url']}")
                print(f" From: {link['from_name']} at {link['date']}")
            return

        # List mentions
        if args.list_mentions:
            mentions = search.get_mentions(limit=args.limit)
            print(f"Found {len(mentions)} mentions:\n")
            for m in mentions:
                print(f" {m['mention']} by {m['from_name']}")
            return

        # Cache stats
        if args.cache_stats:
            print(f"Cache stats: {search.cache_stats}")
            return

        if not args.query:
            parser.print_help()
            return

        # Parse dates
        from_ts = None
        to_ts = None
        if args.from_date:
            from_ts = day_to_timestamp(args.from_date, '--from-date')
        if args.to_date:
            # End of day, so "--to-date 2024-01-31" includes messages sent on the 31st.
            to_ts = day_to_timestamp(args.to_date, '--to-date', end_of_day=True)

        # Fuzzy or regular search
        if args.fuzzy:
            results = search.fuzzy_search(
                query=args.query,
                threshold=args.threshold,
                limit=args.limit
            )
            print(f"Found {len(results)} fuzzy matches for '{args.query}':\n")
        else:
            results = search.search(
                query=args.query,
                user_id=args.user,
                from_date=from_ts,
                to_date=to_ts,
                has_links=True if args.links else None,
                has_mentions=True if args.mentions else None,
                has_media=True if args.media else None,
                limit=args.limit
            )
            print(f"Found {len(results)} results for '{args.query}':\n")

        for msg in results:
            print(format_result(msg))
            print()

        # Show cache stats
        print(f"\nCache: {search.cache_stats}")
562
+
563
+ if __name__ == '__main__':
564
+ main()
semantic_search.py ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Semantic Search using pre-computed embeddings from Colab.
3
+ Lightweight - only needs sentence-transformers for query encoding.
4
+ """
5
+
6
+ import sqlite3
7
+ import numpy as np
8
+ from typing import List, Dict, Any, Optional
9
+
10
+ # Try importing sentence-transformers
11
+ try:
12
+ from sentence_transformers import SentenceTransformer
13
+ HAS_TRANSFORMERS = True
14
+ except ImportError:
15
+ HAS_TRANSFORMERS = False
16
+ SentenceTransformer = None
17
+
18
+
19
class SemanticSearch:
    """
    Semantic search using pre-computed embeddings.

    Embeddings are read from a SQLite file (``embeddings.db`` by default; the
    module notes say it is produced by a Colab notebook) and held in memory
    as a row-normalised matrix. Queries are encoded with a
    sentence-transformers model and ranked by cosine similarity. Message
    text and reply threads are read from the messages database.
    """

    # Model used to encode queries; per the original comment it is the same
    # one that was used to create the stored embeddings.
    MODEL_NAME = 'paraphrase-multilingual-MiniLM-L12-v2'

    # Number of ids bound per SQL statement. Kept well below SQLite's
    # historical default limit of 999 bound parameters.
    _SQL_CHUNK = 500

    def __init__(self, embeddings_db: str = 'embeddings.db', messages_db: str = 'telegram.db'):
        self.embeddings_db = embeddings_db
        self.messages_db = messages_db
        self.model = None
        self.embeddings_loaded = False
        # Row i of `embeddings` belongs to message_ids[i] / from_names[i] / text_previews[i].
        self.embeddings = np.array([]).reshape(0, 0)
        self.message_ids = []
        self.from_names = []
        self.text_previews = []

    # ------------------------------------------------------------------
    # Loading
    # ------------------------------------------------------------------

    def _load_model(self):
        """
        Load the embedding model on first use.

        Raises:
            RuntimeError: if sentence-transformers is not installed.
        """
        if not HAS_TRANSFORMERS:
            raise RuntimeError(
                "sentence-transformers not installed.\n"
                "Install with: pip install sentence-transformers"
            )
        if self.model is None:
            print("Loading embedding model...")
            self.model = SentenceTransformer(self.MODEL_NAME)
            print("Model loaded!")

    def _reset_embeddings(self):
        """Drop all in-memory embedding state."""
        self.embeddings = np.array([]).reshape(0, 0)
        self.message_ids = []
        self.from_names = []
        self.text_previews = []

    def reload_embeddings(self):
        """Force reload embeddings from DB (e.g., after daily sync adds new ones)."""
        self.embeddings_loaded = False
        self._reset_embeddings()
        self._load_embeddings()

    def _load_embeddings(self):
        """
        Load all embeddings into memory for fast search.

        Rows are collected into local lists and published to the instance
        only after the whole load succeeded, so a failure part-way through
        cannot leave ids and vectors misaligned or duplicated on a retry.
        Rows whose blob is empty, is not a whole number of float32 values,
        or has a different dimension than the first usable row are skipped.
        """
        if self.embeddings_loaded:
            return

        import os
        if not os.path.exists(self.embeddings_db):
            print(f"Embeddings DB not found: {self.embeddings_db}")
            self._reset_embeddings()
            self.embeddings_loaded = True
            return

        print(f"Loading embeddings from {self.embeddings_db}...")
        message_ids, from_names, text_previews, vectors = [], [], [], []
        skipped = 0

        conn = sqlite3.connect(self.embeddings_db)
        try:
            cursor = conn.execute(
                "SELECT message_id, from_name, text_preview, embedding FROM embeddings"
            )
            for msg_id, name, text, emb_blob in cursor:
                if not emb_blob:
                    skipped += 1
                    continue
                try:
                    vector = np.frombuffer(emb_blob, dtype=np.float32)
                except ValueError:
                    # Blob length is not a multiple of 4 bytes.
                    skipped += 1
                    continue
                if vectors and vector.shape != vectors[0].shape:
                    # Vectors of mixed dimension cannot be stacked into one matrix.
                    skipped += 1
                    continue
                message_ids.append(msg_id)
                from_names.append(name or '')
                text_previews.append(text or '')
                vectors.append(vector)
        finally:
            conn.close()

        if skipped:
            print(f"Skipped {skipped} unusable embedding rows")

        if not vectors:
            print("No embeddings found in database")
            self._reset_embeddings()
            self.embeddings_loaded = True
            return

        # Stack into numpy array for fast computation
        matrix = np.vstack(vectors)
        # Normalize embeddings for cosine similarity
        norms = np.linalg.norm(matrix, axis=1, keepdims=True)
        norms = np.where(norms == 0, 1, norms)  # Avoid division by zero

        self.embeddings = matrix / norms
        self.message_ids = message_ids
        self.from_names = from_names
        self.text_previews = text_previews
        self.embeddings_loaded = True
        print(f"Loaded {len(self.message_ids)} embeddings")

    # ------------------------------------------------------------------
    # Search
    # ------------------------------------------------------------------

    def search(self, query: str, limit: int = 50, min_score: float = 0.3) -> List[Dict[str, Any]]:
        """
        Search for semantically similar messages.

        Args:
            query: The search query
            limit: Max results to return
            min_score: Minimum cosine similarity for a result to be kept

        Returns:
            List of dicts with message_id, from_name, text, score, best
            score first.

        Raises:
            RuntimeError: if sentence-transformers is not installed.
        """
        self._load_model()
        self._load_embeddings()

        if len(self.message_ids) == 0 or limit <= 0:
            return []

        # Encode query
        query_emb = self.model.encode([query], convert_to_numpy=True)[0]

        query_norm = np.linalg.norm(query_emb)
        if query_norm == 0:
            # A zero vector has no direction; every similarity would be NaN.
            return []

        # Stored rows were normalised in _load_embeddings, so a dot product
        # with the normalised query is the cosine similarity.
        similarities = np.dot(self.embeddings, query_emb / query_norm)

        # Best `limit` candidates, highest score first.
        top_indices = np.argsort(similarities)[::-1][:limit]

        results = []
        for idx in top_indices:
            score = float(similarities[idx])
            if score < min_score:
                # Scores are in descending order, so nothing further qualifies.
                break
            results.append({
                'message_id': int(self.message_ids[idx]),
                'from_name': self.from_names[idx],
                'text': self.text_previews[idx],
                'score': score
            })

        return results

    def search_with_full_text(self, query: str, limit: int = 20) -> List[Dict[str, Any]]:
        """
        Search and return full message text from messages DB.

        Each result found in the messages DB gets ``date`` and
        ``reply_to_message_id`` added, and ``from_name`` / ``text`` replaced
        with the stored values. Results without a matching row keep the
        preview data.
        """
        results = self.search(query, limit=limit)

        if not results:
            return []

        conn = sqlite3.connect(self.messages_db)
        conn.row_factory = sqlite3.Row
        try:
            wanted = [result['message_id'] for result in results]
            rows = {row['id']: row for row in self._select_messages(conn, 'id', wanted)}
        finally:
            conn.close()

        for result in results:
            row = rows.get(result['message_id'])
            if row is not None:
                result['date'] = row['date']
                result['from_name'] = row['from_name']
                result['text'] = row['text_plain']
                result['reply_to_message_id'] = row['reply_to_message_id']

        return results

    # ------------------------------------------------------------------
    # Thread context
    # ------------------------------------------------------------------

    @classmethod
    def _select_messages(cls, conn, column: str, ids) -> list:
        """
        Return message rows whose ``column`` is one of ``ids``.

        ``column`` is an internal constant ('id' or 'reply_to_message_id'),
        never user input. Ids are bound in chunks of ``_SQL_CHUNK``.
        """
        id_list = list(ids)
        rows = []
        for start in range(0, len(id_list), cls._SQL_CHUNK):
            chunk = id_list[start:start + cls._SQL_CHUNK]
            placeholders = ','.join('?' * len(chunk))
            rows.extend(conn.execute(
                "SELECT id, date, from_name, text_plain, reply_to_message_id "
                f"FROM messages WHERE {column} IN ({placeholders}) ORDER BY id",
                chunk
            ).fetchall())
        return rows

    @staticmethod
    def _find_thread_root(conn, message_id, reply_to):
        """Follow reply links upward and return the id of the top-most message reached."""
        current_id = message_id
        parent_id = reply_to
        visited = {current_id}  # guards against reply cycles

        while parent_id and parent_id not in visited:
            visited.add(parent_id)
            row = conn.execute(
                "SELECT id, reply_to_message_id FROM messages WHERE id = ?",
                (parent_id,)
            ).fetchone()
            if row is None:
                # Parent is not in the DB; the last message found is the root.
                break
            current_id = row['id']
            parent_id = row['reply_to_message_id']

        return current_id

    @staticmethod
    def _context_message(row) -> Dict[str, Any]:
        """Convert a messages row into a result dict flagged as thread context."""
        return {
            'message_id': row['id'],
            'date': row['date'],
            'from_name': row['from_name'],
            'text': row['text_plain'],
            'reply_to_message_id': row['reply_to_message_id'],
            'is_thread_context': True
        }

    def _add_thread_context(
        self,
        results: List[Dict[str, Any]],
        max_depth: int = 10,
        max_context: int = 500
    ) -> List[Dict[str, Any]]:
        """
        Add thread context to search results.

        For each result the reply chain is followed up to its root. Then,
        level by level, the replies to the roots, to the results and to
        every newly found message are collected. Each level only asks for
        replies to the messages found in the previous level, so rows that
        are already known cannot crowd new replies out of the query.

        Args:
            results: Search results; each needs ``message_id`` and may carry
                ``reply_to_message_id`` and ``date``.
            max_depth: Highest reply level to expand (level 0 = replies to
                the roots and results).
            max_context: Stop collecting replies once this many context
                messages have been added.

        Returns:
            The original result dicts plus context dicts (flagged with
            ``is_thread_context``), sorted by date.
        """
        if not results:
            return results

        all_messages = {r['message_id']: r for r in results}
        added = 0

        conn = sqlite3.connect(self.messages_db)
        conn.row_factory = sqlite3.Row
        try:
            # Step 1: find the root of every result's thread.
            thread_roots = {
                self._find_thread_root(conn, r['message_id'], r.get('reply_to_message_id'))
                for r in results
            }

            # Roots that are not search results themselves become context.
            missing_roots = [rid for rid in thread_roots if rid not in all_messages]
            for row in self._select_messages(conn, 'id', missing_roots):
                all_messages[row['id']] = self._context_message(row)
                added += 1

            # Step 2: breadth-first expansion of replies.
            frontier = set(all_messages)
            depth = 0
            while frontier and depth <= max_depth and added < max_context:
                next_frontier = set()
                for row in self._select_messages(conn, 'reply_to_message_id', frontier):
                    if row['id'] in all_messages:
                        continue
                    if added >= max_context:
                        break
                    all_messages[row['id']] = self._context_message(row)
                    next_frontier.add(row['id'])
                    added += 1
                frontier = next_frontier
                depth += 1
        finally:
            conn.close()

        # Sort all messages by date (missing dates sort first).
        return sorted(all_messages.values(), key=lambda m: m.get('date') or '')

    # ------------------------------------------------------------------
    # AI answer
    # ------------------------------------------------------------------

    def search_with_ai_answer(self, query: str, ai_engine, limit: int = 30) -> Dict[str, Any]:
        """
        Search semantically and send results to AI for reasoning.

        This combines:
        1. Semantic search (finds relevant messages by meaning)
        2. Thread context (includes replies to/from found messages)
        3. AI reasoning (reads messages and answers the question)

        ``ai_engine`` is expected to expose a ``provider`` attribute
        ('gemini', 'groq' or 'ollama') and the matching ``_call_<provider>``
        method. Any exception raised by that call is reported in the
        answer text instead of propagating.
        """
        results = self.search_with_full_text(query, limit=limit)

        if not results:
            return {
                'query': query,
                'answer': 'לא נמצאו הודעות רלוונטיות',
                'mode': 'semantic_ai',
                'results': [],
                'count': 0
            }

        # Get thread context for each result
        results_with_threads = self._add_thread_context(results)

        # Build context from semantic search results + threads
        context_text = "\n".join([
            f"[{r.get('date', '')}] {r.get('from_name', 'Unknown')}: {r.get('text', '')[:500]}"
            for r in results_with_threads if r.get('text')
        ])

        # Send to AI for reasoning
        reason_prompt = f"""You are analyzing a Telegram chat history to answer a question.
The messages below were found using semantic search, along with their thread context (replies).
Read them carefully and provide a comprehensive answer.

Question: {query}

Relevant messages and their threads:
{context_text}

Based on these messages, answer the question in Hebrew.
If you can find the answer, provide it clearly.
Pay special attention to reply chains - the answer might be in a reply!
If you can infer information from context clues, do so.
Cite specific messages when relevant.

Answer:"""

        try:
            # Call the appropriate AI provider based on engine configuration
            provider = getattr(ai_engine, 'provider', None)
            if provider == 'gemini':
                answer = ai_engine._call_gemini(reason_prompt)
            elif provider == 'groq':
                answer = ai_engine._call_groq(reason_prompt)
            elif provider == 'ollama':
                answer = ai_engine._call_ollama(reason_prompt)
            else:
                answer = "AI engine not available for reasoning"
        except Exception as e:
            answer = f"שגיאה ב-AI: {str(e)}"

        return {
            'query': query,
            'answer': answer,
            'mode': 'semantic_ai',
            'results': results,  # Original results for display
            'count': len(results),
            'total_with_threads': len(results_with_threads)
        }

    # ------------------------------------------------------------------
    # Introspection
    # ------------------------------------------------------------------

    def _count_embeddings(self) -> int:
        """Return the number of rows in the embeddings table."""
        conn = sqlite3.connect(self.embeddings_db)
        try:
            return conn.execute("SELECT COUNT(*) FROM embeddings").fetchone()[0]
        finally:
            conn.close()

    def is_available(self) -> bool:
        """Check if semantic search is available (DB exists and has embeddings)."""
        import os
        if not HAS_TRANSFORMERS or not os.path.exists(self.embeddings_db):
            return False
        try:
            return self._count_embeddings() > 0
        except Exception:
            return False

    def stats(self) -> Dict[str, Any]:
        """
        Get statistics about the embeddings.

        Returns ``{'available': False, 'error': ...}`` when the file is
        missing or cannot be read as an embeddings database; otherwise the
        row count, file size in MB and model name.
        """
        import os

        if not os.path.exists(self.embeddings_db):
            return {'available': False, 'error': 'embeddings.db not found'}

        try:
            count = self._count_embeddings()
        except sqlite3.Error as e:
            return {'available': False, 'error': str(e)}

        size_mb = os.path.getsize(self.embeddings_db) / (1024 * 1024)

        return {
            'available': True,
            'count': count,
            'size_mb': round(size_mb, 1),
            'model': self.MODEL_NAME
        }
389
+
390
+
391
# Process-wide SemanticSearch instance, created on first request.
_search_instance = None

def get_semantic_search() -> SemanticSearch:
    """Return the shared SemanticSearch, constructing it with defaults on first call."""
    global _search_instance
    if _search_instance is not None:
        return _search_instance
    _search_instance = SemanticSearch()
    return _search_instance
400
+
401
+
402
+ if __name__ == '__main__':
403
+ # Test
404
+ ss = SemanticSearch()
405
+ print("Stats:", ss.stats())
406
+
407
+ if ss.is_available():
408
+ results = ss.search("איפה אתה עובד?", limit=5)
409
+ print("\nResults for 'איפה אתה עובד?':")
410
+ for r in results:
411
+ print(f" [{r['score']:.3f}] {r['from_name']}: {r['text'][:60]}...")
static/css/style.css ADDED
@@ -0,0 +1,859 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* ==========================================
2
+ TELEGRAM ANALYTICS DASHBOARD - CSS
3
+ ========================================== */
4
+
5
+ :root {
6
+ /* Colors */
7
+ --primary: #0088cc;
8
+ --primary-dark: #006699;
9
+ --primary-light: #33a3d9;
10
+ --secondary: #6c757d;
11
+ --success: #28a745;
12
+ --warning: #ffc107;
13
+ --danger: #dc3545;
14
+ --info: #17a2b8;
15
+
16
+ /* Dark theme */
17
+ --bg-dark: #1a1a2e;
18
+ --bg-card: #16213e;
19
+ --bg-sidebar: #0f0f23;
20
+ --text-primary: #ffffff;
21
+ --text-secondary: #a0aec0;
22
+ --text-muted: #718096;
23
+ --border-color: #2d3748;
24
+
25
+ /* Spacing */
26
+ --sidebar-width: 250px;
27
+ --header-height: 70px;
28
+ --spacing-xs: 0.25rem;
29
+ --spacing-sm: 0.5rem;
30
+ --spacing-md: 1rem;
31
+ --spacing-lg: 1.5rem;
32
+ --spacing-xl: 2rem;
33
+
34
+ /* Border radius */
35
+ --radius-sm: 4px;
36
+ --radius-md: 8px;
37
+ --radius-lg: 12px;
38
+ }
39
+
40
+ /* Reset */
41
+ * {
42
+ margin: 0;
43
+ padding: 0;
44
+ box-sizing: border-box;
45
+ }
46
+
47
+ body {
48
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
49
+ background: var(--bg-dark);
50
+ color: var(--text-primary);
51
+ min-height: 100vh;
52
+ display: flex;
53
+ }
54
+
55
+ /* ==========================================
56
+ SIDEBAR
57
+ ========================================== */
58
+
59
+ .sidebar {
60
+ width: var(--sidebar-width);
61
+ background: var(--bg-sidebar);
62
+ height: 100vh;
63
+ position: fixed;
64
+ left: 0;
65
+ top: 0;
66
+ display: flex;
67
+ flex-direction: column;
68
+ border-right: 1px solid var(--border-color);
69
+ z-index: 100;
70
+ }
71
+
72
+ .logo {
73
+ padding: var(--spacing-lg);
74
+ display: flex;
75
+ align-items: center;
76
+ gap: var(--spacing-md);
77
+ border-bottom: 1px solid var(--border-color);
78
+ }
79
+
80
+ .logo-icon {
81
+ font-size: 2rem;
82
+ }
83
+
84
+ .logo-text {
85
+ font-size: 1.25rem;
86
+ font-weight: 700;
87
+ color: var(--primary);
88
+ }
89
+
90
+ .nav-menu {
91
+ list-style: none;
92
+ padding: var(--spacing-md);
93
+ flex: 1;
94
+ }
95
+
96
+ .nav-item {
97
+ margin-bottom: var(--spacing-xs);
98
+ }
99
+
100
+ .nav-link {
101
+ display: flex;
102
+ align-items: center;
103
+ gap: var(--spacing-md);
104
+ padding: var(--spacing-md);
105
+ color: var(--text-secondary);
106
+ text-decoration: none;
107
+ border-radius: var(--radius-md);
108
+ transition: all 0.2s ease;
109
+ }
110
+
111
+ .nav-link:hover {
112
+ background: var(--bg-card);
113
+ color: var(--text-primary);
114
+ }
115
+
116
+ .nav-item.active .nav-link {
117
+ background: var(--primary);
118
+ color: white;
119
+ }
120
+
121
+ .nav-link .icon {
122
+ font-size: 1.25rem;
123
+ }
124
+
125
+ .sidebar-footer {
126
+ padding: var(--spacing-md);
127
+ border-top: 1px solid var(--border-color);
128
+ }
129
+
130
+ .export-buttons {
131
+ display: flex;
132
+ flex-direction: column;
133
+ gap: var(--spacing-sm);
134
+ }
135
+
136
+ /* ==========================================
137
+ MAIN CONTENT
138
+ ========================================== */
139
+
140
+ .main-content {
141
+ margin-left: var(--sidebar-width);
142
+ flex: 1;
143
+ padding: var(--spacing-lg);
144
+ max-width: calc(100vw - var(--sidebar-width));
145
+ }
146
+
147
+ /* ==========================================
148
+ HEADER
149
+ ========================================== */
150
+
151
+ .header {
152
+ display: flex;
153
+ justify-content: space-between;
154
+ align-items: center;
155
+ margin-bottom: var(--spacing-xl);
156
+ padding-bottom: var(--spacing-lg);
157
+ border-bottom: 1px solid var(--border-color);
158
+ }
159
+
160
+ .header h1 {
161
+ font-size: 1.75rem;
162
+ font-weight: 600;
163
+ }
164
+
165
+ .header-controls {
166
+ display: flex;
167
+ gap: var(--spacing-md);
168
+ align-items: center;
169
+ }
170
+
171
+ /* ==========================================
172
+ BUTTONS & INPUTS
173
+ ========================================== */
174
+
175
/* Base button. The original rule declared `border: none` and, further down,
   `border: 1px solid var(--border-color)`; the later declaration always won,
   so the dead one is dropped and the effective border is stated once. */
.btn {
    padding: var(--spacing-sm) var(--spacing-md);
    border: 1px solid var(--border-color);
    border-radius: var(--radius-md);
    cursor: pointer;
    font-size: 0.875rem;
    font-weight: 500;
    transition: all 0.2s ease;
    display: inline-flex;
    align-items: center;
    gap: var(--spacing-sm);
    background: var(--bg-card);
    color: var(--text-primary);
}
190
+
191
+ .btn:hover {
192
+ background: var(--border-color);
193
+ }
194
+
195
+ .btn-primary {
196
+ background: var(--primary);
197
+ color: white;
198
+ border: none;
199
+ }
200
+
201
+ .btn-primary:hover {
202
+ background: var(--primary-dark);
203
+ }
204
+
205
+ .btn-sm {
206
+ padding: var(--spacing-xs) var(--spacing-sm);
207
+ font-size: 0.75rem;
208
+ }
209
+
210
+ .select, .select-sm {
211
+ padding: var(--spacing-sm) var(--spacing-md);
212
+ border: 1px solid var(--border-color);
213
+ border-radius: var(--radius-md);
214
+ background: var(--bg-card);
215
+ color: var(--text-primary);
216
+ font-size: 0.875rem;
217
+ cursor: pointer;
218
+ }
219
+
220
+ .select-sm {
221
+ padding: var(--spacing-xs) var(--spacing-sm);
222
+ font-size: 0.75rem;
223
+ }
224
+
225
+ .select:focus, .select-sm:focus {
226
+ outline: none;
227
+ border-color: var(--primary);
228
+ }
229
+
230
+ input[type="text"], input[type="search"] {
231
+ padding: var(--spacing-sm) var(--spacing-md);
232
+ border: 1px solid var(--border-color);
233
+ border-radius: var(--radius-md);
234
+ background: var(--bg-card);
235
+ color: var(--text-primary);
236
+ font-size: 0.875rem;
237
+ width: 100%;
238
+ }
239
+
240
+ input:focus {
241
+ outline: none;
242
+ border-color: var(--primary);
243
+ }
244
+
245
+ /* ==========================================
246
+ STATS CARDS
247
+ ========================================== */
248
+
249
+ .stats-grid {
250
+ display: grid;
251
+ grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
252
+ gap: var(--spacing-md);
253
+ margin-bottom: var(--spacing-xl);
254
+ }
255
+
256
+ .stat-card {
257
+ background: var(--bg-card);
258
+ border-radius: var(--radius-lg);
259
+ padding: var(--spacing-lg);
260
+ display: flex;
261
+ align-items: center;
262
+ gap: var(--spacing-md);
263
+ border: 1px solid var(--border-color);
264
+ transition: transform 0.2s ease, box-shadow 0.2s ease;
265
+ }
266
+
267
+ .stat-card:hover {
268
+ transform: translateY(-2px);
269
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
270
+ }
271
+
272
+ .stat-icon {
273
+ font-size: 2.5rem;
274
+ opacity: 0.9;
275
+ }
276
+
277
+ .stat-value {
278
+ font-size: 1.75rem;
279
+ font-weight: 700;
280
+ color: var(--text-primary);
281
+ }
282
+
283
+ .stat-label {
284
+ font-size: 0.875rem;
285
+ color: var(--text-muted);
286
+ margin-top: var(--spacing-xs);
287
+ }
288
+
289
+ /* ==========================================
290
+ CHARTS
291
+ ========================================== */
292
+
293
+ .charts-row {
294
+ display: grid;
295
+ grid-template-columns: repeat(2, 1fr);
296
+ gap: var(--spacing-lg);
297
+ margin-bottom: var(--spacing-xl);
298
+ }
299
+
300
+ .chart-card {
301
+ background: var(--bg-card);
302
+ border-radius: var(--radius-lg);
303
+ padding: var(--spacing-lg);
304
+ border: 1px solid var(--border-color);
305
+ }
306
+
307
+ .chart-card.large {
308
+ grid-column: span 1;
309
+ }
310
+
311
+ .chart-card.full-width {
312
+ grid-column: span 2;
313
+ }
314
+
315
+ .chart-header {
316
+ display: flex;
317
+ justify-content: space-between;
318
+ align-items: center;
319
+ margin-bottom: var(--spacing-md);
320
+ }
321
+
322
+ .chart-header h3 {
323
+ font-size: 1rem;
324
+ font-weight: 600;
325
+ color: var(--text-primary);
326
+ }
327
+
328
+ .chart-subtitle {
329
+ font-size: 0.75rem;
330
+ color: var(--text-muted);
331
+ }
332
+
333
+ .chart-container {
334
+ position: relative;
335
+ height: 250px;
336
+ }
337
+
338
+ /* ==========================================
339
+ HEATMAP
340
+ ========================================== */
341
+
342
+ .heatmap-container {
343
+ overflow-x: auto;
344
+ }
345
+
346
+ .heatmap-table {
347
+ width: 100%;
348
+ border-collapse: collapse;
349
+ font-size: 0.75rem;
350
+ }
351
+
352
+ .heatmap-table th,
353
+ .heatmap-table td {
354
+ padding: var(--spacing-xs);
355
+ text-align: center;
356
+ min-width: 35px;
357
+ }
358
+
359
+ .heatmap-table th {
360
+ color: var(--text-muted);
361
+ font-weight: 500;
362
+ }
363
+
364
+ .heatmap-cell {
365
+ width: 30px;
366
+ height: 30px;
367
+ border-radius: var(--radius-sm);
368
+ display: inline-block;
369
+ transition: transform 0.2s ease;
370
+ }
371
+
372
+ .heatmap-cell:hover {
373
+ transform: scale(1.2);
374
+ }
375
+
376
+ .day-label {
377
+ text-align: right;
378
+ padding-right: var(--spacing-sm) !important;
379
+ color: var(--text-secondary);
380
+ }
381
+
382
+ /* ==========================================
383
+ LISTS
384
+ ========================================== */
385
+
386
+ .lists-row {
387
+ display: grid;
388
+ grid-template-columns: repeat(3, 1fr);
389
+ gap: var(--spacing-lg);
390
+ margin-bottom: var(--spacing-xl);
391
+ }
392
+
393
+ .list-card {
394
+ background: var(--bg-card);
395
+ border-radius: var(--radius-lg);
396
+ border: 1px solid var(--border-color);
397
+ overflow: hidden;
398
+ }
399
+
400
+ .list-header {
401
+ display: flex;
402
+ justify-content: space-between;
403
+ align-items: center;
404
+ padding: var(--spacing-md) var(--spacing-lg);
405
+ border-bottom: 1px solid var(--border-color);
406
+ }
407
+
408
+ .list-header h3 {
409
+ font-size: 1rem;
410
+ font-weight: 600;
411
+ }
412
+
413
+ .link {
414
+ color: var(--primary);
415
+ text-decoration: none;
416
+ font-size: 0.875rem;
417
+ }
418
+
419
+ .link:hover {
420
+ text-decoration: underline;
421
+ }
422
+
423
+ .list-content {
424
+ max-height: 350px;
425
+ overflow-y: auto;
426
+ }
427
+
428
+ .list-item {
429
+ display: flex;
430
+ align-items: center;
431
+ padding: var(--spacing-sm) var(--spacing-lg);
432
+ border-bottom: 1px solid var(--border-color);
433
+ gap: var(--spacing-md);
434
+ }
435
+
436
+ .list-item:last-child {
437
+ border-bottom: none;
438
+ }
439
+
440
+ .list-item:hover {
441
+ background: rgba(255, 255, 255, 0.02);
442
+ }
443
+
444
+ .list-rank {
445
+ font-weight: 700;
446
+ color: var(--text-muted);
447
+ min-width: 30px;
448
+ }
449
+
450
+ .list-rank.gold { color: #ffd700; }
451
+ .list-rank.silver { color: #c0c0c0; }
452
+ .list-rank.bronze { color: #cd7f32; }
453
+
454
+ .list-info {
455
+ flex: 1;
456
+ min-width: 0;
457
+ }
458
+
459
+ .list-name {
460
+ font-weight: 500;
461
+ white-space: nowrap;
462
+ overflow: hidden;
463
+ text-overflow: ellipsis;
464
+ }
465
+
466
+ .list-subtitle {
467
+ font-size: 0.75rem;
468
+ color: var(--text-muted);
469
+ }
470
+
471
+ .list-value {
472
+ font-weight: 600;
473
+ color: var(--primary);
474
+ }
475
+
476
+ /* ==========================================
477
+ USERS PAGE
478
+ ========================================== */
479
+
480
+ .users-table {
481
+ width: 100%;
482
+ border-collapse: collapse;
483
+ }
484
+
485
+ .users-table th,
486
+ .users-table td {
487
+ padding: var(--spacing-md);
488
+ text-align: left;
489
+ border-bottom: 1px solid var(--border-color);
490
+ }
491
+
492
+ .users-table th {
493
+ background: var(--bg-sidebar);
494
+ font-weight: 600;
495
+ color: var(--text-secondary);
496
+ font-size: 0.875rem;
497
+ position: sticky;
498
+ top: 0;
499
+ }
500
+
501
+ .users-table tr:hover {
502
+ background: rgba(255, 255, 255, 0.02);
503
+ }
504
+
505
+ .user-avatar {
506
+ width: 36px;
507
+ height: 36px;
508
+ border-radius: 50%;
509
+ background: var(--primary);
510
+ display: flex;
511
+ align-items: center;
512
+ justify-content: center;
513
+ font-weight: 700;
514
+ font-size: 0.875rem;
515
+ }
516
+
517
+ .user-cell {
518
+ display: flex;
519
+ align-items: center;
520
+ gap: var(--spacing-md);
521
+ }
522
+
523
+ .progress-bar {
524
+ height: 6px;
525
+ background: var(--border-color);
526
+ border-radius: 3px;
527
+ overflow: hidden;
528
+ margin-top: var(--spacing-xs);
529
+ }
530
+
531
+ .progress-fill {
532
+ height: 100%;
533
+ background: var(--primary);
534
+ border-radius: 3px;
535
+ }
536
+
537
+ /* ==========================================
538
+ SEARCH PAGE
539
+ ========================================== */
540
+
541
+ .search-box {
542
+ display: flex;
543
+ gap: var(--spacing-md);
544
+ margin-bottom: var(--spacing-xl);
545
+ }
546
+
547
+ .search-input {
548
+ flex: 1;
549
+ }
550
+
551
+ .search-results {
552
+ background: var(--bg-card);
553
+ border-radius: var(--radius-lg);
554
+ border: 1px solid var(--border-color);
555
+ }
556
+
557
+ .search-result-item {
558
+ padding: var(--spacing-lg);
559
+ border-bottom: 1px solid var(--border-color);
560
+ }
561
+
562
+ .search-result-item:last-child {
563
+ border-bottom: none;
564
+ }
565
+
566
+ .search-result-header {
567
+ display: flex;
568
+ justify-content: space-between;
569
+ align-items: center;
570
+ margin-bottom: var(--spacing-sm);
571
+ }
572
+
573
+ .search-result-author {
574
+ font-weight: 600;
575
+ color: var(--primary);
576
+ }
577
+
578
+ .search-result-date {
579
+ font-size: 0.75rem;
580
+ color: var(--text-muted);
581
+ }
582
+
583
+ .search-result-text {
584
+ color: var(--text-secondary);
585
+ line-height: 1.5;
586
+ }
587
+
588
+ .search-highlight {
589
+ background: rgba(0, 136, 204, 0.3);
590
+ padding: 0 2px;
591
+ border-radius: 2px;
592
+ }
593
+
594
+ /* ==========================================
595
+ PAGINATION
596
+ ========================================== */
597
+
598
+ .pagination {
599
+ display: flex;
600
+ justify-content: center;
601
+ gap: var(--spacing-sm);
602
+ margin-top: var(--spacing-xl);
603
+ }
604
+
605
+ .page-btn {
606
+ padding: var(--spacing-sm) var(--spacing-md);
607
+ background: var(--bg-card);
608
+ border: 1px solid var(--border-color);
609
+ border-radius: var(--radius-md);
610
+ color: var(--text-primary);
611
+ cursor: pointer;
612
+ transition: all 0.2s ease;
613
+ }
614
+
615
+ .page-btn:hover {
616
+ background: var(--border-color);
617
+ }
618
+
619
+ .page-btn.active {
620
+ background: var(--primary);
621
+ border-color: var(--primary);
622
+ }
623
+
624
.page-btn:disabled {
    opacity: 0.5;
    cursor: not-allowed;
}

/* `.page-btn:hover` also matches disabled buttons, so without this rule a
   disabled pagination button still lights up under the pointer. Restore the
   resting background for that combination only. */
.page-btn:disabled:hover {
    background: var(--bg-card);
}
628
+
629
+ /* ==========================================
630
+ USER MODAL
631
+ ========================================== */
632
+
633
+ .modal-overlay {
634
+ position: fixed;
635
+ top: 0;
636
+ left: 0;
637
+ right: 0;
638
+ bottom: 0;
639
+ background: rgba(0, 0, 0, 0.7);
640
+ display: flex;
641
+ align-items: center;
642
+ justify-content: center;
643
+ z-index: 1000;
644
+ opacity: 0;
645
+ visibility: hidden;
646
+ transition: all 0.3s ease;
647
+ }
648
+
649
+ .modal-overlay.active {
650
+ opacity: 1;
651
+ visibility: visible;
652
+ }
653
+
654
+ .modal {
655
+ background: var(--bg-card);
656
+ border-radius: var(--radius-lg);
657
+ width: 90%;
658
+ max-width: 600px;
659
+ max-height: 80vh;
660
+ overflow-y: auto;
661
+ border: 1px solid var(--border-color);
662
+ transform: translateY(-20px);
663
+ transition: transform 0.3s ease;
664
+ }
665
+
666
+ .modal-overlay.active .modal {
667
+ transform: translateY(0);
668
+ }
669
+
670
+ .modal-header {
671
+ display: flex;
672
+ justify-content: space-between;
673
+ align-items: center;
674
+ padding: var(--spacing-lg);
675
+ border-bottom: 1px solid var(--border-color);
676
+ }
677
+
678
+ .modal-header h2 {
679
+ font-size: 1.25rem;
680
+ }
681
+
682
+ .modal-close {
683
+ background: none;
684
+ border: none;
685
+ font-size: 1.5rem;
686
+ color: var(--text-secondary);
687
+ cursor: pointer;
688
+ }
689
+
690
+ .modal-body {
691
+ padding: var(--spacing-lg);
692
+ }
693
+
694
+ .user-profile {
695
+ display: flex;
696
+ align-items: center;
697
+ gap: var(--spacing-lg);
698
+ margin-bottom: var(--spacing-xl);
699
+ }
700
+
701
+ .user-profile-avatar {
702
+ width: 80px;
703
+ height: 80px;
704
+ border-radius: 50%;
705
+ background: var(--primary);
706
+ display: flex;
707
+ align-items: center;
708
+ justify-content: center;
709
+ font-size: 2rem;
710
+ font-weight: 700;
711
+ }
712
+
713
+ .user-profile-info h3 {
714
+ font-size: 1.5rem;
715
+ margin-bottom: var(--spacing-xs);
716
+ }
717
+
718
+ .user-profile-info p {
719
+ color: var(--text-muted);
720
+ }
721
+
722
+ .user-stats-grid {
723
+ display: grid;
724
+ grid-template-columns: repeat(3, 1fr);
725
+ gap: var(--spacing-md);
726
+ margin-bottom: var(--spacing-xl);
727
+ }
728
+
729
+ .user-stat {
730
+ text-align: center;
731
+ padding: var(--spacing-md);
732
+ background: var(--bg-sidebar);
733
+ border-radius: var(--radius-md);
734
+ }
735
+
736
+ .user-stat-value {
737
+ font-size: 1.5rem;
738
+ font-weight: 700;
739
+ color: var(--primary);
740
+ }
741
+
742
+ .user-stat-label {
743
+ font-size: 0.75rem;
744
+ color: var(--text-muted);
745
+ margin-top: var(--spacing-xs);
746
+ }
747
+
748
+ /* ==========================================
749
+ LOADING & EMPTY STATES
750
+ ========================================== */
751
+
752
+ .loading {
753
+ display: flex;
754
+ align-items: center;
755
+ justify-content: center;
756
+ padding: var(--spacing-xl);
757
+ color: var(--text-muted);
758
+ }
759
+
760
+ .spinner {
761
+ width: 40px;
762
+ height: 40px;
763
+ border: 3px solid var(--border-color);
764
+ border-top-color: var(--primary);
765
+ border-radius: 50%;
766
+ animation: spin 1s linear infinite;
767
+ }
768
+
769
+ @keyframes spin {
770
+ to { transform: rotate(360deg); }
771
+ }
772
+
773
+ .empty-state {
774
+ text-align: center;
775
+ padding: var(--spacing-xl);
776
+ color: var(--text-muted);
777
+ }
778
+
779
+ .empty-state-icon {
780
+ font-size: 3rem;
781
+ margin-bottom: var(--spacing-md);
782
+ opacity: 0.5;
783
+ }
784
+
785
+ /* ==========================================
786
+ RESPONSIVE
787
+ ========================================== */
788
+
789
+ @media (max-width: 1200px) {
790
+ .lists-row {
791
+ grid-template-columns: repeat(2, 1fr);
792
+ }
793
+ }
794
+
795
+ @media (max-width: 992px) {
796
+ .sidebar {
797
+ width: 70px;
798
+ }
799
+
800
+ .logo-text, .nav-link span:not(.icon) {
801
+ display: none;
802
+ }
803
+
804
+ .main-content {
805
+ margin-left: 70px;
806
+ max-width: calc(100vw - 70px);
807
+ }
808
+
809
+ .charts-row {
810
+ grid-template-columns: 1fr;
811
+ }
812
+
813
+ .chart-card.full-width,
814
+ .chart-card.large {
815
+ grid-column: span 1;
816
+ }
817
+
818
+ .lists-row {
819
+ grid-template-columns: 1fr;
820
+ }
821
+ }
822
+
823
+ @media (max-width: 768px) {
824
+ .stats-grid {
825
+ grid-template-columns: repeat(2, 1fr);
826
+ }
827
+
828
+ .header {
829
+ flex-direction: column;
830
+ gap: var(--spacing-md);
831
+ align-items: flex-start;
832
+ }
833
+
834
+ .user-stats-grid {
835
+ grid-template-columns: repeat(2, 1fr);
836
+ }
837
+ }
838
+
839
+ /* ==========================================
840
+ SCROLLBAR
841
+ ========================================== */
842
+
843
+ ::-webkit-scrollbar {
844
+ width: 8px;
845
+ height: 8px;
846
+ }
847
+
848
+ ::-webkit-scrollbar-track {
849
+ background: var(--bg-sidebar);
850
+ }
851
+
852
+ ::-webkit-scrollbar-thumb {
853
+ background: var(--border-color);
854
+ border-radius: 4px;
855
+ }
856
+
857
+ ::-webkit-scrollbar-thumb:hover {
858
+ background: var(--text-muted);
859
+ }
static/js/dashboard.js ADDED
@@ -0,0 +1,622 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Telegram Analytics Dashboard - JavaScript
3
+ *
4
+ * Handles all interactivity:
5
+ * - Data fetching from API
6
+ * - Chart rendering with Chart.js
7
+ * - Real-time updates
8
+ * - User interactions
9
+ * - Export functionality
10
+ */
11
+
12
+ // ==========================================
13
+ // GLOBAL STATE
14
+ // ==========================================
15
+
16
/**
 * Shared mutable state for the dashboard page.
 */
const state = {
    // Timeframe last read from the #timeframe <select> (refreshed by loadAllData).
    timeframe: 'month',
    // Live Chart.js instances keyed by widget name, kept so they can be destroyed before re-rendering.
    charts: {},
    // setInterval handle while auto-refresh is on, otherwise null.
    autoRefresh: null,
    // Pagination settings; not read in this part of the file (presumably used by the users page - confirm).
    currentPage: 1,
    usersPerPage: 20
};

// Chart.js defaults shared by every chart created on the page.
Object.assign(Chart.defaults, {
    color: '#a0aec0',
    borderColor: '#2d3748'
});
Chart.defaults.font.family = '-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif';
28
+
29
+ // ==========================================
30
+ // UTILITY FUNCTIONS
31
+ // ==========================================
32
+
33
/**
 * Format a count for display: 1,500 -> "1.5K", 2,500,000 -> "2.5M",
 * smaller values via toLocaleString().
 *
 * @param {number|string|null|undefined} num Value to format.
 * @returns {string} Compact representation, or '-' when the value is
 *   missing or not a finite number (the original threw a TypeError on
 *   null/undefined, e.g. when an API payload lacks a field).
 */
function formatNumber(num) {
    if (num === null || num === undefined) return '-';

    const value = Number(num);
    if (!Number.isFinite(value)) return '-';

    // 999,950 and above would round to "1000.0K" with one decimal, so promote
    // those values to the "M" bucket, where they correctly render as "1.0M".
    if (value >= 999950) return (value / 1000000).toFixed(1) + 'M';
    if (value >= 1000) return (value / 1000).toFixed(1) + 'K';
    return value.toLocaleString();
}
38
+
39
/**
 * Render a Unix timestamp (seconds) as a short en-US date such as "Nov 15, 2023".
 *
 * @param {number} timestamp Seconds since the epoch; any falsy value yields '-'.
 * @returns {string} Formatted date in the viewer's local time zone, or '-'.
 */
function formatDate(timestamp) {
    if (timestamp) {
        const options = { year: 'numeric', month: 'short', day: 'numeric' };
        const millis = timestamp * 1000;
        return new Date(millis).toLocaleDateString('en-US', options);
    }
    return '-';
}
47
+
48
/**
 * Current timeframe filter.
 *
 * @returns {string} Value of the #timeframe <select> when the page has one,
 *   otherwise the timeframe remembered in `state`.
 */
function getTimeframe() {
    const picker = document.getElementById('timeframe');
    if (picker) {
        return picker.value;
    }
    return state.timeframe;
}
52
+
53
/**
 * GET a JSON API endpoint with the current timeframe appended as a query
 * parameter.
 *
 * @param {string} endpoint Path, optionally already carrying a query string.
 * @returns {Promise<any|null>} Parsed JSON body, or null when the request
 *   fails, the server answers with a non-2xx status, or the body is not JSON.
 *   Callers treat null as "no data".
 */
async function fetchAPI(endpoint) {
    try {
        // Encode the value so an unexpected character cannot corrupt the query string.
        const timeframe = encodeURIComponent(getTimeframe());
        const separator = endpoint.includes('?') ? '&' : '?';
        const response = await fetch(`${endpoint}${separator}timeframe=${timeframe}`);

        // fetch() only rejects on network failure; an HTTP 4xx/5xx must be
        // checked explicitly, otherwise an error payload is handed to callers
        // that expect chart or list data.
        if (!response.ok) {
            throw new Error(`${endpoint} responded with HTTP ${response.status}`);
        }

        return await response.json();
    } catch (error) {
        console.error('API Error:', error);
        return null;
    }
}
64
+
65
/**
 * Replace an element's content with the loading spinner.
 *
 * @param {string} elementId Id of the container; silently ignored when absent.
 */
function showLoading(elementId) {
    const target = document.getElementById(elementId);
    if (!target) return;

    target.innerHTML = '<div class="loading"><div class="spinner"></div></div>';
}
71
+
72
/**
 * Replace an element's content with the "empty state" placeholder.
 *
 * @param {string} elementId Id of the container; silently ignored when absent.
 * @param {string} [message='No data available'] Text shown under the icon.
 *   It is inserted as HTML without escaping, so pass trusted text only.
 */
function showEmpty(elementId, message = 'No data available') {
    const target = document.getElementById(elementId);
    if (!target) return;

    const markup = `
            <div class="empty-state">
                <div class="empty-state-icon">📭</div>
                <p>${message}</p>
            </div>
        `;
    target.innerHTML = markup;
}
83
+
84
+ // ==========================================
85
+ // DATA LOADING
86
+ // ==========================================
87
+
88
/**
 * Refresh every dashboard widget for the currently selected timeframe.
 *
 * All loaders are started together and run in parallel. A loader that throws
 * is logged and skipped rather than rejecting the whole refresh: this function
 * is invoked from setInterval and from a key handler, neither of which handles
 * a rejected promise.
 *
 * @returns {Promise<void>} Resolves once every loader has settled.
 */
async function loadAllData() {
    state.timeframe = getTimeframe();

    const loaders = [
        loadOverviewStats,
        loadMessagesChart,
        loadUsersChart,
        loadHourlyChart,
        loadDailyChart,
        loadHeatmap,
        loadTopUsers,
        loadTopWords,
        loadTopDomains
    ];

    // Each loader is invoked synchronously inside map(), so all requests are
    // still issued up front exactly as before.
    await Promise.all(loaders.map(async (loader) => {
        try {
            await loader();
        } catch (error) {
            console.error(`Dashboard loader "${loader.name}" failed:`, error);
        }
    }));
}
104
+
105
/**
 * Fetch /api/overview and fill the six stat cards.
 *
 * Cards that are not present in the current page are skipped, and a field
 * missing from the payload renders as '-' instead of throwing. The other
 * loaders in this file already tolerate missing elements; this one did not.
 *
 * @returns {Promise<void>}
 */
async function loadOverviewStats() {
    const data = await fetchAPI('/api/overview');
    if (!data) return;

    // Element id -> value from the overview payload.
    const cards = {
        'total-messages': data.total_messages,
        'active-users': data.active_users,
        'messages-per-day': data.messages_per_day,
        'links-count': data.links_count,
        'media-count': data.media_count,
        'replies-count': data.replies_count
    };

    for (const [id, value] of Object.entries(cards)) {
        const card = document.getElementById(id);
        if (!card) continue;
        card.textContent = (value === null || value === undefined) ? '-' : formatNumber(value);
    }
}
117
+
118
+ // ==========================================
119
+ // CHARTS
120
+ // ==========================================
121
+
122
/**
 * Render the "messages over time" line chart into #messages-chart.
 *
 * Granularity comes from the #messages-granularity select ('day' when the
 * select is absent). A failed request (null) or a non-array payload leaves the
 * current chart untouched. An empty result removes the old chart, so a
 * timeframe without data no longer keeps showing the previous timeframe's
 * curve.
 *
 * @returns {Promise<void>}
 */
async function loadMessagesChart() {
    const granularitySelect = document.getElementById('messages-granularity');
    const granularity = granularitySelect ? granularitySelect.value : 'day';

    const data = await fetchAPI(`/api/chart/messages?granularity=${encodeURIComponent(granularity)}`);
    // Anything that is not an array (null, error object) cannot be plotted.
    if (!Array.isArray(data)) return;

    const ctx = document.getElementById('messages-chart');
    if (!ctx) return;

    // Destroy the existing chart before reusing the canvas.
    if (state.charts.messages) {
        state.charts.messages.destroy();
        state.charts.messages = null;
    }

    // Nothing to plot for this timeframe: leave the canvas blank.
    if (data.length === 0) return;

    state.charts.messages = new Chart(ctx, {
        type: 'line',
        data: {
            labels: data.map(d => d.label),
            datasets: [{
                label: 'Messages',
                data: data.map(d => d.value),
                borderColor: '#0088cc',
                backgroundColor: 'rgba(0, 136, 204, 0.1)',
                fill: true,
                tension: 0.4,
                pointRadius: 2,
                pointHoverRadius: 5
            }]
        },
        options: {
            responsive: true,
            maintainAspectRatio: false,
            plugins: {
                legend: { display: false }
            },
            scales: {
                x: {
                    grid: { display: false },
                    ticks: { maxTicksLimit: 10 }
                },
                y: {
                    beginAtZero: true,
                    grid: { color: '#2d3748' }
                }
            },
            interaction: {
                intersect: false,
                mode: 'index'
            }
        }
    });
}
175
+
176
/**
 * Render the daily "active users" line chart into #users-chart.
 *
 * A failed request (null) or a non-array payload leaves the current chart
 * untouched. An empty result removes the old chart instead of leaving stale
 * data from the previous timeframe on screen.
 *
 * @returns {Promise<void>}
 */
async function loadUsersChart() {
    const data = await fetchAPI('/api/chart/users?granularity=day');
    // Anything that is not an array (null, error object) cannot be plotted.
    if (!Array.isArray(data)) return;

    const ctx = document.getElementById('users-chart');
    if (!ctx) return;

    // Destroy the existing chart before reusing the canvas.
    if (state.charts.users) {
        state.charts.users.destroy();
        state.charts.users = null;
    }

    // Nothing to plot for this timeframe: leave the canvas blank.
    if (data.length === 0) return;

    state.charts.users = new Chart(ctx, {
        type: 'line',
        data: {
            labels: data.map(d => d.label),
            datasets: [{
                label: 'Active Users',
                data: data.map(d => d.value),
                borderColor: '#28a745',
                backgroundColor: 'rgba(40, 167, 69, 0.1)',
                fill: true,
                tension: 0.4,
                pointRadius: 2,
                pointHoverRadius: 5
            }]
        },
        options: {
            responsive: true,
            maintainAspectRatio: false,
            plugins: {
                legend: { display: false }
            },
            scales: {
                x: {
                    grid: { display: false },
                    ticks: { maxTicksLimit: 10 }
                },
                y: {
                    beginAtZero: true,
                    grid: { color: '#2d3748' }
                }
            }
        }
    });
}
221
+
222
/**
 * Render the "messages by hour" bar chart into #hourly-chart.
 *
 * A failed request (null) or a non-array payload leaves the current chart
 * untouched. An empty result removes the old chart instead of leaving stale
 * data from the previous timeframe on screen.
 *
 * @returns {Promise<void>}
 */
async function loadHourlyChart() {
    const data = await fetchAPI('/api/chart/hourly');
    // Anything that is not an array (null, error object) cannot be plotted.
    if (!Array.isArray(data)) return;

    const ctx = document.getElementById('hourly-chart');
    if (!ctx) return;

    // Destroy the existing chart before reusing the canvas.
    if (state.charts.hourly) {
        state.charts.hourly.destroy();
        state.charts.hourly = null;
    }

    // Nothing to plot for this timeframe: leave the canvas blank.
    if (data.length === 0) return;

    state.charts.hourly = new Chart(ctx, {
        type: 'bar',
        data: {
            labels: data.map(d => d.label),
            datasets: [{
                label: 'Messages',
                data: data.map(d => d.value),
                backgroundColor: '#0088cc',
                borderRadius: 4
            }]
        },
        options: {
            responsive: true,
            maintainAspectRatio: false,
            plugins: {
                legend: { display: false }
            },
            scales: {
                x: {
                    grid: { display: false },
                    ticks: { maxTicksLimit: 12 }
                },
                y: {
                    beginAtZero: true,
                    grid: { color: '#2d3748' }
                }
            }
        }
    });
}
263
+
264
/**
 * Render the "messages by weekday" bar chart into #daily-chart.
 *
 * Labels are shortened to their first three characters. A failed request
 * (null) or a non-array payload leaves the current chart untouched. An empty
 * result removes the old chart instead of leaving stale data from the
 * previous timeframe on screen.
 *
 * @returns {Promise<void>}
 */
async function loadDailyChart() {
    const data = await fetchAPI('/api/chart/daily');
    // Anything that is not an array (null, error object) cannot be plotted.
    if (!Array.isArray(data)) return;

    const ctx = document.getElementById('daily-chart');
    if (!ctx) return;

    // Destroy the existing chart before reusing the canvas.
    if (state.charts.daily) {
        state.charts.daily.destroy();
        state.charts.daily = null;
    }

    // Nothing to plot for this timeframe: leave the canvas blank.
    if (data.length === 0) return;

    // One colour per bar, applied by position.
    // NOTE(review): the day names assume the API returns Sunday first - confirm.
    const colors = [
        '#dc3545', // Sunday - red
        '#ffc107', // Monday - yellow
        '#28a745', // Tuesday - green
        '#17a2b8', // Wednesday - cyan
        '#0088cc', // Thursday - blue
        '#6f42c1', // Friday - purple
        '#fd7e14'  // Saturday - orange
    ];

    state.charts.daily = new Chart(ctx, {
        type: 'bar',
        data: {
            labels: data.map(d => d.label.substring(0, 3)),
            datasets: [{
                label: 'Messages',
                data: data.map(d => d.value),
                backgroundColor: colors,
                borderRadius: 4
            }]
        },
        options: {
            responsive: true,
            maintainAspectRatio: false,
            plugins: {
                legend: { display: false }
            },
            scales: {
                x: {
                    grid: { display: false }
                },
                y: {
                    beginAtZero: true,
                    grid: { color: '#2d3748' }
                }
            }
        }
    });
}
314
+
315
/**
 * Build the day x hour activity heatmap as an HTML table inside #heatmap.
 *
 * The payload supplies `days` (row labels) and `data` (one array of 24 counts
 * per day). Cell opacity scales linearly with the count relative to the
 * largest count in the grid; zero counts get a fixed faint tint.
 *
 * @returns {Promise<void>}
 */
async function loadHeatmap() {
    const payload = await fetchAPI('/api/chart/heatmap');
    if (!payload || !payload.data) return;

    const container = document.getElementById('heatmap');
    if (!container) return;

    // Largest count in the grid, used as the 100% reference.
    const peak = Math.max(...payload.data.flat());

    // Map a count to its cell colour.
    const shade = (count) => {
        if (count === 0) return 'rgba(0, 136, 204, 0.1)';
        const intensity = count / peak;
        return `rgba(0, 136, 204, ${0.2 + intensity * 0.8})`;
    };

    const hours = Array.from({ length: 24 }, (_, hour) => hour);

    const headerCells = hours.map((hour) => `<th>${hour}</th>`).join('');

    const bodyRows = payload.days.map((day, dayIndex) => {
        const cells = hours.map((hour) => {
            const count = payload.data[dayIndex][hour];
            return `<td><div class="heatmap-cell" style="background: ${shade(count)}" title="${day} ${hour}:00 - ${count} messages"></div></td>`;
        }).join('');
        return `<tr><td class="day-label">${day.substring(0, 3)}</td>${cells}</tr>`;
    }).join('');

    container.innerHTML =
        '<table class="heatmap-table"><thead><tr><th></th>' +
        headerCells +
        '</tr></thead><tbody>' +
        bodyRows +
        '</tbody></table>';
}
354
+
355
+ // ==========================================
356
+ // TOP LISTS
357
+ // ==========================================
358
+
359
/**
 * Fill #top-users-list with the ten most active users.
 *
 * Each row links to /user/<id> and shows rank, avatar initial, name, share of
 * total messages and message count. The first three rows get the gold, silver
 * and bronze rank colours.
 *
 * User names are user-controlled text, so both the name and the avatar
 * initial are HTML-escaped. The initial is taken per Unicode code point rather
 * than per UTF-16 unit, so a name that starts with an emoji no longer yields
 * half a surrogate pair. A missing name renders as '?' instead of throwing
 * and leaving the spinner in place.
 *
 * @returns {Promise<void>}
 */
async function loadTopUsers() {
    const listElement = document.getElementById('top-users-list');
    if (!listElement) return;

    showLoading('top-users-list');

    const data = await fetchAPI('/api/users?limit=10');
    if (!data || !Array.isArray(data.users) || data.users.length === 0) {
        showEmpty('top-users-list');
        return;
    }

    const rankClasses = ['gold', 'silver', 'bronze'];

    const html = data.users.map((user, index) => {
        const rankClass = rankClasses[index] || '';
        const name = (user.name === null || user.name === undefined) ? '' : String(user.name);
        // Array.from iterates by code point; multi-code-point graphemes
        // (flags, ZWJ sequences) are still truncated to their first code point.
        const initial = (Array.from(name)[0] || '?').toUpperCase();

        return `
            <div class="list-item" onclick="window.location.href='/user/${user.user_id}'" style="cursor: pointer">
                <div class="list-rank ${rankClass}">#${user.rank}</div>
                <div class="user-avatar">${escapeHtml(initial)}</div>
                <div class="list-info">
                    <div class="list-name">${escapeHtml(name)}</div>
                    <div class="list-subtitle">${user.percentage}% of total</div>
                </div>
                <div class="list-value">${formatNumber(user.messages)}</div>
            </div>
        `;
    }).join('');

    listElement.innerHTML = html;
}
391
+
392
/**
 * Fill #top-words-list with the ten most frequent words, each with a bar
 * scaled to the largest count in the list.
 *
 * A payload that is not a non-empty array (null, error object, empty list)
 * shows the empty state. Previously an error object threw on `data[0].count`
 * and left the spinner on screen.
 *
 * @returns {Promise<void>}
 */
async function loadTopWords() {
    const listElement = document.getElementById('top-words-list');
    if (!listElement) return;

    showLoading('top-words-list');

    const data = await fetchAPI('/api/top/words?limit=10');
    if (!Array.isArray(data) || data.length === 0) {
        showEmpty('top-words-list');
        return;
    }

    // Scale against the largest count rather than assuming the first row is
    // the largest; fall back to 0% when every count is zero or missing so the
    // bar width is never "NaN%".
    const maxCount = Math.max(...data.map((item) => Number(item.count) || 0));

    const html = data.map((item, index) => {
        const percentage = maxCount > 0 ? (item.count / maxCount * 100).toFixed(0) : '0';
        return `
            <div class="list-item">
                <div class="list-rank">#${index + 1}</div>
                <div class="list-info">
                    <div class="list-name">${escapeHtml(item.word)}</div>
                    <div class="progress-bar">
                        <div class="progress-fill" style="width: ${percentage}%"></div>
                    </div>
                </div>
                <div class="list-value">${formatNumber(item.count)}</div>
            </div>
        `;
    }).join('');

    listElement.innerHTML = html;
}
425
+
426
/**
 * Fill #top-domains-list with the ten most shared link domains, each with a
 * bar scaled to the largest count in the list.
 *
 * A payload that is not a non-empty array (null, error object, empty list)
 * shows the empty state. Previously an error object threw on `data[0].count`
 * and left the spinner on screen.
 *
 * @returns {Promise<void>}
 */
async function loadTopDomains() {
    const listElement = document.getElementById('top-domains-list');
    if (!listElement) return;

    showLoading('top-domains-list');

    const data = await fetchAPI('/api/top/domains?limit=10');
    if (!Array.isArray(data) || data.length === 0) {
        showEmpty('top-domains-list');
        return;
    }

    // Scale against the largest count rather than assuming the first row is
    // the largest; fall back to 0% when every count is zero or missing so the
    // bar width is never "NaN%".
    const maxCount = Math.max(...data.map((item) => Number(item.count) || 0));

    const html = data.map((item, index) => {
        const percentage = maxCount > 0 ? (item.count / maxCount * 100).toFixed(0) : '0';
        return `
            <div class="list-item">
                <div class="list-rank">#${index + 1}</div>
                <div class="list-info">
                    <div class="list-name">${escapeHtml(item.domain)}</div>
                    <div class="progress-bar">
                        <div class="progress-fill" style="width: ${percentage}%"></div>
                    </div>
                </div>
                <div class="list-value">${formatNumber(item.count)}</div>
            </div>
        `;
    }).join('');

    listElement.innerHTML = html;
}
459
+
460
+ // ==========================================
461
+ // USER MODAL
462
+ // ==========================================
463
+
464
/**
 * Open the user-details modal for one user and render their profile, stats
 * and hourly activity chart.
 *
 * The modal overlay is created lazily on first use and reused afterwards.
 * The hourly chart is tracked in `state.charts.userHourly` and destroyed
 * before the modal body is rewritten. Previously every open created a new
 * Chart instance that was never destroyed, even though its canvas had been
 * removed from the DOM.
 *
 * The user name is user-controlled text: it and the avatar initial are
 * HTML-escaped, and the initial is taken per Unicode code point so an
 * emoji-leading name does not yield half a surrogate pair.
 *
 * @param {number|string} userId Id interpolated into /api/user/<id>.
 * @returns {Promise<void>}
 */
async function openUserModal(userId) {
    // Release the chart bound to the canvas that is about to be discarded.
    const destroyUserChart = () => {
        if (state.charts.userHourly) {
            state.charts.userHourly.destroy();
            state.charts.userHourly = null;
        }
    };

    // Create modal if it doesn't exist
    let modal = document.getElementById('user-modal');
    if (!modal) {
        modal = document.createElement('div');
        modal.id = 'user-modal';
        modal.className = 'modal-overlay';
        modal.innerHTML = `
            <div class="modal">
                <div class="modal-header">
                    <h2>User Details</h2>
                    <button class="modal-close" onclick="closeUserModal()">&times;</button>
                </div>
                <div class="modal-body" id="user-modal-content">
                    <div class="loading"><div class="spinner"></div></div>
                </div>
            </div>
        `;
        document.body.appendChild(modal);

        // Close on backdrop click
        modal.addEventListener('click', (e) => {
            if (e.target === modal) closeUserModal();
        });
    }

    modal.classList.add('active');
    destroyUserChart();
    document.getElementById('user-modal-content').innerHTML = '<div class="loading"><div class="spinner"></div></div>';

    const data = await fetchAPI(`/api/user/${userId}`);
    if (!data || data.error) {
        document.getElementById('user-modal-content').innerHTML = '<div class="empty-state"><p>User not found</p></div>';
        return;
    }

    const name = (data.name === null || data.name === undefined) ? '' : String(data.name);
    // Array.from iterates by code point; multi-code-point graphemes are still
    // truncated to their first code point.
    const initial = (Array.from(name)[0] || '?').toUpperCase();

    // Another openUserModal() call may have rendered while this one was
    // waiting on the network; drop its chart before replacing its canvas.
    destroyUserChart();

    document.getElementById('user-modal-content').innerHTML = `
        <div class="user-profile">
            <div class="user-profile-avatar">${escapeHtml(initial)}</div>
            <div class="user-profile-info">
                <h3>${escapeHtml(name)}</h3>
                <p>Rank #${data.rank} • Member since ${formatDate(data.first_seen)}</p>
            </div>
        </div>

        <div class="user-stats-grid">
            <div class="user-stat">
                <div class="user-stat-value">${formatNumber(data.messages)}</div>
                <div class="user-stat-label">Messages</div>
            </div>
            <div class="user-stat">
                <div class="user-stat-value">${formatNumber(data.characters)}</div>
                <div class="user-stat-label">Characters</div>
            </div>
            <div class="user-stat">
                <div class="user-stat-value">${data.daily_average}</div>
                <div class="user-stat-label">Daily Avg</div>
            </div>
            <div class="user-stat">
                <div class="user-stat-value">${formatNumber(data.links)}</div>
                <div class="user-stat-label">Links</div>
            </div>
            <div class="user-stat">
                <div class="user-stat-value">${formatNumber(data.media)}</div>
                <div class="user-stat-label">Media</div>
            </div>
            <div class="user-stat">
                <div class="user-stat-value">${data.active_days}</div>
                <div class="user-stat-label">Active Days</div>
            </div>
        </div>

        <h4 style="margin-bottom: 1rem;">Activity by Hour</h4>
        <canvas id="user-hourly-chart" height="150"></canvas>
    `;

    // Render user's hourly chart
    const ctx = document.getElementById('user-hourly-chart');
    state.charts.userHourly = new Chart(ctx, {
        type: 'bar',
        data: {
            labels: Array.from({length: 24}, (_, i) => `${i}:00`),
            datasets: [{
                data: data.hourly_activity,
                backgroundColor: '#0088cc',
                borderRadius: 2
            }]
        },
        options: {
            responsive: true,
            maintainAspectRatio: false,
            plugins: { legend: { display: false } },
            scales: {
                x: { grid: { display: false }, ticks: { maxTicksLimit: 12 } },
                y: { beginAtZero: true, grid: { color: '#2d3748' } }
            }
        }
    });
}
564
+
565
/**
 * Hide the user-details modal by dropping its `active` class.
 * Does nothing when the modal has not been created yet.
 */
function closeUserModal() {
    const overlay = document.getElementById('user-modal');
    if (!overlay) return;

    overlay.classList.remove('active');
}
569
+
570
+ // ==========================================
571
+ // EXPORT FUNCTIONS
572
+ // ==========================================
573
+
574
/**
 * Navigate to the users export endpoint for the selected timeframe.
 */
function exportUsers() {
    const exportUrl = `/api/export/users?timeframe=${getTimeframe()}`;
    window.location.href = exportUrl;
}
578
+
579
/**
 * Navigate to the messages export endpoint for the selected timeframe.
 */
function exportMessages() {
    const exportUrl = `/api/export/messages?timeframe=${getTimeframe()}`;
    window.location.href = exportUrl;
}
583
+
584
+ // ==========================================
585
+ // AUTO REFRESH
586
+ // ==========================================
587
+
588
/**
 * Switch the 60-second auto-refresh timer on or off.
 * The interval handle lives in `state.autoRefresh`; null means "off".
 */
function toggleAutoRefresh() {
    if (!state.autoRefresh) {
        state.autoRefresh = setInterval(loadAllData, 60000); // Refresh every minute
        console.log('Auto-refresh enabled (60s)');
        return;
    }

    clearInterval(state.autoRefresh);
    state.autoRefresh = null;
    console.log('Auto-refresh disabled');
}
598
+
599
+ // ==========================================
600
+ // UTILITY
601
+ // ==========================================
602
+
603
/**
 * Escape text for insertion into HTML element content by letting the DOM
 * serialise it: the value is assigned as textContent of a detached <div> and
 * read back as innerHTML.
 *
 * @param {string} text Raw text.
 * @returns {string} The text as serialised by innerHTML.
 */
function escapeHtml(text) {
    const scratch = document.createElement('div');
    scratch.textContent = text;
    const { innerHTML: escaped } = scratch;
    return escaped;
}
608
+
609
+ // Keyboard shortcuts
610
// Global keyboard shortcuts:
//   Escape - close the user modal
//   r      - reload all dashboard data
document.addEventListener('keydown', (e) => {
    // Escape to close modal
    if (e.key === 'Escape') {
        closeUserModal();
    }

    // R to refresh. Skipped when a modifier is held (browser shortcuts) or
    // when focus is in a control that consumes typed characters: text fields,
    // <select> type-ahead and contenteditable regions. document.activeElement
    // may be null, so it is checked before use.
    if (e.key === 'r' && !e.ctrlKey && !e.metaKey && !e.altKey) {
        const activeElement = document.activeElement;
        const isTyping = Boolean(activeElement) && (
            ['INPUT', 'TEXTAREA', 'SELECT'].includes(activeElement.tagName) ||
            activeElement.isContentEditable === true
        );
        if (!isTyping) {
            loadAllData();
        }
    }
});
templates/chat.html ADDED
@@ -0,0 +1,831 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="he" dir="rtl">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Chat View - Telegram Style</title>
7
+ <style>
8
+ /* ===== Telegram-like Chat Viewer ===== */
9
+ :root {
10
+ --bg-primary: #0e1621;
11
+ --bg-secondary: #17212b;
12
+ --bg-message: #182533;
13
+ --bg-hover: #1e2c3a;
14
+ --bg-reply: rgba(77, 184, 255, 0.08);
15
+ --bg-forward: rgba(100, 191, 71, 0.08);
16
+ --text-primary: #f5f5f5;
17
+ --text-secondary: #8b9fad;
18
+ --text-link: #6ab2f2;
19
+ --accent-blue: #6ab2f2;
20
+ --accent-green: #6dc264;
21
+ --border-reply: #6ab2f2;
22
+ --border-forward: #6dc264;
23
+ --date-badge: #1b2a38;
24
+ --nav-bg: #17212b;
25
+ --nav-border: #0e1621;
26
+ }
27
+
28
+ * { box-sizing: border-box; margin: 0; padding: 0; }
29
+
30
+ body {
31
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
32
+ font-size: 14px;
33
+ line-height: 1.5;
34
+ background-color: var(--bg-primary);
35
+ color: var(--text-primary);
36
+ }
37
+
38
+ /* ===== Navigation ===== */
39
+ .nav-bar {
40
+ position: fixed;
41
+ top: 0; left: 0; right: 0;
42
+ z-index: 100;
43
+ background-color: var(--nav-bg);
44
+ border-bottom: 1px solid var(--nav-border);
45
+ padding: 0 16px;
46
+ }
47
+
48
+ .nav-content {
49
+ max-width: 800px;
50
+ margin: 0 auto;
51
+ display: flex;
52
+ align-items: center;
53
+ justify-content: space-between;
54
+ height: 56px;
55
+ }
56
+
57
+ .nav-title {
58
+ font-size: 18px;
59
+ font-weight: 700;
60
+ color: var(--text-primary);
61
+ }
62
+
63
+ .nav-links { display: flex; gap: 4px; }
64
+
65
+ .nav-links a {
66
+ color: var(--accent-blue);
67
+ text-decoration: none;
68
+ padding: 8px 14px;
69
+ border-radius: 8px;
70
+ font-size: 13px;
71
+ transition: background 0.15s;
72
+ }
73
+
74
+ .nav-links a:hover { background-color: var(--bg-hover); }
75
+
76
+ .nav-links a.active {
77
+ background-color: var(--accent-blue);
78
+ color: var(--bg-primary);
79
+ }
80
+
81
+ /* ===== Chat Area ===== */
82
+ .chat-wrap {
83
+ padding-top: 56px;
84
+ min-height: 100vh;
85
+ }
86
+
87
+ .chat-body {
88
+ max-width: 680px;
89
+ margin: 0 auto;
90
+ padding: 0 12px 80px;
91
+ }
92
+
93
+ .history { padding: 8px 0; }
94
+
95
+ /* ===== Load More ===== */
96
+ .load-more {
97
+ text-align: center;
98
+ padding: 16px;
99
+ }
100
+
101
+ .load-more button {
102
+ padding: 10px 24px;
103
+ background-color: var(--bg-secondary);
104
+ color: var(--accent-blue);
105
+ border: 1px solid rgba(106, 178, 242, 0.3);
106
+ border-radius: 20px;
107
+ cursor: pointer;
108
+ font-size: 14px;
109
+ transition: all 0.15s;
110
+ }
111
+
112
+ .load-more button:hover {
113
+ background-color: var(--bg-hover);
114
+ border-color: var(--accent-blue);
115
+ }
116
+
117
+ .load-more button:disabled { opacity: 0.4; cursor: not-allowed; }
118
+
119
+ /* ===== Date Separator ===== */
120
+ .date-separator {
121
+ display: flex;
122
+ align-items: center;
123
+ justify-content: center;
124
+ padding: 12px 0;
125
+ position: sticky;
126
+ top: 60px;
127
+ z-index: 10;
128
+ }
129
+
130
+ .date-badge {
131
+ padding: 4px 12px;
132
+ background-color: var(--date-badge);
133
+ border-radius: 12px;
134
+ color: var(--text-secondary);
135
+ font-size: 13px;
136
+ font-weight: 500;
137
+ box-shadow: 0 1px 4px rgba(0,0,0,0.2);
138
+ }
139
+
140
+ /* ===== Message ===== */
141
+ .msg {
142
+ display: flex;
143
+ align-items: flex-start;
144
+ gap: 10px;
145
+ padding: 3px 8px;
146
+ border-radius: 8px;
147
+ transition: background 0.15s;
148
+ }
149
+
150
+ .msg:hover { background-color: var(--bg-hover); }
151
+
152
+ .msg.joined { padding-top: 1px; }
153
+ .msg.joined .avatar-wrap { visibility: hidden; height: 0; }
154
+
155
+ /* ===== Avatar ===== */
156
+ .avatar-wrap { flex-shrink: 0; padding-top: 2px; }
157
+
158
+ .avatar {
159
+ width: 40px;
160
+ height: 40px;
161
+ border-radius: 50%;
162
+ display: flex;
163
+ align-items: center;
164
+ justify-content: center;
165
+ font-weight: 600;
166
+ font-size: 15px;
167
+ color: #fff;
168
+ cursor: pointer;
169
+ }
170
+
171
+ .avatar:hover { filter: brightness(1.15); }
172
+
173
+ /* 8 Telegram avatar colors */
174
+ .c1 { background: #ff5555; }
175
+ .c2 { background: #64bf47; }
176
+ .c3 { background: #ffab00; }
177
+ .c4 { background: #4f9cd9; }
178
+ .c5 { background: #9884e8; }
179
+ .c6 { background: #e671a5; }
180
+ .c7 { background: #47bcd1; }
181
+ .c8 { background: #ff8c44; }
182
+
183
+ /* Name colors to match avatars */
184
+ .name-c1 { color: #ff5555; }
185
+ .name-c2 { color: #64bf47; }
186
+ .name-c3 { color: #ffab00; }
187
+ .name-c4 { color: #4f9cd9; }
188
+ .name-c5 { color: #9884e8; }
189
+ .name-c6 { color: #e671a5; }
190
+ .name-c7 { color: #47bcd1; }
191
+ .name-c8 { color: #ff8c44; }
192
+
193
+ /* ===== Message Body ===== */
194
+ .msg-body {
195
+ flex: 1;
196
+ min-width: 0;
197
+ }
198
+
199
+ /* Header: name + time */
200
+ .msg-header {
201
+ display: flex;
202
+ align-items: baseline;
203
+ gap: 8px;
204
+ margin-bottom: 2px;
205
+ }
206
+
207
+ .msg-name {
208
+ font-weight: 600;
209
+ font-size: 14px;
210
+ cursor: pointer;
211
+ }
212
+
213
+ .msg-name:hover { text-decoration: underline; }
214
+
215
+ .msg-time {
216
+ color: var(--text-secondary);
217
+ font-size: 12px;
218
+ white-space: nowrap;
219
+ }
220
+
221
+ .msg-edited {
222
+ color: var(--text-secondary);
223
+ font-size: 11px;
224
+ font-style: italic;
225
+ }
226
+
227
+ /* ===== Reply Block ===== */
228
+ .reply-block {
229
+ display: flex;
230
+ gap: 0;
231
+ margin: 4px 0 6px;
232
+ padding: 6px 10px;
233
+ border-radius: 6px;
234
+ border-right: 3px solid var(--border-reply);
235
+ background: var(--bg-reply);
236
+ cursor: pointer;
237
+ overflow: hidden;
238
+ transition: background 0.15s;
239
+ }
240
+
241
+ .reply-block:hover { background: rgba(106, 178, 242, 0.15); }
242
+
243
+ .reply-content { min-width: 0; }
244
+
245
+ .reply-name {
246
+ font-weight: 600;
247
+ font-size: 13px;
248
+ color: var(--accent-blue);
249
+ }
250
+
251
+ .reply-text {
252
+ font-size: 13px;
253
+ color: var(--text-secondary);
254
+ white-space: nowrap;
255
+ overflow: hidden;
256
+ text-overflow: ellipsis;
257
+ max-width: 400px;
258
+ }
259
+
260
+ /* ===== Forward Block ===== */
261
+ .forward-block {
262
+ margin: 4px 0 6px;
263
+ padding: 6px 10px;
264
+ border-radius: 6px;
265
+ border-right: 3px solid var(--border-forward);
266
+ background: var(--bg-forward);
267
+ }
268
+
269
+ .forward-label {
270
+ font-size: 12px;
271
+ color: var(--text-secondary);
272
+ }
273
+
274
+ .forward-name {
275
+ font-weight: 600;
276
+ font-size: 13px;
277
+ color: var(--accent-green);
278
+ }
279
+
280
+ /* ===== Message Text ===== */
281
+ .msg-text {
282
+ word-wrap: break-word;
283
+ overflow-wrap: break-word;
284
+ line-height: 1.55;
285
+ unicode-bidi: plaintext;
286
+ text-align: start;
287
+ white-space: pre-wrap;
288
+ }
289
+
290
+ .msg-text a {
291
+ color: var(--text-link);
292
+ text-decoration: none;
293
+ }
294
+
295
+ .msg-text a:hover { text-decoration: underline; }
296
+
297
+ /* Mention */
298
+ .mention {
299
+ color: var(--accent-blue);
300
+ font-weight: 500;
301
+ cursor: pointer;
302
+ }
303
+
304
+ .mention:hover { text-decoration: underline; }
305
+
306
+ /* Hashtag */
307
+ .hashtag {
308
+ color: var(--accent-blue);
309
+ cursor: pointer;
310
+ }
311
+
312
+ /* Code */
313
+ .msg-text code {
314
+ font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
315
+ background: rgba(255,255,255,0.06);
316
+ padding: 1px 5px;
317
+ border-radius: 4px;
318
+ font-size: 13px;
319
+ }
320
+
321
+ .msg-text pre {
322
+ background: rgba(0,0,0,0.3);
323
+ padding: 10px 12px;
324
+ border-radius: 8px;
325
+ margin: 6px 0;
326
+ overflow-x: auto;
327
+ font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
328
+ font-size: 13px;
329
+ line-height: 1.4;
330
+ }
331
+
332
+ /* ===== Entities (links, media) ===== */
333
+ .entity-links {
334
+ margin-top: 6px;
335
+ display: flex;
336
+ flex-wrap: wrap;
337
+ gap: 6px;
338
+ }
339
+
340
+ .entity-link {
341
+ display: inline-flex;
342
+ align-items: center;
343
+ gap: 5px;
344
+ padding: 4px 10px;
345
+ background: rgba(106, 178, 242, 0.1);
346
+ border-radius: 8px;
347
+ font-size: 13px;
348
+ color: var(--text-link);
349
+ text-decoration: none;
350
+ max-width: 350px;
351
+ overflow: hidden;
352
+ text-overflow: ellipsis;
353
+ white-space: nowrap;
354
+ transition: background 0.15s;
355
+ }
356
+
357
+ .entity-link:hover {
358
+ background: rgba(106, 178, 242, 0.2);
359
+ text-decoration: none;
360
+ }
361
+
362
+ .entity-link .link-icon { font-size: 11px; }
363
+ .entity-link .link-domain {
364
+ opacity: 0.7;
365
+ font-size: 12px;
366
+ }
367
+
368
+ /* ===== Media Badge ===== */
369
+ .media-badge {
370
+ display: inline-flex;
371
+ align-items: center;
372
+ gap: 6px;
373
+ padding: 5px 10px;
374
+ background: var(--bg-secondary);
375
+ border-radius: 8px;
376
+ margin-top: 6px;
377
+ font-size: 13px;
378
+ color: var(--text-secondary);
379
+ }
380
+
381
+ .media-badge .media-icon { font-size: 14px; }
382
+
383
+ /* ===== Time for joined messages ===== */
384
+ .msg-time-inline {
385
+ color: var(--text-secondary);
386
+ font-size: 12px;
387
+ margin-top: 2px;
388
+ opacity: 0;
389
+ transition: opacity 0.15s;
390
+ }
391
+
392
+ .msg:hover .msg-time-inline { opacity: 1; }
393
+
394
+ /* ===== Selected (highlight on go-to) ===== */
395
+ .msg.selected {
396
+ background-color: rgba(106, 178, 242, 0.15);
397
+ transition: background-color 2s ease;
398
+ }
399
+
400
+ /* ===== Scroll-to-bottom ===== */
401
+ .scroll-btn {
402
+ position: fixed;
403
+ bottom: 24px;
404
+ left: 50%;
405
+ transform: translateX(-50%);
406
+ width: 44px;
407
+ height: 44px;
408
+ background: var(--bg-secondary);
409
+ color: var(--accent-blue);
410
+ border: 1px solid rgba(106, 178, 242, 0.3);
411
+ border-radius: 50%;
412
+ cursor: pointer;
413
+ font-size: 20px;
414
+ display: none;
415
+ align-items: center;
416
+ justify-content: center;
417
+ box-shadow: 0 2px 12px rgba(0,0,0,0.4);
418
+ z-index: 80;
419
+ transition: all 0.15s;
420
+ }
421
+
422
+ .scroll-btn.visible { display: flex; }
423
+ .scroll-btn:hover {
424
+ background: var(--accent-blue);
425
+ color: var(--bg-primary);
426
+ }
427
+
428
+ /* ===== Loading ===== */
429
+ .loading {
430
+ text-align: center;
431
+ padding: 24px;
432
+ color: var(--text-secondary);
433
+ }
434
+
435
+ .spinner {
436
+ display: inline-block;
437
+ width: 24px; height: 24px;
438
+ border: 3px solid var(--bg-secondary);
439
+ border-top-color: var(--accent-blue);
440
+ border-radius: 50%;
441
+ animation: spin 1s linear infinite;
442
+ margin-bottom: 8px;
443
+ }
444
+
445
+ @keyframes spin { to { transform: rotate(360deg); } }
446
+
447
+ /* ===== Toast ===== */
448
+ .toast {
449
+ position: fixed;
450
+ bottom: 80px;
451
+ left: 50%;
452
+ transform: translateX(-50%);
453
+ background: rgba(0,0,0,0.85);
454
+ color: #fff;
455
+ padding: 10px 24px;
456
+ border-radius: 20px;
457
+ z-index: 200;
458
+ opacity: 0;
459
+ transition: opacity 0.3s;
460
+ font-size: 13px;
461
+ }
462
+
463
+ .toast.visible { opacity: 1; }
464
+
465
+ /* ===== Responsive ===== */
466
+ @media (max-width: 700px) {
467
+ .nav-links a { padding: 6px 8px; font-size: 12px; }
468
+ .chat-body { padding: 0 4px 80px; }
469
+ .reply-text { max-width: 200px; }
470
+ .entity-link { max-width: 250px; }
471
+ }
472
+ </style>
473
+ </head>
474
+ <body>
475
+ <nav class="nav-bar">
476
+ <div class="nav-content">
477
+ <div class="nav-title">Chat View</div>
478
+ <div class="nav-links">
479
+ <a href="/">Overview</a>
480
+ <a href="/users">Users</a>
481
+ <a href="/chat" class="active">Chat</a>
482
+ <a href="/search">Search</a>
483
+ <a href="/moderation">Moderation</a>
484
+ <a href="/settings">Settings</a>
485
+ </div>
486
+ </div>
487
+ </nav>
488
+
489
+ <div class="chat-wrap">
490
+ <div class="chat-body">
491
+ <div class="history" id="history">
492
+ <div class="load-more" id="load-more-top">
493
+ <button onclick="loadOlderMessages()" id="load-older-btn">&#8593; Load earlier messages</button>
494
+ </div>
495
+ <div id="messages-container"></div>
496
+ <div class="loading" id="loading">
497
+ <div class="spinner"></div>
498
+ <div>Loading messages...</div>
499
+ </div>
500
+ </div>
501
+ </div>
502
+ </div>
503
+
504
+ <button class="scroll-btn" id="scroll-bottom" onclick="scrollToBottom()">&#8595;</button>
505
+ <div class="toast" id="toast"></div>
506
+
507
+ <script>
508
// ===== State =====
// Mutable page state shared by the loader and renderer functions below.
let allMessages = [],      // messages currently held for rendering
    oldestOffset = 0,      // offset of the oldest loaded message
    totalMessages = 0,     // total reported by the API
    loading = false,       // true while a fetch is in flight
    initialLoad = true;    // cleared after the first batch has been shown

// Number of messages requested per fetch.
const BATCH_SIZE = 100;
// Cache of userId -> avatar colour index, filled by getUserColor().
const userColors = {};
516
+
517
+ // ===== Utilities =====
518
/**
 * Return the avatar colour index (1..8) for a user, computing it from a
 * string hash of the id on first use and caching it in `userColors`.
 *
 * @param {*} userId User identifier; hashed via its `String()` form.
 * @returns {number} Colour index matching the `.cN` / `.name-cN` CSS classes.
 */
function getUserColor(userId) {
    const cached = userColors[userId];
    if (cached) {
        return cached;
    }

    // Same rolling hash as before: h = code + ((h << 5) - h), per UTF-16 unit.
    const digest = String(userId)
        .split('')
        .reduce((acc, ch) => ch.charCodeAt(0) + ((acc << 5) - acc), 0);

    const colorIndex = (Math.abs(digest) % 8) + 1;
    userColors[userId] = colorIndex;
    return colorIndex;
}
529
+
530
/**
 * Build the (up to) two-character avatar label for a display name.
 *
 * - Two or more words: first character of the first two words.
 * - One word: its first two characters.
 * - Missing, empty or whitespace-only name: `'?'`.
 *
 * Characters are taken by Unicode code point, so names that start with an
 * emoji or another astral character are not cut in the middle of a
 * surrogate pair. Surrounding whitespace is ignored.
 *
 * @param {*} name Display name (converted with `String()` when truthy).
 * @returns {string} Upper-cased initials, or `'?'`.
 */
function getInitials(name) {
    if (!name) return '?';

    const trimmed = String(name).trim();
    if (!trimmed) return '?';

    // Array.from iterates by code point rather than UTF-16 code unit.
    const firstChar = (word) => Array.from(word)[0] || '';

    const words = trimmed.split(/\s+/);
    if (words.length >= 2) {
        return (firstChar(words[0]) + firstChar(words[1])).toUpperCase();
    }
    return Array.from(trimmed).slice(0, 2).join('').toUpperCase();
}
536
+
537
/**
 * Format a date string as "Month D, YYYY" (English month names) using the
 * browser's local time zone.
 *
 * @param {string} dateStr Any value accepted by the `Date` constructor.
 * @returns {string} The formatted date, or `''` when `dateStr` is empty or
 *                   cannot be parsed (instead of "undefined NaN, NaN").
 */
function formatDate(dateStr) {
    if (!dateStr) return '';

    const d = new Date(dateStr);
    // An unparsable input produces an "Invalid Date" whose time value is NaN.
    if (Number.isNaN(d.getTime())) return '';

    const months = ['January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December'];
    return `${months[d.getMonth()]} ${d.getDate()}, ${d.getFullYear()}`;
}
544
+
545
/**
 * Format the time-of-day part of a date string as 24-hour "HH:MM" in the
 * browser's local time zone.
 *
 * `hourCycle: 'h23'` is requested explicitly: with only `hour12: false`
 * some engines pick the 1-24 cycle for en-US and print midnight as "24:xx".
 *
 * @param {string} dateStr Any value accepted by the `Date` constructor.
 * @returns {string} The formatted time, or `''` when `dateStr` is empty or
 *                   cannot be parsed (instead of "Invalid Date").
 */
function formatTime(dateStr) {
    if (!dateStr) return '';

    const d = new Date(dateStr);
    if (Number.isNaN(d.getTime())) return '';

    return d.toLocaleTimeString('en-US', {
        hour: '2-digit',
        minute: '2-digit',
        hourCycle: 'h23'
    });
}
550
+
551
/**
 * Escape text for safe insertion into HTML markup.
 *
 * Escapes `&`, `<`, `>` and both quote characters, so the result is safe
 * in element content and inside quoted attribute values such as
 * `href="..."`. A textContent/innerHTML round-trip leaves quotes untouched,
 * which allows attribute injection.
 *
 * Named entities are used on purpose: numeric references such as `&#39;`
 * contain `#` followed by word characters, which a later hashtag regex
 * pass over the escaped text would treat as a hashtag.
 *
 * @param {*} text Value to escape; any falsy value yields `''`.
 * @returns {string} The escaped text.
 */
function escapeHtml(text) {
    if (!text) return '';
    return String(text)
        .replace(/&/g, '&amp;')   // must run first so later entities are not double-escaped
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&apos;');
}
557
+
558
/**
 * Extract a short display host from a URL.
 *
 * Only a leading `www.` label is removed; a plain string replace would
 * also cut `www.` out of the middle of hosts such as `awww.example.com`.
 * When the value is not a parsable absolute URL, its first 30 characters
 * are returned instead.
 *
 * @param {*} url URL to inspect.
 * @returns {string} Host name without a leading `www.`, a truncated copy
 *                   of the input, or `''` for `null`/`undefined`.
 */
function getDomain(url) {
    if (url === null || url === undefined) return '';
    try {
        return new URL(url).hostname.replace(/^www\./, '');
    } catch {
        // String() keeps the fallback from throwing on non-string input.
        return String(url).substring(0, 30);
    }
}
565
+
566
+ // ===== Text Formatting =====
567
/**
 * Convert raw message text into HTML with clickable URLs and highlighted
 * @mentions and #hashtags.
 *
 * The raw text is tokenised in a single pass and every piece is escaped
 * individually. Running the mention/hashtag replacements over already
 * generated markup would rewrite `@` and `#` inside link hrefs (for example
 * `https://site/@user` or `https://site/page#section`) and corrupt them.
 *
 * - URLs: `http://` or `https://` up to the next whitespace, `<` or `"`.
 * - Mentions: `@` followed by at least 3 word characters.
 * - Hashtags: `#` followed by at least 2 letters/digits/underscores in any
 *   script (so non-Latin hashtags are highlighted too).
 * - Newlines become `<br>`.
 *
 * @param {string} text Raw message text.
 * @param {*} entities Accepted for interface compatibility; not used here.
 * @returns {string} HTML-safe markup, or `''` for empty text.
 */
function formatMessageText(text, entities) {
    if (!text) return '';

    const source = String(text);
    const tokenPattern = /(https?:\/\/[^\s<"]+)|@(\w{3,})|#([\p{L}\p{N}_]{2,})/gu;

    let html = '';
    let cursor = 0;
    let match;

    while ((match = tokenPattern.exec(source)) !== null) {
        const [token, url, mention, hashtag] = match;

        // Plain text between the previous token and this one.
        html += escapeHtml(source.slice(cursor, match.index));

        if (url !== undefined) {
            // The URL pattern excludes `"`, and escapeHtml neutralises `&`, `<`
            // and `>`, so the value cannot break out of the quoted href.
            const safeUrl = escapeHtml(url);
            html += `<a href="${safeUrl}" target="_blank" rel="noopener">${safeUrl}</a>`;
        } else if (mention !== undefined) {
            html += `<span class="mention">@${escapeHtml(mention)}</span>`;
        } else {
            html += `<span class="hashtag">#${escapeHtml(hashtag)}</span>`;
        }

        cursor = match.index + token.length;
    }

    // Trailing text after the last token.
    html += escapeHtml(source.slice(cursor));

    // Convert newlines to <br>
    return html.replace(/\n/g, '<br>');
}
594
+
595
+ // ===== Render Message =====
596
/**
 * Decide whether a link value may be used as an `href`.
 *
 * The value is resolved the way the browser would resolve it (relative to
 * the current page) and accepted only for http(s), tg and mailto schemes,
 * which rejects `javascript:`, `data:` and similar script-capable URLs.
 */
function isSafeLinkUrl(url) {
    try {
        const protocol = new URL(String(url), window.location.href).protocol;
        return ['http:', 'https:', 'tg:', 'mailto:'].includes(protocol);
    } catch {
        return false;
    }
}

/**
 * Build the DOM for one chat message.
 *
 * Returns a DocumentFragment holding an optional date separator (when the
 * calendar day differs from the previous message) followed by the message
 * element `#message<ID>`. A message is rendered "joined" (no avatar or
 * header) when it has the same sender as the previous one, is on the same
 * day, is less than 5 minutes later, and neither message is a forward.
 *
 * All message-derived values are escaped before they reach `innerHTML`,
 * the reply jump is wired with an event listener instead of interpolating
 * the id into inline script, and entity links are limited to safe schemes.
 *
 * @param {object} msg Message record. Fields read here: date, from_id,
 *   from_name, message_id/id, is_edited, forwarded_from,
 *   reply_to_message_id, reply_to_name, reply_to_text, text, entities,
 *   has_media, has_photo.
 * @param {object|null} prevMsg The message rendered just before, if any.
 * @returns {DocumentFragment}
 */
function renderMessage(msg, prevMsg) {
    const frag = document.createDocumentFragment();

    // Date separator
    const msgDate = msg.date ? msg.date.split('T')[0] : '';
    const prevDate = prevMsg && prevMsg.date ? prevMsg.date.split('T')[0] : '';

    if (msgDate !== prevDate) {
        const sep = document.createElement('div');
        sep.className = 'date-separator';
        const badge = document.createElement('div');
        badge.className = 'date-badge';
        badge.textContent = formatDate(msg.date);
        sep.appendChild(badge);
        frag.appendChild(sep);
    }

    // Joined message? (same user, same day, within 5 minutes)
    const isJoined = prevMsg &&
        prevMsg.from_id === msg.from_id &&
        msgDate === prevDate &&
        !msg.forwarded_from &&
        !prevMsg.forwarded_from &&
        timeDiffMinutes(prevMsg.date, msg.date) < 5;

    const colorNum = getUserColor(msg.from_id);
    const el = document.createElement('div');
    el.className = `msg${isJoined ? ' joined' : ''}`;
    el.id = `message${msg.message_id || msg.id}`;

    let html = '';

    // Avatar (initials come from the sender name, so they must be escaped)
    html += `<div class="avatar-wrap">
        <div class="avatar c${colorNum}">${escapeHtml(getInitials(msg.from_name))}</div>
    </div>`;

    // Body
    html += '<div class="msg-body">';

    // Header (name + time) - only for first message in group
    if (!isJoined) {
        html += `<div class="msg-header">
            <span class="msg-name name-c${colorNum}">${escapeHtml(msg.from_name || 'Unknown')}</span>
            <span class="msg-time">${formatTime(msg.date)}</span>
            ${msg.is_edited ? '<span class="msg-edited">edited</span>' : ''}
        </div>`;
    }

    // Forward block
    if (msg.forwarded_from) {
        html += `<div class="forward-block">
            <div class="forward-label">Forwarded message</div>
            <div class="forward-name">${escapeHtml(msg.forwarded_from)}</div>
        </div>`;
    }

    // Reply block (click handler is attached after the markup is parsed)
    const hasReply = Boolean(msg.reply_to_message_id && msg.reply_to_name);
    if (hasReply) {
        html += `<div class="reply-block">
            <div class="reply-content">
                <div class="reply-name">${escapeHtml(msg.reply_to_name)}</div>
                <div class="reply-text">${escapeHtml(msg.reply_to_text || '')}</div>
            </div>
        </div>`;
    }

    // Message text
    if (msg.text) {
        html += `<div class="msg-text">${formatMessageText(msg.text, msg.entities)}</div>`;
    }

    // Entity links (extracted from DB)
    const entities = Array.isArray(msg.entities) ? msg.entities : [];
    const links = entities.filter(e => e && (e.type === 'link' || e.type === 'text_link'));
    const seen = new Set();
    let linksHtml = '';
    for (const link of links) {
        const url = link.value;
        if (!url || seen.has(url)) continue;
        seen.add(url);
        // Skip if the link is already visible in the text
        if (msg.text && msg.text.includes(url)) continue;
        // Never emit script-capable schemes as a clickable href
        if (!isSafeLinkUrl(url)) continue;
        // Quotes are replaced explicitly so the href cannot be broken out of
        // regardless of how escapeHtml treats them.
        const href = escapeHtml(url).replace(/"/g, '&quot;');
        linksHtml += `<a class="entity-link" href="${href}" target="_blank" rel="noopener">
            <span class="link-icon">🔗</span>
            <span class="link-domain">${escapeHtml(getDomain(url))}</span>
        </a>`;
    }
    // Only emit the wrapper when it has content, so no empty spacing is added
    if (linksHtml) {
        html += `<div class="entity-links">${linksHtml}</div>`;
    }

    // Media badge
    if (msg.has_media) {
        const icon = msg.has_photo ? '📷' : '📎';
        const label = msg.has_photo ? 'Photo' : 'Media';
        html += `<div class="media-badge"><span class="media-icon">${icon}</span> ${label}</div>`;
    }

    // Time for joined messages (shown on hover)
    if (isJoined) {
        html += `<div class="msg-time-inline">${formatTime(msg.date)}${msg.is_edited ? ' · edited' : ''}</div>`;
    }

    html += '</div>'; // close msg-body
    el.innerHTML = html;

    if (hasReply) {
        const replyEl = el.querySelector('.reply-block');
        if (replyEl) {
            replyEl.addEventListener('click', () => goToMessage(msg.reply_to_message_id));
        }
    }

    frag.appendChild(el);
    return frag;
}
703
+
704
/**
 * Absolute distance between two date strings, in minutes.
 *
 * @param {string} dateStr1 First timestamp.
 * @param {string} dateStr2 Second timestamp.
 * @returns {number} Minutes between the two, or 999 when either is missing.
 */
function timeDiffMinutes(dateStr1, dateStr2) {
    const eitherMissing = !dateStr1 || !dateStr2;
    if (eitherMissing) {
        return 999;
    }
    const elapsedMs = new Date(dateStr2) - new Date(dateStr1);
    return Math.abs(elapsedMs) / 60000;
}
708
+
709
+ // ===== Render All =====
710
/**
 * Re-render the whole message list from `allMessages`.
 *
 * Clears `#messages-container` and appends each message in order, passing
 * the preceding message (or `null` for the first) so that date separators
 * and joined-message grouping can be decided.
 */
function renderAllMessages() {
    const listEl = document.getElementById('messages-container');
    listEl.innerHTML = '';

    allMessages.forEach((message, index) => {
        const previous = index > 0 ? allMessages[index - 1] : null;
        listEl.appendChild(renderMessage(message, previous));
    });
}
717
+
718
+ // ===== Load Messages =====
719
/**
 * Load and display the most recent batch of messages.
 *
 * First asks the API for the total message count (a `limit=1` request),
 * then fetches the last `BATCH_SIZE` messages, renders them and scrolls to
 * the bottom. The "load earlier" control is hidden when nothing older
 * exists, including the empty-chat case.
 *
 * HTTP error responses are treated as failures and surface as a toast; the
 * `loading` flag and spinner are reset in `finally`, so every exit path
 * leaves the page in a usable state.
 */
async function loadInitialMessages() {
    if (loading) return;
    loading = true;

    const spinner = document.getElementById('loading');
    spinner.style.display = 'block';

    try {
        const countRes = await fetch('/api/chat/messages?limit=1&offset=0');
        if (!countRes.ok) throw new Error(`HTTP ${countRes.status}`);
        const countData = await countRes.json();
        totalMessages = countData.total || 0;

        if (totalMessages === 0) {
            document.getElementById('messages-container').innerHTML =
                '<div class="date-separator"><div class="date-badge">No messages found</div></div>';
            // Nothing older to load either.
            document.getElementById('load-more-top').style.display = 'none';
            return;
        }

        // Start at the tail of the history so the newest messages show first.
        const startOffset = Math.max(0, totalMessages - BATCH_SIZE);
        oldestOffset = startOffset;

        const res = await fetch(`/api/chat/messages?limit=${BATCH_SIZE}&offset=${startOffset}`);
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        const data = await res.json();

        if (data.messages && data.messages.length > 0) {
            allMessages = data.messages;
            renderAllMessages();
            // Give the browser a moment to lay out before scrolling.
            setTimeout(() => { scrollToBottom(); initialLoad = false; }, 100);

            if (oldestOffset <= 0) {
                document.getElementById('load-more-top').style.display = 'none';
            }
        }
    } catch (e) {
        console.error('Error loading messages:', e);
        showToast('Error loading messages');
    } finally {
        loading = false;
        spinner.style.display = 'none';
    }
}
760
+
761
/**
 * Fetch the batch of messages immediately preceding the oldest one loaded
 * and prepend it to the view, keeping the visible scroll position stable.
 *
 * HTTP error responses are treated as failures and surface as a toast
 * (an error response carrying a JSON body would otherwise be silently
 * ignored). The `loading` flag and the button state are restored in
 * `finally`.
 */
async function loadOlderMessages() {
    if (loading || oldestOffset <= 0) return;
    loading = true;

    const loadButton = document.getElementById('load-older-btn');
    loadButton.disabled = true;

    try {
        // The final page may be shorter than BATCH_SIZE.
        const newOffset = Math.max(0, oldestOffset - BATCH_SIZE);
        const limit = oldestOffset - newOffset;

        const res = await fetch(`/api/chat/messages?limit=${limit}&offset=${newOffset}`);
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        const data = await res.json();

        if (data.messages && data.messages.length > 0) {
            const container = document.getElementById('messages-container');
            const scrollBefore = container.scrollHeight;

            allMessages = [...data.messages, ...allMessages];
            oldestOffset = newOffset;
            renderAllMessages();

            // Shift the window by the height that was added above the
            // previously visible content so the view does not jump.
            const scrollAfter = container.scrollHeight;
            window.scrollBy(0, scrollAfter - scrollBefore);

            if (oldestOffset <= 0) {
                document.getElementById('load-more-top').style.display = 'none';
            }
        }
    } catch (e) {
        console.error('Error loading older messages:', e);
        showToast('Error loading messages');
    } finally {
        loading = false;
        loadButton.disabled = false;
    }
}
796
+
797
+ // ===== Navigation =====
798
/**
 * Scroll to a rendered message and briefly highlight it.
 *
 * Shows a toast instead when the element `#message<ID>` is not present in
 * the page.
 *
 * @param {*} messageId Id used to build the element id `message<ID>`.
 */
function goToMessage(messageId) {
    const target = document.getElementById(`message${messageId}`);

    if (!target) {
        showToast('Message not in current view');
        return;
    }

    target.scrollIntoView({ behavior: 'smooth', block: 'center' });
    target.classList.add('selected');
    setTimeout(() => target.classList.remove('selected'), 2500);
}
808
+
809
/** Smoothly scroll the window down to the end of the document body. */
function scrollToBottom() {
    const bottom = document.body.scrollHeight;
    window.scrollTo({ top: bottom, behavior: 'smooth' });
}
812
+
813
/**
 * Show a transient notification for 3 seconds.
 *
 * Any pending hide timer from an earlier call is cancelled first, so a
 * toast shown shortly after another one stays visible for its full
 * duration instead of being hidden by the older timer.
 *
 * @param {string} message Text to display (assigned via `textContent`).
 */
function showToast(message) {
    const toast = document.getElementById('toast');
    toast.textContent = message;
    toast.classList.add('visible');

    // The timer handle lives on the function object; no extra global is needed.
    clearTimeout(showToast.hideTimer);
    showToast.hideTimer = setTimeout(() => {
        toast.classList.remove('visible');
        showToast.hideTimer = null;
    }, 3000);
}
819
+
820
+ // Scroll button visibility
821
// Show the scroll-to-bottom button once the viewport is more than 500px
// above the end of the document; hide it otherwise.
window.addEventListener('scroll', function onWindowScroll() {
    const remaining = document.body.scrollHeight - window.scrollY - window.innerHeight;
    const scrollButton = document.getElementById('scroll-bottom');
    scrollButton.classList.toggle('visible', remaining > 500);
});

// ===== Init =====
loadInitialMessages();
829
+ </script>
830
+ </body>
831
+ </html>
templates/index.html ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Telegram Analytics Dashboard</title>
7
+ <link rel="stylesheet" href="/static/css/style.css">
8
+ <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
9
+ </head>
10
+ <body>
11
+ <!-- Sidebar -->
12
+ <nav class="sidebar">
13
+ <div class="logo">
14
+ <span class="logo-icon">📊</span>
15
+ <span class="logo-text">TG Analytics</span>
16
+ </div>
17
+ <ul class="nav-menu">
18
+ <li class="nav-item active">
19
+ <a href="/" class="nav-link">
20
+ <span class="icon">📈</span>
21
+ <span>Overview</span>
22
+ </a>
23
+ </li>
24
+ <li class="nav-item">
25
+ <a href="/users" class="nav-link">
26
+ <span class="icon">👥</span>
27
+ <span>Users</span>
28
+ </a>
29
+ </li>
30
+ <li class="nav-item">
31
+ <a href="/chat" class="nav-link">
32
+ <span class="icon">💬</span>
33
+ <span>Chat</span>
34
+ </a>
35
+ </li>
36
+ <li class="nav-item">
37
+ <a href="/search" class="nav-link">
38
+ <span class="icon">🔍</span>
39
+ <span>Search</span>
40
+ </a>
41
+ </li>
42
+ <li class="nav-item">
43
+ <a href="/moderation" class="nav-link">
44
+ <span class="icon">🛡️</span>
45
+ <span>Moderation</span>
46
+ </a>
47
+ </li>
48
+ <li class="nav-item">
49
+ <a href="/settings" class="nav-link">
50
+ <span class="icon">⚙️</span>
51
+ <span>Settings</span>
52
+ </a>
53
+ </li>
54
+ </ul>
55
+ <div class="sidebar-footer">
56
+ <div class="export-buttons">
57
+ <button onclick="exportUsers()" class="btn btn-sm">📥 Export Users</button>
58
+ <button onclick="exportMessages()" class="btn btn-sm">📥 Export Messages</button>
59
+ </div>
60
+ </div>
61
+ </nav>
62
+
63
+ <!-- Main Content -->
64
+ <main class="main-content">
65
+ <!-- Header -->
66
+ <header class="header">
67
+ <h1>Dashboard Overview</h1>
68
+ <div class="header-controls">
69
+ <select id="timeframe" class="select" onchange="loadAllData()">
70
+ <option value="today">Today</option>
71
+ <option value="yesterday">Yesterday</option>
72
+ <option value="week">This Week</option>
73
+ <option value="month" selected>This Month</option>
74
+ <option value="year">This Year</option>
75
+ <option value="all">All Time</option>
76
+ </select>
77
+ <button onclick="loadAllData()" class="btn btn-primary">🔄 Refresh</button>
78
+ </div>
79
+ </header>
80
+
81
+ <!-- Stats Cards -->
82
+ <section class="stats-grid">
83
+ <div class="stat-card">
84
+ <div class="stat-icon">💬</div>
85
+ <div class="stat-content">
86
+ <div class="stat-value" id="total-messages">-</div>
87
+ <div class="stat-label">Total Messages</div>
88
+ </div>
89
+ </div>
90
+ <div class="stat-card">
91
+ <div class="stat-icon">👤</div>
92
+ <div class="stat-content">
93
+ <div class="stat-value" id="active-users">-</div>
94
+ <div class="stat-label">Active Users</div>
95
+ </div>
96
+ </div>
97
+ <div class="stat-card">
98
+ <div class="stat-icon">📅</div>
99
+ <div class="stat-content">
100
+ <div class="stat-value" id="messages-per-day">-</div>
101
+ <div class="stat-label">Messages/Day</div>
102
+ </div>
103
+ </div>
104
+ <div class="stat-card">
105
+ <div class="stat-icon">🔗</div>
106
+ <div class="stat-content">
107
+ <div class="stat-value" id="links-count">-</div>
108
+ <div class="stat-label">Links Shared</div>
109
+ </div>
110
+ </div>
111
+ <div class="stat-card">
112
+ <div class="stat-icon">🖼️</div>
113
+ <div class="stat-content">
114
+ <div class="stat-value" id="media-count">-</div>
115
+ <div class="stat-label">Media Shared</div>
116
+ </div>
117
+ </div>
118
+ <div class="stat-card">
119
+ <div class="stat-icon">↩️</div>
120
+ <div class="stat-content">
121
+ <div class="stat-value" id="replies-count">-</div>
122
+ <div class="stat-label">Replies</div>
123
+ </div>
124
+ </div>
125
+ </section>
126
+
127
+ <!-- Charts Row 1 -->
128
+ <section class="charts-row">
129
+ <div class="chart-card large">
130
+ <div class="chart-header">
131
+ <h3>Message Volume</h3>
132
+ <select id="messages-granularity" class="select-sm" onchange="loadMessagesChart()">
133
+ <option value="hour">Hourly</option>
134
+ <option value="day" selected>Daily</option>
135
+ <option value="week">Weekly</option>
136
+ </select>
137
+ </div>
138
+ <div class="chart-container">
139
+ <canvas id="messages-chart"></canvas>
140
+ </div>
141
+ </div>
142
+ <div class="chart-card">
143
+ <div class="chart-header">
144
+ <h3>Active Users</h3>
145
+ </div>
146
+ <div class="chart-container">
147
+ <canvas id="users-chart"></canvas>
148
+ </div>
149
+ </div>
150
+ </section>
151
+
152
+ <!-- Charts Row 2 -->
153
+ <section class="charts-row">
154
+ <div class="chart-card">
155
+ <div class="chart-header">
156
+ <h3>Activity by Hour</h3>
157
+ </div>
158
+ <div class="chart-container">
159
+ <canvas id="hourly-chart"></canvas>
160
+ </div>
161
+ </div>
162
+ <div class="chart-card">
163
+ <div class="chart-header">
164
+ <h3>Activity by Day</h3>
165
+ </div>
166
+ <div class="chart-container">
167
+ <canvas id="daily-chart"></canvas>
168
+ </div>
169
+ </div>
170
+ </section>
171
+
172
+ <!-- Heatmap -->
173
+ <section class="charts-row">
174
+ <div class="chart-card full-width">
175
+ <div class="chart-header">
176
+ <h3>Activity Heatmap</h3>
177
+ <span class="chart-subtitle">Hour of Day vs Day of Week</span>
178
+ </div>
179
+ <div class="heatmap-container" id="heatmap">
180
+ <!-- Heatmap will be rendered here -->
181
+ </div>
182
+ </div>
183
+ </section>
184
+
185
+ <!-- Top Lists -->
186
+ <section class="lists-row">
187
+ <div class="list-card">
188
+ <div class="list-header">
189
+ <h3>🏆 Top Users</h3>
190
+ <a href="/users" class="link">View All →</a>
191
+ </div>
192
+ <div class="list-content" id="top-users-list">
193
+ <!-- List will be rendered here -->
194
+ </div>
195
+ </div>
196
+ <div class="list-card">
197
+ <div class="list-header">
198
+ <h3>🔤 Top Words</h3>
199
+ </div>
200
+ <div class="list-content" id="top-words-list">
201
+ <!-- List will be rendered here -->
202
+ </div>
203
+ </div>
204
+ <div class="list-card">
205
+ <div class="list-header">
206
+ <h3>🌐 Top Domains</h3>
207
+ </div>
208
+ <div class="list-content" id="top-domains-list">
209
+ <!-- List will be rendered here -->
210
+ </div>
211
+ </div>
212
+ </section>
213
+ </main>
214
+
215
+ <script src="/static/js/dashboard.js"></script>
216
+ <script>
217
// Kick off the first dashboard data load once the DOM has been parsed.
document.addEventListener('DOMContentLoaded', function onDomReady() {
    loadAllData();
});
221
+ </script>
222
+ </body>
223
+ </html>
templates/moderation.html ADDED
@@ -0,0 +1,459 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Moderation - Telegram Analytics</title>
7
+ <link rel="stylesheet" href="/static/css/style.css">
8
+ <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
9
+ </head>
10
+ <body>
11
+ <!-- Sidebar -->
12
+ <nav class="sidebar">
13
+ <div class="logo">
14
+ <span class="logo-icon">📊</span>
15
+ <span class="logo-text">TG Analytics</span>
16
+ </div>
17
+ <ul class="nav-menu">
18
+ <li class="nav-item">
19
+ <a href="/" class="nav-link">
20
+ <span class="icon">📈</span>
21
+ <span>Overview</span>
22
+ </a>
23
+ </li>
24
+ <li class="nav-item">
25
+ <a href="/users" class="nav-link">
26
+ <span class="icon">👥</span>
27
+ <span>Users</span>
28
+ </a>
29
+ </li>
30
+ <li class="nav-item">
31
+ <a href="/chat" class="nav-link">
32
+ <span class="icon">💬</span>
33
+ <span>Chat</span>
34
+ </a>
35
+ </li>
36
+ <li class="nav-item">
37
+ <a href="/search" class="nav-link">
38
+ <span class="icon">🔍</span>
39
+ <span>Search</span>
40
+ </a>
41
+ </li>
42
+ <li class="nav-item active">
43
+ <a href="/moderation" class="nav-link">
44
+ <span class="icon">🛡️</span>
45
+ <span>Moderation</span>
46
+ </a>
47
+ </li>
48
+ <li class="nav-item">
49
+ <a href="/settings" class="nav-link">
50
+ <span class="icon">⚙️</span>
51
+ <span>Settings</span>
52
+ </a>
53
+ </li>
54
+ </ul>
55
+ </nav>
56
+
57
+ <!-- Main Content -->
58
+ <main class="main-content">
59
+ <!-- Header -->
60
+ <header class="header">
61
+ <h1>Moderation & Content Analytics</h1>
62
+ <div class="header-controls">
63
+ <select id="timeframe" class="select" onchange="loadAllData()">
64
+ <option value="today">Today</option>
65
+ <option value="yesterday">Yesterday</option>
66
+ <option value="week">This Week</option>
67
+ <option value="month" selected>This Month</option>
68
+ <option value="year">This Year</option>
69
+ <option value="all">All Time</option>
70
+ </select>
71
+ <button onclick="loadAllData()" class="btn btn-primary">🔄 Refresh</button>
72
+ </div>
73
+ </header>
74
+
75
+ <!-- Content Stats -->
76
+ <section class="stats-grid">
77
+ <div class="stat-card">
78
+ <div class="stat-icon">🔗</div>
79
+ <div class="stat-content">
80
+ <div class="stat-value" id="total-links">-</div>
81
+ <div class="stat-label">Links Shared</div>
82
+ </div>
83
+ </div>
84
+ <div class="stat-card">
85
+ <div class="stat-icon">🖼️</div>
86
+ <div class="stat-content">
87
+ <div class="stat-value" id="total-media">-</div>
88
+ <div class="stat-label">Media Shared</div>
89
+ </div>
90
+ </div>
91
+ <div class="stat-card">
92
+ <div class="stat-icon">@</div>
93
+ <div class="stat-content">
94
+ <div class="stat-value" id="total-mentions">-</div>
95
+ <div class="stat-label">Mentions</div>
96
+ </div>
97
+ </div>
98
+ <div class="stat-card">
99
+ <div class="stat-icon">↪️</div>
100
+ <div class="stat-content">
101
+ <div class="stat-value" id="total-forwards">-</div>
102
+ <div class="stat-label">Forwards</div>
103
+ </div>
104
+ </div>
105
+ </section>
106
+
107
+ <!-- Charts Row -->
108
+ <section class="charts-row">
109
+ <div class="chart-card">
110
+ <div class="chart-header">
111
+ <h3>Top Shared Domains</h3>
112
+ </div>
113
+ <div class="chart-container">
114
+ <canvas id="domains-chart"></canvas>
115
+ </div>
116
+ </div>
117
+ <div class="chart-card">
118
+ <div class="chart-header">
119
+ <h3>Content Type Distribution</h3>
120
+ </div>
121
+ <div class="chart-container">
122
+ <canvas id="content-chart"></canvas>
123
+ </div>
124
+ </div>
125
+ </section>
126
+
127
+ <!-- Lists Row -->
128
+ <section class="lists-row">
129
+ <!-- Top Domains List -->
130
+ <div class="list-card">
131
+ <div class="list-header">
132
+ <h3>🌐 Top Domains</h3>
133
+ </div>
134
+ <div class="list-content" id="domains-list">
135
+ <div class="loading"><div class="spinner"></div></div>
136
+ </div>
137
+ </div>
138
+
139
+ <!-- Top Mentions List -->
140
+ <div class="list-card">
141
+ <div class="list-header">
142
+ <h3>@ Top Mentions</h3>
143
+ </div>
144
+ <div class="list-content" id="mentions-list">
145
+ <div class="loading"><div class="spinner"></div></div>
146
+ </div>
147
+ </div>
148
+
149
+ <!-- Top Words List -->
150
+ <div class="list-card">
151
+ <div class="list-header">
152
+ <h3>🔤 Top Words</h3>
153
+ </div>
154
+ <div class="list-content" id="words-list">
155
+ <div class="loading"><div class="spinner"></div></div>
156
+ </div>
157
+ </div>
158
+ </section>
159
+
160
+ <!-- Link Sharers -->
161
+ <section class="chart-card full-width">
162
+ <div class="chart-header">
163
+ <h3>Top Link Sharers</h3>
164
+ </div>
165
+ <div style="overflow-x: auto;">
166
+ <table class="users-table">
167
+ <thead>
168
+ <tr>
169
+ <th style="width: 60px;">Rank</th>
170
+ <th>User</th>
171
+ <th style="width: 120px;">Links</th>
172
+ <th style="width: 120px;">Media</th>
173
+ <th style="width: 120px;">Messages</th>
174
+ <th style="width: 150px;">Link Rate</th>
175
+ </tr>
176
+ </thead>
177
+ <tbody id="link-sharers-body">
178
+ <tr>
179
+ <td colspan="6" class="loading">
180
+ <div class="spinner"></div>
181
+ </td>
182
+ </tr>
183
+ </tbody>
184
+ </table>
185
+ </div>
186
+ </section>
187
+ </main>
188
+
189
+ <script>
190
// Currently rendered charts (null until first drawn). The render functions
// destroy the previous instance before building a new one.
let domainsChart = null,
    contentChart = null;

// Populate the page once the document has been parsed.
document.addEventListener('DOMContentLoaded', function onDomReady() {
    loadAllData();
});
198
+
199
/**
 * Refresh every widget on the moderation page.
 *
 * All five loaders are started together and awaited as a group, so the
 * returned promise settles once they have all finished.
 */
async function loadAllData() {
    const pending = [
        loadOverview(),
        loadDomains(),
        loadMentions(),
        loadWords(),
        loadLinkSharers()
    ];
    await Promise.all(pending);
}
208
+
209
/**
 * Fetch the overview counters for the selected timeframe, write them into
 * the four stat cards and redraw the content-distribution chart.
 *
 * Any failure is logged to the console; the cards keep whatever text they
 * showed before.
 */
async function loadOverview() {
    const selected = document.getElementById('timeframe').value;

    try {
        const res = await fetch(`/api/overview?timeframe=${selected}`);
        const overview = await res.json();

        // [element id, field of the overview payload]
        const cards = [
            ['total-links', 'links_count'],
            ['total-media', 'media_count'],
            ['total-mentions', 'mentions_count'],
            ['total-forwards', 'forwards_count']
        ];
        for (const [elementId, field] of cards) {
            document.getElementById(elementId).textContent = formatNumber(overview[field]);
        }

        // Update content distribution chart
        renderContentChart(overview);
    } catch (error) {
        console.error('Error loading overview:', error);
    }
}
227
+
228
/**
 * Fetch the 15 most shared domains for the selected timeframe, render them
 * as a ranked list and feed the first eight into the domains bar chart.
 *
 * Shows an empty-state message when nothing is returned and an error message
 * when anything in the try block throws.
 */
async function loadDomains() {
    const selected = document.getElementById('timeframe').value;
    const container = document.getElementById('domains-list');

    try {
        const res = await fetch(`/api/top/domains?timeframe=${selected}&limit=15`);
        const domains = await res.json();

        if (domains.length === 0) {
            container.innerHTML = '<div class="empty-state">No domains found</div>';
            return;
        }

        // The first three rows get a medal class; the rest get none
        const medals = ['gold', 'silver', 'bronze'];
        const rows = domains.map((item, i) => `
            <div class="list-item">
                <span class="list-rank ${i < 3 ? medals[i] : ''}">#${i + 1}</span>
                <div class="list-info">
                    <div class="list-name">${escapeHtml(item.domain)}</div>
                </div>
                <span class="list-value">${formatNumber(item.count)}</span>
            </div>
        `);
        container.innerHTML = rows.join('');

        // Render domains chart
        renderDomainsChart(domains.slice(0, 8));
    } catch (error) {
        container.innerHTML = '<div class="empty-state">Error loading domains</div>';
    }
}
257
+
258
/**
 * Fetch the 15 most frequent mentions for the selected timeframe and render
 * them as a ranked list.
 *
 * Shows an empty-state message when nothing is returned and an error message
 * when anything in the try block throws.
 */
async function loadMentions() {
    const selected = document.getElementById('timeframe').value;
    const container = document.getElementById('mentions-list');

    try {
        const res = await fetch(`/api/top/mentions?timeframe=${selected}&limit=15`);
        const mentions = await res.json();

        if (mentions.length === 0) {
            container.innerHTML = '<div class="empty-state">No mentions found</div>';
            return;
        }

        // The first three rows get a medal class; the rest get none
        const medals = ['gold', 'silver', 'bronze'];
        const rows = mentions.map((item, i) => `
            <div class="list-item">
                <span class="list-rank ${i < 3 ? medals[i] : ''}">#${i + 1}</span>
                <div class="list-info">
                    <div class="list-name">@${escapeHtml(item.mention)}</div>
                </div>
                <span class="list-value">${formatNumber(item.count)}</span>
            </div>
        `);
        container.innerHTML = rows.join('');
    } catch (error) {
        container.innerHTML = '<div class="empty-state">Error loading mentions</div>';
    }
}
284
+
285
/**
 * Fetch the 15 most used words for the selected timeframe and render them
 * as a ranked list.
 *
 * Shows an empty-state message when nothing is returned and an error message
 * when anything in the try block throws.
 */
async function loadWords() {
    const selected = document.getElementById('timeframe').value;
    const container = document.getElementById('words-list');

    try {
        const res = await fetch(`/api/top/words?timeframe=${selected}&limit=15`);
        const words = await res.json();

        if (words.length === 0) {
            container.innerHTML = '<div class="empty-state">No words found</div>';
            return;
        }

        // The first three rows get a medal class; the rest get none
        const medals = ['gold', 'silver', 'bronze'];
        const rows = words.map((item, i) => `
            <div class="list-item">
                <span class="list-rank ${i < 3 ? medals[i] : ''}">#${i + 1}</span>
                <div class="list-info">
                    <div class="list-name">${escapeHtml(item.word)}</div>
                </div>
                <span class="list-value">${formatNumber(item.count)}</span>
            </div>
        `);
        container.innerHTML = rows.join('');
    } catch (error) {
        container.innerHTML = '<div class="empty-state">Error loading words</div>';
    }
}
311
+
312
/**
 * Fill the "Top Link Sharers" table for the selected timeframe.
 *
 * Requests up to 10 users from /api/users, orders them by link count
 * (descending) and renders one row per user with links, media, messages and
 * the share of that user's messages that contained a link.
 *
 * Shows an empty-state row when no users come back and an error row when
 * anything in the try block throws.
 *
 * NOTE(review): the ranking only covers the 10 users the endpoint returns.
 * If the API orders by something other than link count, heavier link sharers
 * outside that page are missed - confirm against the endpoint.
 */
async function loadLinkSharers() {
    const timeframe = document.getElementById('timeframe').value;
    const tbody = document.getElementById('link-sharers-body');

    try {
        const response = await fetch(`/api/users?timeframe=${timeframe}&limit=10`);
        const data = await response.json();

        // Sort a copy so the response payload itself is left untouched
        const users = [...data.users].sort((a, b) => b.links - a.links).slice(0, 10);

        if (users.length === 0) {
            tbody.innerHTML = '<tr><td colspan="6" class="empty-state">No data found</td></tr>';
            return;
        }

        tbody.innerHTML = users.map((user, i) => {
            // A user without a name must not break the whole table
            const name = String(user.name || 'Unknown');
            const linkRate = user.messages > 0 ? ((user.links / user.messages) * 100).toFixed(1) : 0;
            const rankClass = i === 0 ? 'gold' : i === 1 ? 'silver' : i === 2 ? 'bronze' : '';

            // The avatar initial is user-controlled text too, so it is escaped
            // like the full name.
            // The bar width is twice the rate, capped at 100% (a 50% link rate fills it).
            return `
                <tr>
                    <td><span class="list-rank ${rankClass}">#${i + 1}</span></td>
                    <td>
                        <div class="user-cell">
                            <div class="user-avatar">${escapeHtml(name.charAt(0).toUpperCase())}</div>
                            <div>
                                <div class="list-name">${escapeHtml(name)}</div>
                            </div>
                        </div>
                    </td>
                    <td><strong>${formatNumber(user.links)}</strong></td>
                    <td>${formatNumber(user.media)}</td>
                    <td>${formatNumber(user.messages)}</td>
                    <td>
                        ${linkRate}%
                        <div class="progress-bar">
                            <div class="progress-fill" style="width: ${Math.min(linkRate * 2, 100)}%"></div>
                        </div>
                    </td>
                </tr>
            `;
        }).join('');
    } catch (error) {
        tbody.innerHTML = '<tr><td colspan="6" class="empty-state">Error loading data</td></tr>';
    }
}
359
+
360
/**
 * Draw the horizontal bar chart of shared domains on the #domains-chart canvas.
 *
 * @param {Array<{domain: string, count: number}>} data  Entries to plot; each
 *        label is the domain cut to its first 15 characters.
 */
function renderDomainsChart(data) {
    const canvasCtx = document.getElementById('domains-chart').getContext('2d');

    // Destroy the previous chart instance, if any, before creating a new one
    if (domainsChart) {
        domainsChart.destroy();
    }

    const barColors = [
        'rgba(0, 136, 204, 0.8)',
        'rgba(40, 167, 69, 0.8)',
        'rgba(255, 193, 7, 0.8)',
        'rgba(220, 53, 69, 0.8)',
        'rgba(23, 162, 184, 0.8)',
        'rgba(108, 117, 125, 0.8)',
        'rgba(111, 66, 193, 0.8)',
        'rgba(253, 126, 20, 0.8)'
    ];

    const chartData = {
        labels: data.map(entry => entry.domain.substring(0, 15)),
        datasets: [{
            data: data.map(entry => entry.count),
            backgroundColor: barColors,
            borderWidth: 0
        }]
    };

    const chartOptions = {
        indexAxis: 'y', // bars run horizontally
        responsive: true,
        maintainAspectRatio: false,
        plugins: { legend: { display: false } },
        scales: {
            x: {
                grid: { color: 'rgba(255, 255, 255, 0.1)' },
                ticks: { color: '#a0aec0' }
            },
            y: {
                grid: { display: false },
                ticks: { color: '#a0aec0' }
            }
        }
    };

    domainsChart = new Chart(canvasCtx, {
        type: 'bar',
        data: chartData,
        options: chartOptions
    });
}
402
+
403
/**
 * Draw the content-type doughnut on the #content-chart canvas.
 *
 * @param {Object} data  Overview payload; reads total_messages, links_count,
 *        media_count, replies_count and forwards_count.
 *
 * The "Text Only" slice is total_messages minus links and media, clamped at
 * zero so it never goes negative.
 */
function renderContentChart(data) {
    const canvasCtx = document.getElementById('content-chart').getContext('2d');

    // Destroy the previous chart instance, if any, before creating a new one
    if (contentChart) {
        contentChart.destroy();
    }

    const textOnly = data.total_messages - data.links_count - data.media_count;

    const sliceValues = [
        Math.max(0, textOnly),
        data.links_count,
        data.media_count,
        data.replies_count,
        data.forwards_count
    ];
    const sliceColors = [
        'rgba(0, 136, 204, 0.8)',
        'rgba(40, 167, 69, 0.8)',
        'rgba(255, 193, 7, 0.8)',
        'rgba(23, 162, 184, 0.8)',
        'rgba(108, 117, 125, 0.8)'
    ];

    const chartConfig = {
        type: 'doughnut',
        data: {
            labels: ['Text Only', 'With Links', 'With Media', 'Replies', 'Forwards'],
            datasets: [{
                data: sliceValues,
                backgroundColor: sliceColors,
                borderWidth: 0
            }]
        },
        options: {
            responsive: true,
            maintainAspectRatio: false,
            plugins: {
                legend: {
                    position: 'right',
                    labels: { color: '#a0aec0' }
                }
            }
        }
    };

    contentChart = new Chart(canvasCtx, chartConfig);
}
444
+
445
+ // Helper functions
446
/**
 * Format a count for display: values of one million or more become "1.2M",
 * values of one thousand or more become "1.2K", smaller values are printed
 * as-is.
 *
 * Missing or non-numeric input (null, undefined, '', NaN, +/-Infinity) yields
 * '0' instead of throwing, so one absent field in an API payload cannot abort
 * the caller's whole rendering pass.
 *
 * @param {number|string|null|undefined} num  Value to format.
 * @returns {string} Human-readable number.
 */
function formatNumber(num) {
    if (num === null || num === undefined || num === '') return '0';

    const value = Number(num);
    if (!Number.isFinite(value)) return '0';

    if (value >= 1000000) return (value / 1000000).toFixed(1) + 'M';
    if (value >= 1000) return (value / 1000).toFixed(1) + 'K';
    return value.toString();
}
451
+
452
/**
 * Escape text for safe insertion into HTML markup.
 *
 * Replaces &, <, >, " and ' with character references, so the result is safe
 * both as element content and inside a quoted attribute value. Works on plain
 * strings, so no DOM node is needed.
 *
 * @param {*} text  Value to escape; null and undefined become ''.
 * @returns {string} Escaped text.
 */
function escapeHtml(text) {
    if (text === null || text === undefined) return '';

    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    };
    return String(text).replace(/[&<>"']/g, ch => entities[ch]);
}
457
+ </script>
458
+ </body>
459
+ </html>
templates/search.html ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Search - Telegram Analytics</title>
7
+ <link rel="stylesheet" href="/static/css/style.css">
8
+ </head>
9
+ <body>
10
+ <!-- Sidebar -->
11
+ <nav class="sidebar">
12
+ <div class="logo">
13
+ <span class="logo-icon">📊</span>
14
+ <span class="logo-text">TG Analytics</span>
15
+ </div>
16
+ <ul class="nav-menu">
17
+ <li class="nav-item">
18
+ <a href="/" class="nav-link">
19
+ <span class="icon">📈</span>
20
+ <span>Overview</span>
21
+ </a>
22
+ </li>
23
+ <li class="nav-item">
24
+ <a href="/users" class="nav-link">
25
+ <span class="icon">👥</span>
26
+ <span>Users</span>
27
+ </a>
28
+ </li>
29
+ <li class="nav-item">
30
+ <a href="/chat" class="nav-link">
31
+ <span class="icon">💬</span>
32
+ <span>Chat</span>
33
+ </a>
34
+ </li>
35
+ <li class="nav-item active">
36
+ <a href="/search" class="nav-link">
37
+ <span class="icon">🔍</span>
38
+ <span>Search</span>
39
+ </a>
40
+ </li>
41
+ <li class="nav-item">
42
+ <a href="/moderation" class="nav-link">
43
+ <span class="icon">🛡️</span>
44
+ <span>Moderation</span>
45
+ </a>
46
+ </li>
47
+ <li class="nav-item">
48
+ <a href="/settings" class="nav-link">
49
+ <span class="icon">⚙️</span>
50
+ <span>Settings</span>
51
+ </a>
52
+ </li>
53
+ </ul>
54
+ <div class="sidebar-footer">
55
+ <div class="export-buttons">
56
+ <button onclick="exportMessages()" class="btn btn-sm">📥 Export Messages</button>
57
+ </div>
58
+ </div>
59
+ </nav>
60
+
61
+ <!-- Main Content -->
62
+ <main class="main-content">
63
+ <!-- Header -->
64
+ <header class="header">
65
+ <h1>Search Messages</h1>
66
+ <div class="header-controls">
67
+ <select id="timeframe" class="select">
68
+ <option value="today">Today</option>
69
+ <option value="yesterday">Yesterday</option>
70
+ <option value="week">This Week</option>
71
+ <option value="month">This Month</option>
72
+ <option value="year">This Year</option>
73
+ <option value="all" selected>All Time</option>
74
+ </select>
75
+ </div>
76
+ </header>
77
+
78
+ <!-- Search Box -->
79
+ <section class="search-box">
80
+ <input type="search" id="search-input" class="search-input"
81
+ placeholder="Search messages... (supports Hebrew and English)"
82
+ onkeypress="if(event.key === 'Enter') performSearch()">
83
+ <button onclick="performSearch()" class="btn btn-primary">🔍 Search</button>
84
+ </section>
85
+
86
+ <!-- Search Tips -->
87
+ <section class="chart-card" style="margin-bottom: var(--spacing-xl);">
88
+ <div class="chart-header">
89
+ <h3>Search Tips</h3>
90
+ </div>
91
+ <div style="padding: var(--spacing-md); color: var(--text-secondary); font-size: 0.875rem;">
92
+ <ul style="list-style: none; display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 1rem;">
93
+ <li><strong>word1 word2</strong> - Messages containing both words</li>
94
+ <li><strong>"exact phrase"</strong> - Messages with exact phrase</li>
95
+ <li><strong>word1 OR word2</strong> - Messages with either word</li>
96
+ <li><strong>word*</strong> - Prefix search (word, words, wording)</li>
97
+ <li><strong>NOT word</strong> - Exclude messages with word</li>
98
+ <li><strong>Hebrew supported</strong> - Full Hebrew text search</li>
99
+ </ul>
100
+ </div>
101
+ </section>
102
+
103
+ <!-- Search Stats -->
104
+ <section class="stats-grid" id="search-stats" style="display: none;">
105
+ <div class="stat-card">
106
+ <div class="stat-icon">🔍</div>
107
+ <div class="stat-content">
108
+ <div class="stat-value" id="result-count">0</div>
109
+ <div class="stat-label">Results Found</div>
110
+ </div>
111
+ </div>
112
+ <div class="stat-card">
113
+ <div class="stat-icon">⚡</div>
114
+ <div class="stat-content">
115
+ <div class="stat-value" id="search-time">0ms</div>
116
+ <div class="stat-label">Search Time</div>
117
+ </div>
118
+ </div>
119
+ </section>
120
+
121
+ <!-- Search Results -->
122
+ <section class="search-results" id="search-results">
123
+ <div class="empty-state">
124
+ <div class="empty-state-icon">🔍</div>
125
+ <p>Enter a search term to find messages</p>
126
+ </div>
127
+ </section>
128
+
129
+ <!-- Pagination -->
130
+ <div class="pagination" id="pagination"></div>
131
+ </main>
132
+
133
+ <script>
134
// Search state: the last submitted query, the page being shown, and the
// number of results requested per page.
let currentQuery = '',
    currentPage = 1;
const pageSize = 20;
138
+
139
/**
 * Run a message search and render one page of results.
 *
 * Reads the query from #search-input and the timeframe from #timeframe. An
 * empty query resets the page to its initial hint. Otherwise the function
 * requests `pageSize` results at the offset for `page`, shows the result
 * count and the round-trip time, renders the hits with the query terms
 * highlighted, and draws the pagination controls.
 *
 * @param {number} [page=1]  1-based page number to load.
 */
async function performSearch(page = 1) {
    const query = document.getElementById('search-input').value.trim();
    const timeframe = document.getElementById('timeframe').value;

    if (!query) {
        document.getElementById('search-results').innerHTML = `
            <div class="empty-state">
                <div class="empty-state-icon">🔍</div>
                <p>Enter a search term to find messages</p>
            </div>
        `;
        document.getElementById('search-stats').style.display = 'none';
        document.getElementById('pagination').innerHTML = '';
        return;
    }

    currentQuery = query;
    currentPage = page;

    const resultsDiv = document.getElementById('search-results');
    resultsDiv.innerHTML = '<div class="loading"><div class="spinner"></div></div>';

    const startTime = performance.now();

    try {
        const offset = (page - 1) * pageSize;
        const response = await fetch(
            `/api/search?q=${encodeURIComponent(query)}&timeframe=${encodeURIComponent(timeframe)}&limit=${pageSize}&offset=${offset}`
        );
        // Treat HTTP errors as failures instead of trying to render an error body
        if (!response.ok) {
            throw new Error(`Search request failed with status ${response.status}`);
        }
        const data = await response.json();

        const endTime = performance.now();
        const searchTime = Math.round(endTime - startTime);

        // Show stats. A full page gets a "+" suffix because the total number
        // of matches is not known.
        document.getElementById('search-stats').style.display = 'grid';
        document.getElementById('result-count').textContent = data.results.length +
            (data.results.length === pageSize ? '+' : '');
        document.getElementById('search-time').textContent = searchTime + 'ms';

        if (data.results.length === 0) {
            resultsDiv.innerHTML = `
                <div class="empty-state">
                    <div class="empty-state-icon">😕</div>
                    <p>No messages found for "${escapeHtml(query)}"</p>
                </div>
            `;
            document.getElementById('pagination').innerHTML = '';
            return;
        }

        // Every server-provided string is escaped before it reaches innerHTML,
        // including the date field.
        resultsDiv.innerHTML = data.results.map(result => `
            <div class="search-result-item">
                <div class="search-result-header">
                    <span class="search-result-author">${escapeHtml(result.from_name || 'Unknown')}</span>
                    <span class="search-result-date">${escapeHtml(result.date || '')}</span>
                </div>
                <div class="search-result-text">${highlightQuery(result.text, query)}</div>
                <div style="margin-top: 0.5rem; font-size: 0.75rem; color: var(--text-muted);">
                    ${result.has_links ? '🔗 Link' : ''}
                    ${result.has_media ? '🖼️ Media' : ''}
                </div>
            </div>
        `).join('');

        // Simple pagination (since we don't have total count from FTS):
        // a full page is taken as a sign that more results may exist.
        renderPagination(data.results.length === pageSize);

    } catch (error) {
        console.error('Error performing search:', error);
        resultsDiv.innerHTML = `
            <div class="empty-state">
                <div class="empty-state-icon">❌</div>
                <p>Error performing search. Please try again.</p>
            </div>
        `;
    }
}
216
+
217
/**
 * Draw the Previous / Page N / Next controls into #pagination.
 *
 * Nothing is shown when the first page is also the last one. "Previous" is
 * disabled on page 1 and "Next" is disabled when `hasMore` is false.
 *
 * @param {boolean} hasMore  Whether a further page may exist.
 */
function renderPagination(hasMore) {
    const bar = document.getElementById('pagination');
    const onFirstPage = currentPage === 1;

    if (onFirstPage && !hasMore) {
        bar.innerHTML = '';
        return;
    }

    const controls = [
        `<button class="page-btn" onclick="performSearch(${currentPage - 1})"
            ${onFirstPage ? 'disabled' : ''}>&laquo; Previous</button>`,
        `<span style="padding: 0 1rem; color: var(--text-muted);">Page ${currentPage}</span>`,
        `<button class="page-btn" onclick="performSearch(${currentPage + 1})"
            ${!hasMore ? 'disabled' : ''}>Next &raquo;</button>`
    ];

    bar.innerHTML = controls.join('');
}
237
+
238
/**
 * Return `text` as HTML with every occurrence of a query term wrapped in
 * <span class="search-highlight">.
 *
 * Quotes and `*` are stripped from the query and the operators OR / NOT are
 * ignored; the remaining whitespace-separated terms are matched
 * case-insensitively.
 *
 * Matching runs on the raw text in a single pass, and each piece is escaped
 * afterwards. This way a term such as "amp", "span" or "class" can never
 * match inside an HTML entity or inside a highlight tag inserted for another
 * term.
 *
 * @param {string} text   Raw (unescaped) message text.
 * @param {string} query  Search query as typed by the user.
 * @returns {string} HTML-safe markup; '' when `text` is empty.
 */
function highlightQuery(text, query) {
    if (!text) return '';

    const raw = String(text);
    const words = query
        .replace(/["*]/g, '')
        .split(/\s+/)
        .filter(w => w && w !== 'OR' && w !== 'NOT');

    if (words.length === 0) return escapeHtml(raw);

    // Longest term first so that e.g. "wording" wins over "word" at the same position
    const alternatives = words
        .sort((a, b) => b.length - a.length)
        .map(w => escapeRegex(w))
        .join('|');
    const matcher = new RegExp(`(${alternatives})`, 'gi');

    // split() with one capturing group alternates: plain, match, plain, match, ...
    return raw
        .split(matcher)
        .map((piece, i) => (i % 2 === 1
            ? `<span class="search-highlight">${escapeHtml(piece)}</span>`
            : escapeHtml(piece)))
        .join('');
}
254
+
255
/**
 * Escape text for safe insertion into HTML markup.
 *
 * Replaces &, <, >, " and ' with character references, so the result is safe
 * both as element content and inside a quoted attribute value. Works on plain
 * strings, so no DOM node is needed.
 *
 * @param {*} text  Value to escape; null and undefined become ''.
 * @returns {string} Escaped text.
 */
function escapeHtml(text) {
    if (text === null || text === undefined) return '';

    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    };
    return String(text).replace(/[&<>"']/g, ch => entities[ch]);
}
260
+
261
/**
 * Backslash-escape every regular-expression metacharacter in `string` so the
 * result can be embedded in a RegExp and match the input literally.
 *
 * @param {string} string  Text to escape.
 * @returns {string} Text with . * + ? ^ $ { } ( ) | [ ] \ escaped.
 */
function escapeRegex(string) {
    const metaChars = /[.*+?^${}()|[\]\\]/g;
    const escaped = string.replace(metaChars, '\\$&');
    return escaped;
}
264
+
265
/**
 * Start a message export for the timeframe currently selected in #timeframe
 * by navigating the browser to the export endpoint.
 */
function exportMessages() {
    const selected = document.getElementById('timeframe').value;
    const exportUrl = `/api/export/messages?timeframe=${selected}`;
    window.location.href = exportUrl;
}
269
+
270
+ // AI Search
271
/**
 * Send the AI query to /api/ai/search and render the answer.
 *
 * Reads the question from #ai-query and the mode from #ai-mode, shows a
 * mode-specific loading message in #ai-answer, then displays the answer, a
 * short summary of how it was produced, and the generated SQL (if any) in
 * #ai-sql. When the response carries message results, they are also shown in
 * the main results list.
 *
 * Does nothing (apart from a console warning) when the AI panel elements are
 * not present in the page.
 */
async function aiSearch() {
    const queryInput = document.getElementById('ai-query');
    const modeInput = document.getElementById('ai-mode');
    const resultDiv = document.getElementById('ai-result');
    const answerDiv = document.getElementById('ai-answer');
    const sqlPre = document.getElementById('ai-sql');

    // Without the panel markup there is nothing to read from or write to
    if (!queryInput || !modeInput || !resultDiv || !answerDiv || !sqlPre) {
        console.warn('aiSearch: AI search panel elements are missing from the page');
        return;
    }

    const query = queryInput.value.trim();
    const mode = modeInput.value;
    if (!query) return;

    resultDiv.style.display = 'block';
    const loadingMessages = {
        'context': 'קורא הודעות ומנתח...',
        'semantic': 'מחפש לפי משמעות + שולח ל-AI...',
        'sql': 'מחפש...',
        'auto': 'מחפש...'
    };
    answerDiv.textContent = loadingMessages[mode] || 'מחפש...';
    sqlPre.textContent = '';

    try {
        const response = await fetch('/api/ai/search', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ query, mode })
        });

        const data = await response.json();

        if (data.error) {
            answerDiv.innerHTML = `<span style="color:#ff6b6b;">שגיאה: ${escapeHtml(data.error)}</span>`;
        } else {
            let html = escapeHtml(data.answer || 'לא נמצאה תשובה');

            // Show mode info. Every value taken from the response is escaped
            // because the string below is assigned to innerHTML.
            if (data.mode === 'context_search') {
                html += `<br><br><small style="color:var(--text-muted);">🧠 Hybrid Search: קראתי ${escapeHtml(String(data.context_messages))} הודעות`;
                if (data.context_user) html += ` מ"${escapeHtml(data.context_user)}"`;
                if (Array.isArray(data.keywords_used) && data.keywords_used.length > 0) {
                    const keywords = data.keywords_used.slice(0, 5).map(k => escapeHtml(k)).join(', ');
                    html += `<br>🔑 מילות מפתח: ${keywords}`;
                }
                html += `</small>`;
            } else if (data.mode === 'semantic_ai' || data.mode === 'semantic') {
                html += `<br><br><small style="color:var(--text-muted);">🔮 Semantic + AI: נמצאו ${escapeHtml(String(data.count))} הודעות דומות`;
                if (data.total_with_threads && data.total_with_threads > data.count) {
                    html += ` + ${escapeHtml(String(data.total_with_threads - data.count))} הודעות מ-threads`;
                }
                html += `</small>`;
            }

            answerDiv.innerHTML = html;
            sqlPre.textContent = data.sql || '';

            // If results contain messages, optionally populate main search
            if (data.results && data.results.length > 0 && data.results[0].text) {
                displayAIResults(data.results);
            }
        }
    } catch (error) {
        answerDiv.textContent = `שגיאה: ${error.message}`;
    }
}
332
+
333
/**
 * Show messages returned by the AI search in the main results list.
 *
 * Sets the stats row to the number of results and the label "AI", then
 * renders at most the first 20 messages. Does nothing for an empty list.
 *
 * @param {Array<Object>} results  Messages; reads from_name, date, score and
 *        text from each entry. A truthy score is shown as a percentage.
 */
function displayAIResults(results) {
    const resultsDiv = document.getElementById('search-results');

    if (results.length === 0) return;

    document.getElementById('search-stats').style.display = 'grid';
    document.getElementById('result-count').textContent = results.length;
    document.getElementById('search-time').textContent = 'AI';

    // Every string taken from a result is escaped, including the date, since
    // the markup is assigned to innerHTML.
    resultsDiv.innerHTML = results.slice(0, 20).map(result => `
        <div class="search-result-item">
            <div class="search-result-header">
                <span class="search-result-author">${escapeHtml(result.from_name || 'Unknown')}</span>
                <span class="search-result-date">${escapeHtml(result.date || '')}${result.score ? ` (${(result.score * 100).toFixed(0)}% דמיון)` : ''}</span>
            </div>
            <div class="search-result-text">${escapeHtml(result.text || '')}</div>
        </div>
    `).join('');
}
352
+
353
// Put the cursor in the search box as soon as the document has been parsed.
document.addEventListener('DOMContentLoaded', function focusSearchBox() {
    const searchBox = document.getElementById('search-input');
    searchBox.focus();
});
357
+ </script>
358
+ </body>
359
+ </html>
templates/settings.html ADDED
@@ -0,0 +1,444 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="he" dir="rtl">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Settings - Telegram Analytics</title>
7
+ <link rel="stylesheet" href="/static/css/style.css">
8
+ <style>
9
+ .upload-zone {
10
+ border: 2px dashed var(--border-color);
11
+ border-radius: var(--radius-lg);
12
+ padding: var(--spacing-xl);
13
+ text-align: center;
14
+ transition: all 0.3s ease;
15
+ cursor: pointer;
16
+ margin-bottom: var(--spacing-xl);
17
+ }
18
+
19
+ .upload-zone:hover,
20
+ .upload-zone.dragover {
21
+ border-color: var(--primary);
22
+ background: rgba(0, 136, 204, 0.1);
23
+ }
24
+
25
+ .upload-zone-icon {
26
+ font-size: 3rem;
27
+ margin-bottom: var(--spacing-md);
28
+ }
29
+
30
+ .upload-zone-text {
31
+ color: var(--text-secondary);
32
+ margin-bottom: var(--spacing-sm);
33
+ }
34
+
35
+ .upload-zone-hint {
36
+ font-size: 0.75rem;
37
+ color: var(--text-muted);
38
+ }
39
+
40
+ .upload-progress {
41
+ display: none;
42
+ margin-top: var(--spacing-lg);
43
+ }
44
+
45
+ .upload-progress.active {
46
+ display: block;
47
+ }
48
+
49
+ .progress-bar-container {
50
+ background: var(--bg-sidebar);
51
+ border-radius: var(--radius-md);
52
+ height: 20px;
53
+ overflow: hidden;
54
+ }
55
+
56
+ .progress-bar-fill {
57
+ height: 100%;
58
+ background: var(--primary);
59
+ transition: width 0.3s ease;
60
+ display: flex;
61
+ align-items: center;
62
+ justify-content: center;
63
+ color: white;
64
+ font-size: 0.75rem;
65
+ }
66
+
67
+ .upload-result {
68
+ display: none;
69
+ margin-top: var(--spacing-lg);
70
+ padding: var(--spacing-lg);
71
+ border-radius: var(--radius-lg);
72
+ }
73
+
74
+ .upload-result.success {
75
+ display: block;
76
+ background: rgba(40, 167, 69, 0.2);
77
+ border: 1px solid var(--success);
78
+ }
79
+
80
+ .upload-result.error {
81
+ display: block;
82
+ background: rgba(220, 53, 69, 0.2);
83
+ border: 1px solid var(--danger);
84
+ }
85
+
86
+ .result-title {
87
+ font-weight: 600;
88
+ margin-bottom: var(--spacing-md);
89
+ display: flex;
90
+ align-items: center;
91
+ gap: var(--spacing-sm);
92
+ }
93
+
94
+ .result-stats {
95
+ display: grid;
96
+ grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
97
+ gap: var(--spacing-md);
98
+ }
99
+
100
+ .result-stat {
101
+ text-align: center;
102
+ padding: var(--spacing-md);
103
+ background: var(--bg-sidebar);
104
+ border-radius: var(--radius-md);
105
+ }
106
+
107
+ .result-stat-value {
108
+ font-size: 1.5rem;
109
+ font-weight: 700;
110
+ color: var(--primary);
111
+ }
112
+
113
+ .result-stat-label {
114
+ font-size: 0.75rem;
115
+ color: var(--text-muted);
116
+ }
117
+
118
+ .db-stats {
119
+ display: grid;
120
+ grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
121
+ gap: var(--spacing-md);
122
+ margin-bottom: var(--spacing-xl);
123
+ }
124
+
125
+ .db-stat {
126
+ background: var(--bg-card);
127
+ border-radius: var(--radius-lg);
128
+ padding: var(--spacing-lg);
129
+ border: 1px solid var(--border-color);
130
+ }
131
+
132
+ .db-stat-value {
133
+ font-size: 1.75rem;
134
+ font-weight: 700;
135
+ color: var(--primary);
136
+ }
137
+
138
+ .db-stat-label {
139
+ font-size: 0.875rem;
140
+ color: var(--text-muted);
141
+ margin-top: var(--spacing-xs);
142
+ }
143
+
144
+ .instructions {
145
+ background: var(--bg-card);
146
+ border-radius: var(--radius-lg);
147
+ padding: var(--spacing-lg);
148
+ border: 1px solid var(--border-color);
149
+ }
150
+
151
+ .instructions h3 {
152
+ margin-bottom: var(--spacing-md);
153
+ }
154
+
155
+ .instructions ol {
156
+ padding-right: var(--spacing-lg);
157
+ color: var(--text-secondary);
158
+ line-height: 1.8;
159
+ }
160
+
161
+ .instructions code {
162
+ background: var(--bg-sidebar);
163
+ padding: 2px 6px;
164
+ border-radius: var(--radius-sm);
165
+ font-family: monospace;
166
+ }
167
+ </style>
168
+ </head>
169
+ <body>
170
+ <!-- Sidebar -->
171
+ <nav class="sidebar">
172
+ <div class="logo">
173
+ <span class="logo-icon">📊</span>
174
+ <span class="logo-text">TG Analytics</span>
175
+ </div>
176
+ <ul class="nav-menu">
177
+ <li class="nav-item">
178
+ <a href="/" class="nav-link">
179
+ <span class="icon">📈</span>
180
+ <span>Overview</span>
181
+ </a>
182
+ </li>
183
+ <li class="nav-item">
184
+ <a href="/users" class="nav-link">
185
+ <span class="icon">👥</span>
186
+ <span>Users</span>
187
+ </a>
188
+ </li>
189
+ <li class="nav-item">
190
+ <a href="/chat" class="nav-link">
191
+ <span class="icon">💬</span>
192
+ <span>Chat</span>
193
+ </a>
194
+ </li>
195
+ <li class="nav-item">
196
+ <a href="/search" class="nav-link">
197
+ <span class="icon">🔍</span>
198
+ <span>Search</span>
199
+ </a>
200
+ </li>
201
+ <li class="nav-item">
202
+ <a href="/moderation" class="nav-link">
203
+ <span class="icon">🛡️</span>
204
+ <span>Moderation</span>
205
+ </a>
206
+ </li>
207
+ <li class="nav-item active">
208
+ <a href="/settings" class="nav-link">
209
+ <span class="icon">⚙️</span>
210
+ <span>Settings</span>
211
+ </a>
212
+ </li>
213
+ </ul>
214
+ </nav>
215
+
216
+ <!-- Main Content -->
217
+ <main class="main-content">
218
+ <!-- Header -->
219
+ <header class="header">
220
+ <h1>⚙️ Settings & Update Data</h1>
221
+ </header>
222
+
223
+ <!-- Database Stats -->
224
+ <section>
225
+ <h2 style="margin-bottom: var(--spacing-md);">📊 Database Status</h2>
226
+ <div class="db-stats" id="db-stats">
227
+ <div class="db-stat">
228
+ <div class="db-stat-value" id="stat-messages">-</div>
229
+ <div class="db-stat-label">Total Messages</div>
230
+ </div>
231
+ <div class="db-stat">
232
+ <div class="db-stat-value" id="stat-users">-</div>
233
+ <div class="db-stat-label">Total Users</div>
234
+ </div>
235
+ <div class="db-stat">
236
+ <div class="db-stat-value" id="stat-first">-</div>
237
+ <div class="db-stat-label">First Message</div>
238
+ </div>
239
+ <div class="db-stat">
240
+ <div class="db-stat-value" id="stat-last">-</div>
241
+ <div class="db-stat-label">Last Message</div>
242
+ </div>
243
+ <div class="db-stat">
244
+ <div class="db-stat-value" id="stat-size">-</div>
245
+ <div class="db-stat-label">Database Size</div>
246
+ </div>
247
+ </div>
248
+ </section>
249
+
250
+ <!-- Upload Section (disabled - updates done locally) -->
251
+ <section class="chart-card" style="margin-bottom: var(--spacing-xl); opacity: 0.6;">
252
+ <div class="chart-header">
253
+ <h3>📤 Update Database</h3>
254
+ </div>
255
+ <div style="padding: var(--spacing-lg); text-align: center; color: var(--text-muted);">
256
+ <p>עדכוני מסד הנתונים מתבצעים מקומית באמצעות daily_sync.py</p>
257
+ </div>
258
+ </section>
259
+
260
+ <!-- Instructions -->
261
+ <section class="instructions">
262
+ <h3>📖 איך לייצא נתונים מטלגרם</h3>
263
+ <ol>
264
+ <li>פתח את <strong>Telegram Desktop</strong> (לא ניתן מהאפליקציה הניידת)</li>
265
+ <li>לך ל-<strong>Settings → Advanced → Export Telegram data</strong></li>
266
+ <li>בחר את הקבוצה/צ'אט שברצונך לייצא</li>
267
+ <li>סמן <strong>JSON</strong> כפורמט הייצוא</li>
268
+ <li>לחץ <strong>Export</strong> והמתן לסיום</li>
269
+ <li>העלה את קובץ <code>result.json</code> כאן</li>
270
+ </ol>
271
+ <div style="margin-top: var(--spacing-lg); padding: var(--spacing-md); background: var(--bg-sidebar); border-radius: var(--radius-md);">
272
+ <strong>💡 טיפ:</strong> המערכת תזהה אוטומטית הודעות כפולות ותוסיף רק הודעות חדשות.
273
+ אין צורך לדאוג מהעלאת אותו קובץ פעמיים.
274
+ </div>
275
+ </section>
276
+
277
+ <!-- CLI Instructions -->
278
+ <section class="instructions" style="margin-top: var(--spacing-xl);">
279
+ <h3>💻 עדכון דרך שורת הפקודה</h3>
280
+ <p style="color: var(--text-secondary); margin-bottom: var(--spacing-md);">
281
+ לקבצים גדולים, מומלץ להשתמש בשורת הפקודה:
282
+ </p>
283
+ <pre style="background: var(--bg-sidebar); padding: var(--spacing-md); border-radius: var(--radius-md); overflow-x: auto; direction: ltr; text-align: left;">
284
+ # עדכון database קיים עם JSON חדש
285
+ python indexer.py new_export.json --db telegram.db --update
286
+
287
+ # יצירת database חדש
288
+ python indexer.py result.json --db telegram.db
289
+ </pre>
290
+ </section>
291
+ </main>
292
+
293
+ <script>
294
+ // Load database stats on page load
295
+ document.addEventListener('DOMContentLoaded', loadDbStats);
296
+
297
/**
 * Fetch database statistics from /api/db/stats and fill in the
 * "Database Status" cards (#stat-messages, #stat-users, #stat-first,
 * #stat-last, #stat-size).
 *
 * Missing or unusable values are rendered as "-". Network errors, non-2xx
 * responses and malformed JSON are logged to the console and leave the
 * cards untouched.
 *
 * @returns {Promise<void>}
 */
async function loadDbStats() {
    const PLACEHOLDER = '-';

    // Write text into a card, silently skipping cards that are not on the page.
    const setText = (id, text) => {
        const el = document.getElementById(id);
        if (el) el.textContent = text;
    };

    // Locale-formatted count, or the placeholder when the field is absent.
    const asCount = (value) => value?.toLocaleString() || PLACEHOLDER;

    // Hebrew-locale date, or the placeholder when the value is missing or
    // cannot be parsed (avoids showing the literal text "Invalid Date").
    const asDate = (value) => {
        if (!value) return PLACEHOLDER;
        const parsed = new Date(value);
        return Number.isNaN(parsed.getTime())
            ? PLACEHOLDER
            : parsed.toLocaleDateString('he-IL');
    };

    try {
        const response = await fetch('/api/db/stats');
        // fetch() only rejects on network failure; surface HTTP errors explicitly.
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }
        const stats = await response.json();

        setText('stat-messages', asCount(stats.total_messages));
        setText('stat-users', asCount(stats.total_users));
        setText('stat-first', asDate(stats.first_message));
        setText('stat-last', asDate(stats.last_message));
        setText('stat-size', stats.db_size_mb ? `${stats.db_size_mb} MB` : PLACEHOLDER);
    } catch (error) {
        console.error('Error loading db stats:', error);
    }
}
316
+
317
// Drag and drop handlers.
// The page body no longer contains an element with id "upload-zone" (the
// upload section was replaced by a "disabled" notice), so the lookup can
// return null. Only wire the handlers when the element exists; otherwise
// the script would throw a TypeError while loading.
const uploadZone = document.getElementById('upload-zone');

if (uploadZone) {
    // Allow dropping and show the hover style while a file is dragged over.
    uploadZone.addEventListener('dragover', (e) => {
        e.preventDefault();
        uploadZone.classList.add('dragover');
    });

    uploadZone.addEventListener('dragleave', () => {
        uploadZone.classList.remove('dragover');
    });

    // Upload the first dropped file; additional files are ignored.
    uploadZone.addEventListener('drop', (e) => {
        e.preventDefault();
        uploadZone.classList.remove('dragover');

        const files = e.dataTransfer.files;
        if (files.length > 0) {
            uploadFile(files[0]);
        }
    });
}
338
+
339
/**
 * Change handler for a file input: forwards the first selected file to
 * uploadFile(). Does nothing when the selection is empty.
 *
 * @param {Event} event - change event whose target exposes a `files` list.
 */
function handleFileSelect(event) {
    const picked = event.target.files[0];
    if (!picked) {
        return;
    }
    uploadFile(picked);
}
345
+
346
/**
 * Upload a JSON export to /api/update and report the outcome on the page.
 *
 * Flow: validate the extension, show the progress bar, POST the file as
 * multipart form data, then render either the success statistics or an
 * error message. The progress bar is hidden again one second after the
 * request settles.
 *
 * @param {File} file - the file chosen or dropped by the user.
 * @returns {Promise<void>}
 */
async function uploadFile(file) {
    // Compare case-insensitively so "RESULT.JSON" is accepted too.
    if (!file.name.toLowerCase().endsWith('.json')) {
        showError('נא לבחור קובץ JSON בלבד');
        return;
    }

    const progressDiv = document.getElementById('upload-progress');
    const progressFill = document.getElementById('progress-fill');
    const progressText = document.getElementById('progress-text');
    const resultDiv = document.getElementById('upload-result');

    // The upload widgets may be absent from the page; bail out with a clear
    // log line instead of failing on a null dereference.
    if (!progressDiv || !progressFill || !progressText || !resultDiv) {
        console.error('Upload UI elements are missing from the page; upload aborted.');
        return;
    }

    // Update the bar width/label and, optionally, the status line beneath it.
    const setProgress = (percent, label) => {
        progressFill.style.width = `${percent}%`;
        progressFill.textContent = `${percent}%`;
        if (label !== undefined) {
            progressText.textContent = label;
        }
    };

    // Reset and show progress
    progressDiv.classList.add('active');
    resultDiv.className = 'upload-result';
    setProgress(0, `מעלה ${file.name}...`);

    try {
        // Read file
        setProgress(20, 'קורא קובץ...');

        const formData = new FormData();
        formData.append('file', file);

        // Upload
        setProgress(50, 'מעבד נתונים...');

        const response = await fetch('/api/update', {
            method: 'POST',
            body: formData
        });

        // An error page (e.g. an HTML 500) is not JSON; report the HTTP status
        // rather than a cryptic parser message.
        let result;
        try {
            result = await response.json();
        } catch (parseError) {
            throw new Error(`HTTP ${response.status} ${response.statusText}`.trim());
        }

        setProgress(100);

        if (result && result.success) {
            showSuccess(result.stats);
            loadDbStats(); // Refresh stats
        } else {
            showError((result && result.error) || 'שגיאה לא ידועה');
        }
    } catch (error) {
        showError(error.message);
    }

    // Hide progress after a delay
    setTimeout(() => {
        progressDiv.classList.remove('active');
    }, 1000);
}
404
+
405
/**
 * Render the "update completed" panel with the statistics returned by the
 * server.
 *
 * @param {Object} [stats] - may be missing entirely; every field is optional.
 * @param {number} [stats.total_in_file] - messages found in the uploaded file.
 * @param {number} [stats.new_messages] - messages newly added.
 * @param {number} [stats.duplicates] - messages skipped as duplicates.
 * @param {number} [stats.elapsed_seconds] - processing time in seconds.
 */
function showSuccess(stats) {
    const resultDiv = document.getElementById('upload-result');
    const resultTitle = document.getElementById('result-title');
    const resultStats = document.getElementById('result-stats');

    // The result widgets may be absent from the page.
    if (!resultDiv || !resultTitle || !resultStats) {
        console.error('Upload result elements are missing from the page.');
        return;
    }

    // Tolerate a response that carries no stats object at all.
    const safeStats = stats || {};

    // Values are coerced to numbers before they reach innerHTML, so a
    // malformed server response cannot inject markup.
    const asCount = (value) => {
        const n = Number(value);
        return value != null && Number.isFinite(n) ? n.toLocaleString() : '0';
    };
    const asSeconds = (value) => {
        const n = Number(value);
        return value != null && Number.isFinite(n) ? n.toFixed(1) : '0';
    };

    const cards = [
        [asCount(safeStats.total_in_file), 'הודעות בקובץ'],
        [asCount(safeStats.new_messages), 'הודעות חדשות נוספו'],
        [asCount(safeStats.duplicates), 'כפילויות (דולגו)'],
        [`${asSeconds(safeStats.elapsed_seconds)}s`, 'זמן עיבוד']
    ];

    resultDiv.className = 'upload-result success';
    resultTitle.textContent = '✅ העדכון הושלם בהצלחה!';
    resultStats.innerHTML = cards.map(([value, label]) => `
        <div class="result-stat">
            <div class="result-stat-value">${value}</div>
            <div class="result-stat-label">${label}</div>
        </div>
    `).join('');
}
432
+
433
/**
 * Render the error panel for a failed upload.
 *
 * The message may come from the server or from an exception, so it is
 * written with textContent and is never parsed as HTML.
 *
 * @param {string} message - human-readable failure description.
 */
function showError(message) {
    const resultDiv = document.getElementById('upload-result');
    const resultTitle = document.getElementById('result-title');
    const resultStats = document.getElementById('result-stats');

    // The result widgets may be absent from the page; fall back to the console
    // so the error is not lost to a null dereference.
    if (!resultDiv || !resultTitle || !resultStats) {
        console.error(`Upload error: ${message}`);
        return;
    }

    resultDiv.className = 'upload-result error';
    resultTitle.textContent = `❌ שגיאה: ${message}`;
    resultStats.innerHTML = '';
}
442
+ </script>
443
+ </body>
444
+ </html>
templates/user_profile.html ADDED
@@ -0,0 +1,721 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>User Profile - Telegram Analytics</title>
7
+ <link rel="stylesheet" href="/static/css/style.css">
8
+ <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
9
+ <style>
10
+ /* Profile-specific styles */
11
+ .profile-header {
12
+ display: flex;
13
+ align-items: center;
14
+ gap: 2rem;
15
+ margin-bottom: 2rem;
16
+ padding: 2rem;
17
+ background: var(--bg-card);
18
+ border-radius: var(--radius-lg);
19
+ border: 1px solid var(--border-color);
20
+ }
21
+
22
+ .profile-avatar {
23
+ width: 100px;
24
+ height: 100px;
25
+ border-radius: 50%;
26
+ background: var(--primary);
27
+ display: flex;
28
+ align-items: center;
29
+ justify-content: center;
30
+ font-size: 2.5rem;
31
+ font-weight: 700;
32
+ flex-shrink: 0;
33
+ }
34
+
35
+ .profile-info { flex: 1; }
36
+
37
+ .profile-name {
38
+ font-size: 1.75rem;
39
+ font-weight: 700;
40
+ margin-bottom: 0.25rem;
41
+ }
42
+
43
+ .profile-meta {
44
+ color: var(--text-muted);
45
+ font-size: 0.875rem;
46
+ display: flex;
47
+ gap: 1rem;
48
+ flex-wrap: wrap;
49
+ margin-top: 0.5rem;
50
+ }
51
+
52
+ .profile-meta span {
53
+ display: inline-flex;
54
+ align-items: center;
55
+ gap: 0.25rem;
56
+ }
57
+
58
+ .badge {
59
+ display: inline-block;
60
+ padding: 0.15rem 0.5rem;
61
+ border-radius: 4px;
62
+ font-size: 0.75rem;
63
+ font-weight: 600;
64
+ }
65
+
66
+ .badge-creator { background: #ffd700; color: #1a1a2e; }
67
+ .badge-admin { background: #28a745; color: white; }
68
+ .badge-bot { background: #6c757d; color: white; }
69
+ .badge-premium { background: #9b59b6; color: white; }
70
+ .badge-online { background: #28a745; color: white; }
71
+ .badge-recently { background: #17a2b8; color: white; }
72
+ .badge-offline { background: var(--border-color); color: var(--text-muted); }
73
+
74
+ .profile-stats {
75
+ display: grid;
76
+ grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));
77
+ gap: 1rem;
78
+ margin-bottom: 2rem;
79
+ }
80
+
81
+ .profile-stat-card {
82
+ background: var(--bg-card);
83
+ border: 1px solid var(--border-color);
84
+ border-radius: var(--radius-md);
85
+ padding: 1rem;
86
+ text-align: center;
87
+ }
88
+
89
+ .profile-stat-value {
90
+ font-size: 1.5rem;
91
+ font-weight: 700;
92
+ color: var(--primary);
93
+ }
94
+
95
+ .profile-stat-label {
96
+ font-size: 0.75rem;
97
+ color: var(--text-muted);
98
+ margin-top: 0.25rem;
99
+ }
100
+
101
+ .profile-grid {
102
+ display: grid;
103
+ grid-template-columns: repeat(2, 1fr);
104
+ gap: 1.5rem;
105
+ margin-bottom: 1.5rem;
106
+ }
107
+
108
+ .profile-card {
109
+ background: var(--bg-card);
110
+ border: 1px solid var(--border-color);
111
+ border-radius: var(--radius-lg);
112
+ padding: 1.5rem;
113
+ }
114
+
115
+ .profile-card h3 {
116
+ font-size: 1rem;
117
+ margin-bottom: 1rem;
118
+ color: var(--text-primary);
119
+ display: flex;
120
+ align-items: center;
121
+ gap: 0.5rem;
122
+ }
123
+
124
+ .profile-card.full-width {
125
+ grid-column: span 2;
126
+ }
127
+
128
+ .reply-network-list {
129
+ list-style: none;
130
+ }
131
+
132
+ .reply-network-item {
133
+ display: flex;
134
+ justify-content: space-between;
135
+ align-items: center;
136
+ padding: 0.5rem 0;
137
+ border-bottom: 1px solid var(--border-color);
138
+ }
139
+
140
+ .reply-network-item:last-child {
141
+ border-bottom: none;
142
+ }
143
+
144
+ .reply-network-name {
145
+ display: flex;
146
+ align-items: center;
147
+ gap: 0.5rem;
148
+ }
149
+
150
+ .reply-network-name a {
151
+ color: var(--primary);
152
+ text-decoration: none;
153
+ }
154
+
155
+ .reply-network-name a:hover {
156
+ text-decoration: underline;
157
+ }
158
+
159
+ .reply-network-count {
160
+ font-weight: 600;
161
+ color: var(--text-secondary);
162
+ }
163
+
164
+ .reply-bar {
165
+ height: 4px;
166
+ background: var(--border-color);
167
+ border-radius: 2px;
168
+ margin-top: 4px;
169
+ }
170
+
171
+ .reply-bar-fill {
172
+ height: 100%;
173
+ background: var(--primary);
174
+ border-radius: 2px;
175
+ }
176
+
177
+ .links-list {
178
+ list-style: none;
179
+ }
180
+
181
+ .links-list li {
182
+ padding: 0.5rem 0;
183
+ border-bottom: 1px solid var(--border-color);
184
+ display: flex;
185
+ justify-content: space-between;
186
+ align-items: center;
187
+ }
188
+
189
+ .links-list li:last-child { border-bottom: none; }
190
+
191
+ .links-list a {
192
+ color: var(--primary);
193
+ text-decoration: none;
194
+ word-break: break-all;
195
+ font-size: 0.875rem;
196
+ }
197
+
198
+ .links-list a:hover { text-decoration: underline; }
199
+
200
+ .links-list .count {
201
+ font-weight: 600;
202
+ color: var(--text-muted);
203
+ flex-shrink: 0;
204
+ margin-left: 1rem;
205
+ }
206
+
207
+ .no-messages {
208
+ text-align: center;
209
+ padding: 3rem;
210
+ background: var(--bg-card);
211
+ border-radius: var(--radius-lg);
212
+ border: 1px solid var(--border-color);
213
+ }
214
+
215
+ .no-messages h2 {
216
+ margin-bottom: 0.5rem;
217
+ color: var(--text-muted);
218
+ }
219
+
220
+ .forward-source {
221
+ display: flex;
222
+ justify-content: space-between;
223
+ align-items: center;
224
+ padding: 0.5rem 0;
225
+ border-bottom: 1px solid var(--border-color);
226
+ }
227
+
228
+ .forward-source:last-child { border-bottom: none; }
229
+
230
+ .time-info {
231
+ font-size: 0.875rem;
232
+ color: var(--text-secondary);
233
+ padding: 0.5rem 0;
234
+ display: flex;
235
+ justify-content: space-between;
236
+ }
237
+
238
+ @media (max-width: 992px) {
239
+ .profile-grid {
240
+ grid-template-columns: 1fr;
241
+ }
242
+ .profile-card.full-width {
243
+ grid-column: span 1;
244
+ }
245
+ .profile-header {
246
+ flex-direction: column;
247
+ text-align: center;
248
+ }
249
+ .profile-meta {
250
+ justify-content: center;
251
+ }
252
+ }
253
+ </style>
254
+ </head>
255
+ <body>
256
+ <!-- Sidebar -->
257
+ <nav class="sidebar">
258
+ <div class="logo">
259
+ <span class="logo-icon">📊</span>
260
+ <span class="logo-text">TG Analytics</span>
261
+ </div>
262
+ <ul class="nav-menu">
263
+ <li class="nav-item">
264
+ <a href="/" class="nav-link">
265
+ <span class="icon">📈</span>
266
+ <span>Overview</span>
267
+ </a>
268
+ </li>
269
+ <li class="nav-item active">
270
+ <a href="/users" class="nav-link">
271
+ <span class="icon">👥</span>
272
+ <span>Users</span>
273
+ </a>
274
+ </li>
275
+ <li class="nav-item">
276
+ <a href="/chat" class="nav-link">
277
+ <span class="icon">💬</span>
278
+ <span>Chat</span>
279
+ </a>
280
+ </li>
281
+ <li class="nav-item">
282
+ <a href="/search" class="nav-link">
283
+ <span class="icon">🔍</span>
284
+ <span>Search</span>
285
+ </a>
286
+ </li>
287
+ <li class="nav-item">
288
+ <a href="/moderation" class="nav-link">
289
+ <span class="icon">🛡️</span>
290
+ <span>Moderation</span>
291
+ </a>
292
+ </li>
293
+ <li class="nav-item">
294
+ <a href="/settings" class="nav-link">
295
+ <span class="icon">⚙️</span>
296
+ <span>Settings</span>
297
+ </a>
298
+ </li>
299
+ </ul>
300
+ </nav>
301
+
302
+ <!-- Main Content -->
303
+ <main class="main-content">
304
+ <header class="header">
305
+ <h1><a href="/users" style="color: var(--text-muted); text-decoration: none;">&larr; Users</a></h1>
306
+ </header>
307
+
308
+ <div id="profile-content">
309
+ <div class="loading"><div class="spinner"></div></div>
310
+ </div>
311
+ </main>
312
+
313
+ <script>
314
+ const USER_ID = '{{ user_id }}';
315
+ const COLORS = ['#e17076','#7bc862','#e5ca77','#65aadd','#a695e7','#ee7aae','#6ec9cb','#faa774'];
316
+
317
+ function getAvatarColor(name) {
318
+ let hash = 0;
319
+ for (let i = 0; i < name.length; i++) hash = name.charCodeAt(i) + ((hash << 5) - hash);
320
+ return COLORS[Math.abs(hash) % COLORS.length];
321
+ }
322
+
323
+ function formatNumber(num) {
324
+ if (num === null || num === undefined) return '-';
325
+ if (num >= 1000000) return (num / 1000000).toFixed(1) + 'M';
326
+ if (num >= 1000) return (num / 1000).toFixed(1) + 'K';
327
+ return num.toLocaleString();
328
+ }
329
+
330
+ function formatDate(ts) {
331
+ if (!ts) return '-';
332
+ const d = new Date(ts * 1000);
333
+ return d.toLocaleDateString('he-IL', { year: 'numeric', month: 'short', day: 'numeric' });
334
+ }
335
+
336
+ function formatDuration(seconds) {
337
+ if (!seconds) return '-';
338
+ if (seconds < 60) return Math.round(seconds) + 's';
339
+ if (seconds < 3600) return Math.round(seconds / 60) + 'm';
340
+ return (seconds / 3600).toFixed(1) + 'h';
341
+ }
342
+
343
+ function escapeHtml(text) {
344
+ const div = document.createElement('div');
345
+ div.textContent = text;
346
+ return div.innerHTML;
347
+ }
348
+
349
+ document.addEventListener('DOMContentLoaded', loadProfile);
350
+
351
+ async function loadProfile() {
352
+ const container = document.getElementById('profile-content');
353
+ try {
354
+ const resp = await fetch(`/api/user/${USER_ID}/profile`);
355
+ const data = await resp.json();
356
+
357
+ if (data.error) {
358
+ container.innerHTML = `<div class="empty-state"><h2>User not found</h2><p>${data.error}</p></div>`;
359
+ return;
360
+ }
361
+
362
+ if (!data.has_messages && data.participant) {
363
+ renderInactiveProfile(container, data);
364
+ return;
365
+ }
366
+
367
+ renderFullProfile(container, data);
368
+ } catch (err) {
369
+ container.innerHTML = `<div class="empty-state">Error loading profile: ${err.message}</div>`;
370
+ }
371
+ }
372
+
373
+ function renderInactiveProfile(container, data) {
374
+ const p = data.participant;
375
+ const name = data.name || 'Unknown';
376
+ const color = getAvatarColor(name);
377
+ const initial = name.charAt(0).toUpperCase();
378
+
379
+ let badges = '';
380
+ if (p.is_creator) badges += ' <span class="badge badge-creator">Creator</span>';
381
+ if (p.is_admin && !p.is_creator) badges += ' <span class="badge badge-admin">Admin</span>';
382
+ if (p.is_bot) badges += ' <span class="badge badge-bot">Bot</span>';
383
+ if (p.is_premium) badges += ' <span class="badge badge-premium">Premium</span>';
384
+
385
+ container.innerHTML = `
386
+ <div class="profile-header">
387
+ <div class="profile-avatar" style="background: ${color}">${initial}</div>
388
+ <div class="profile-info">
389
+ <div class="profile-name">${escapeHtml(name)}${badges}</div>
390
+ ${p.username ? `<div style="color: var(--primary);">@${escapeHtml(p.username)}</div>` : ''}
391
+ <div class="profile-meta">
392
+ ${p.join_date ? `<span>Joined: ${formatDate(p.join_date)}</span>` : ''}
393
+ <span>Status: <span class="badge badge-${p.last_status === 'online' ? 'online' : p.last_status === 'recently' ? 'recently' : 'offline'}">${p.last_status}</span></span>
394
+ </div>
395
+ </div>
396
+ </div>
397
+ <div class="no-messages">
398
+ <h2>No Messages</h2>
399
+ <p style="color: var(--text-muted);">This participant hasn't sent any messages in the group.</p>
400
+ </div>
401
+ `;
402
+ }
403
+
404
+ function renderFullProfile(container, data) {
405
+ const name = data.name || 'Unknown';
406
+ const color = getAvatarColor(name);
407
+ const initial = name.charAt(0).toUpperCase();
408
+ const p = data.participant;
409
+
410
+ // Badges
411
+ let badges = '';
412
+ if (p) {
413
+ if (p.is_creator) badges += ' <span class="badge badge-creator">Creator</span>';
414
+ if (p.is_admin && !p.is_creator) badges += ' <span class="badge badge-admin">Admin</span>';
415
+ if (p.is_bot) badges += ' <span class="badge badge-bot">Bot</span>';
416
+ if (p.is_premium) badges += ' <span class="badge badge-premium">Premium</span>';
417
+ }
418
+
419
+ // Header
420
+ let html = `
421
+ <div class="profile-header">
422
+ <div class="profile-avatar" style="background: ${color}">${initial}</div>
423
+ <div class="profile-info">
424
+ <div class="profile-name">${escapeHtml(name)}${badges}</div>
425
+ ${p && p.username ? `<div style="color: var(--primary);">@${escapeHtml(p.username)}</div>` : ''}
426
+ <div class="profile-meta">
427
+ <span>#${data.rank} of ${data.total_active_users}</span>
428
+ <span>ID: ${data.user_id}</span>
429
+ ${p && p.join_date ? `<span>Joined: ${formatDate(p.join_date)}</span>` : ''}
430
+ ${p ? `<span>Status: <span class="badge badge-${p.last_status === 'online' ? 'online' : p.last_status === 'recently' ? 'recently' : 'offline'}">${p.last_status}</span></span>` : ''}
431
+ </div>
432
+ </div>
433
+ </div>
434
+ `;
435
+
436
+ // Stats grid
437
+ html += `
438
+ <div class="profile-stats">
439
+ <div class="profile-stat-card">
440
+ <div class="profile-stat-value">${formatNumber(data.total_messages)}</div>
441
+ <div class="profile-stat-label">Messages</div>
442
+ </div>
443
+ <div class="profile-stat-card">
444
+ <div class="profile-stat-value">${formatNumber(data.total_characters)}</div>
445
+ <div class="profile-stat-label">Characters</div>
446
+ </div>
447
+ <div class="profile-stat-card">
448
+ <div class="profile-stat-value">${data.avg_message_length}</div>
449
+ <div class="profile-stat-label">Avg Length</div>
450
+ </div>
451
+ <div class="profile-stat-card">
452
+ <div class="profile-stat-value">${data.active_days}</div>
453
+ <div class="profile-stat-label">Active Days</div>
454
+ </div>
455
+ <div class="profile-stat-card">
456
+ <div class="profile-stat-value">${data.daily_average}</div>
457
+ <div class="profile-stat-label">Daily Avg</div>
458
+ </div>
459
+ <div class="profile-stat-card">
460
+ <div class="profile-stat-value">${formatNumber(data.total_replies_sent)}</div>
461
+ <div class="profile-stat-label">Replies Sent</div>
462
+ </div>
463
+ <div class="profile-stat-card">
464
+ <div class="profile-stat-value">${formatNumber(data.total_replies_received)}</div>
465
+ <div class="profile-stat-label">Replies Received</div>
466
+ </div>
467
+ <div class="profile-stat-card">
468
+ <div class="profile-stat-value">${data.reply_ratio}%</div>
469
+ <div class="profile-stat-label">Reply Rate</div>
470
+ </div>
471
+ <div class="profile-stat-card">
472
+ <div class="profile-stat-value">${formatDuration(data.avg_reply_time_seconds)}</div>
473
+ <div class="profile-stat-label">Avg Reply Time</div>
474
+ </div>
475
+ <div class="profile-stat-card">
476
+ <div class="profile-stat-value">${formatNumber(data.links_shared)}</div>
477
+ <div class="profile-stat-label">Links</div>
478
+ </div>
479
+ <div class="profile-stat-card">
480
+ <div class="profile-stat-value">${formatNumber(data.media_sent)}</div>
481
+ <div class="profile-stat-label">Media</div>
482
+ </div>
483
+ <div class="profile-stat-card">
484
+ <div class="profile-stat-value">${formatNumber(data.forwards_sent)}</div>
485
+ <div class="profile-stat-label">Forwards</div>
486
+ </div>
487
+ </div>
488
+ `;
489
+
490
+ // Time info
491
+ html += `
492
+ <div class="profile-card full-width" style="margin-bottom: 1.5rem;">
493
+ <h3>Timeline</h3>
494
+ <div class="time-info">
495
+ <span>First message: ${formatDate(data.first_message)}</span>
496
+ <span>Last message: ${formatDate(data.last_message)}</span>
497
+ </div>
498
+ <div class="time-info">
499
+ <span>Edits: ${formatNumber(data.edits)}</span>
500
+ <span>Mentions: ${formatNumber(data.mentions_made)}</span>
501
+ </div>
502
+ </div>
503
+ `;
504
+
505
+ // Charts + Reply network
506
+ html += `<div class="profile-grid">`;
507
+
508
+ // Hourly chart
509
+ html += `
510
+ <div class="profile-card">
511
+ <h3>Activity by Hour</h3>
512
+ <div style="height: 200px;"><canvas id="hourly-chart"></canvas></div>
513
+ </div>
514
+ `;
515
+
516
+ // Weekday chart
517
+ html += `
518
+ <div class="profile-card">
519
+ <h3>Activity by Day of Week</h3>
520
+ <div style="height: 200px;"><canvas id="weekday-chart"></canvas></div>
521
+ </div>
522
+ `;
523
+
524
+ // Monthly trend
525
+ html += `
526
+ <div class="profile-card full-width">
527
+ <h3>Monthly Trend</h3>
528
+ <div style="height: 200px;"><canvas id="monthly-chart"></canvas></div>
529
+ </div>
530
+ `;
531
+
532
+ // Daily activity (last 90 days)
533
+ html += `
534
+ <div class="profile-card full-width">
535
+ <h3>Daily Activity (Last 90 Days)</h3>
536
+ <div style="height: 200px;"><canvas id="daily-chart"></canvas></div>
537
+ </div>
538
+ `;
539
+
540
+ // Replies to (top 10)
541
+ const maxReplyTo = data.replies_to.length > 0 ? data.replies_to[0].count : 1;
542
+ html += `
543
+ <div class="profile-card">
544
+ <h3>Most Replies To</h3>
545
+ ${data.replies_to.length === 0 ? '<p style="color: var(--text-muted);">No reply data</p>' : ''}
546
+ <ul class="reply-network-list">
547
+ ${data.replies_to.map(r => `
548
+ <li class="reply-network-item">
549
+ <div class="reply-network-name">
550
+ <a href="/user/${r.user_id}">${escapeHtml(r.name)}</a>
551
+ </div>
552
+ <span class="reply-network-count">${r.count}</span>
553
+ </li>
554
+ <div class="reply-bar"><div class="reply-bar-fill" style="width: ${(r.count / maxReplyTo * 100).toFixed(1)}%"></div></div>
555
+ `).join('')}
556
+ </ul>
557
+ </div>
558
+ `;
559
+
560
+ // Replies from (top 10)
561
+ const maxReplyFrom = data.replies_from.length > 0 ? data.replies_from[0].count : 1;
562
+ html += `
563
+ <div class="profile-card">
564
+ <h3>Most Replies From</h3>
565
+ ${data.replies_from.length === 0 ? '<p style="color: var(--text-muted);">No reply data</p>' : ''}
566
+ <ul class="reply-network-list">
567
+ ${data.replies_from.map(r => `
568
+ <li class="reply-network-item">
569
+ <div class="reply-network-name">
570
+ <a href="/user/${r.user_id}">${escapeHtml(r.name)}</a>
571
+ </div>
572
+ <span class="reply-network-count">${r.count}</span>
573
+ </li>
574
+ <div class="reply-bar"><div class="reply-bar-fill" style="width: ${(r.count / maxReplyFrom * 100).toFixed(1)}%; background: #28a745;"></div></div>
575
+ `).join('')}
576
+ </ul>
577
+ </div>
578
+ `;
579
+
580
+ // Top forward sources
581
+ if (data.top_forward_sources && data.top_forward_sources.length > 0) {
582
+ html += `
583
+ <div class="profile-card">
584
+ <h3>Top Forward Sources</h3>
585
+ ${data.top_forward_sources.map(f => `
586
+ <div class="forward-source">
587
+ <span>${escapeHtml(f.name)}</span>
588
+ <span class="reply-network-count">${f.count}</span>
589
+ </div>
590
+ `).join('')}
591
+ </div>
592
+ `;
593
+ }
594
+
595
+ // Top links
596
+ if (data.top_links && data.top_links.length > 0) {
597
+ html += `
598
+ <div class="profile-card">
599
+ <h3>Top Links Shared</h3>
600
+ <ul class="links-list">
601
+ ${data.top_links.map(l => `
602
+ <li>
603
+ <a href="${escapeHtml(l.url)}" target="_blank" rel="noopener">${escapeHtml(l.url.length > 50 ? l.url.substring(0, 50) + '...' : l.url)}</a>
604
+ <span class="count">${l.count}x</span>
605
+ </li>
606
+ `).join('')}
607
+ </ul>
608
+ </div>
609
+ `;
610
+ }
611
+
612
+ html += `</div>`; // close profile-grid
613
+
614
+ container.innerHTML = html;
615
+
616
+ // Render charts
617
+ renderHourlyChart(data.hourly_activity);
618
+ renderWeekdayChart(data.weekday_activity);
619
+ renderMonthlyChart(data.monthly_activity);
620
+ renderDailyChart(data.daily_activity);
621
+ }
622
+
623
/**
 * Build the options object shared by every profile chart: responsive
 * sizing, no legend, a zero-based y axis with faint grid lines, and an x
 * axis without grid lines and at most 12 unrotated tick labels.
 *
 * @returns {Object} a fresh options object on every call.
 */
function chartDefaults() {
    const tickColor = '#718096';

    const yAxis = {
        beginAtZero: true,
        grid: { color: 'rgba(255,255,255,0.05)' },
        ticks: { color: tickColor }
    };
    const xAxis = {
        grid: { display: false },
        ticks: { color: tickColor, maxRotation: 0, autoSkip: true, maxTicksLimit: 12 }
    };

    const options = {
        responsive: true,
        maintainAspectRatio: false,
        plugins: { legend: { display: false } },
        scales: { y: yAxis, x: xAxis }
    };
    return options;
}
641
+
642
/**
 * Draw the "Activity by Hour" bar chart on #hourly-chart. Does nothing when
 * the canvas is not present.
 *
 * @param {Array} hourly - values plotted against the 24 labels "0:00" to "23:00".
 */
function renderHourlyChart(hourly) {
    const canvas = document.getElementById('hourly-chart');
    if (!canvas) return;

    const hourLabels = [];
    for (let hour = 0; hour < 24; hour += 1) {
        hourLabels.push(`${hour}:00`);
    }

    const series = {
        data: hourly,
        backgroundColor: 'rgba(0, 136, 204, 0.6)',
        borderColor: 'rgba(0, 136, 204, 1)',
        borderWidth: 1
    };

    new Chart(canvas.getContext('2d'), {
        type: 'bar',
        data: { labels: hourLabels, datasets: [series] },
        options: chartDefaults()
    });
}
659
+
660
/**
 * Draw the "Activity by Day of Week" bar chart on #weekday-chart. Does
 * nothing when the canvas is not present.
 *
 * @param {Array<{day: string, count: number}>} weekday - one entry per bar;
 *     labels are the first three characters of `day`.
 */
function renderWeekdayChart(weekday) {
    const canvas = document.getElementById('weekday-chart');
    if (!canvas) return;

    const REGULAR_FILL = 'rgba(0, 136, 204, 0.6)';
    const HIGHLIGHT_FILL = 'rgba(40, 167, 69, 0.6)';

    const labels = [];
    const counts = [];
    const fills = [];
    weekday.forEach((entry, position) => {
        labels.push(entry.day.substring(0, 3));
        counts.push(entry.count);
        // Positions 5 and 6 are drawn in green — presumably the weekend
        // slots; that depends on the order the API returns (TODO confirm).
        fills.push(position === 5 || position === 6 ? HIGHLIGHT_FILL : REGULAR_FILL);
    });

    new Chart(canvas.getContext('2d'), {
        type: 'bar',
        data: {
            labels: labels,
            datasets: [{ data: counts, backgroundColor: fills, borderWidth: 1 }]
        },
        options: chartDefaults()
    });
}
678
+
679
/**
 * Draw the "Monthly Trend" filled line chart on #monthly-chart. Does
 * nothing when the canvas is not present.
 *
 * @param {Array<{month: string, count: number}>} monthly - one entry per point.
 */
function renderMonthlyChart(monthly) {
    const canvas = document.getElementById('monthly-chart');
    if (!canvas) return;

    const monthLabels = monthly.map((entry) => entry.month);
    const monthCounts = monthly.map((entry) => entry.count);

    const trendLine = {
        data: monthCounts,
        borderColor: '#0088cc',
        backgroundColor: 'rgba(0, 136, 204, 0.1)',
        fill: true,
        tension: 0.3,
        pointRadius: 3,
        pointHoverRadius: 6
    };

    const config = {
        type: 'line',
        data: { labels: monthLabels, datasets: [trendLine] },
        options: chartDefaults()
    };
    new Chart(canvas.getContext('2d'), config);
}
699
+
700
/**
 * Draw the "Daily Activity" bar chart on #daily-chart. Does nothing when
 * the canvas is not present.
 *
 * @param {Array<{date: string, count: number}>} daily - entries are plotted
 *     in reverse of the order received; labels drop the first five
 *     characters of `date`. The input array is not modified.
 */
function renderDailyChart(daily) {
    const canvas = document.getElementById('daily-chart');
    if (!canvas) return;

    // Work on a reversed copy so the caller's array keeps its order.
    const ordered = Array.from(daily).reverse();
    const dayLabels = ordered.map((entry) => entry.date.substring(5)); // MM-DD
    const dayCounts = ordered.map((entry) => entry.count);

    const bars = {
        data: dayCounts,
        backgroundColor: 'rgba(0, 136, 204, 0.4)',
        borderColor: 'rgba(0, 136, 204, 0.8)',
        borderWidth: 1
    };

    new Chart(canvas.getContext('2d'), {
        type: 'bar',
        data: { labels: dayLabels, datasets: [bars] },
        options: chartDefaults()
    });
}
719
+ </script>
720
+ </body>
721
+ </html>
templates/users.html ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Users - Telegram Analytics</title>
7
+ <link rel="stylesheet" href="/static/css/style.css">
8
+ <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
9
+ </head>
10
+ <body>
11
+ <!-- Sidebar -->
12
+ <nav class="sidebar">
13
+ <div class="logo">
14
+ <span class="logo-icon">📊</span>
15
+ <span class="logo-text">TG Analytics</span>
16
+ </div>
17
+ <ul class="nav-menu">
18
+ <li class="nav-item">
19
+ <a href="/" class="nav-link">
20
+ <span class="icon">📈</span>
21
+ <span>Overview</span>
22
+ </a>
23
+ </li>
24
+ <li class="nav-item active">
25
+ <a href="/users" class="nav-link">
26
+ <span class="icon">👥</span>
27
+ <span>Users</span>
28
+ </a>
29
+ </li>
30
+ <li class="nav-item">
31
+ <a href="/chat" class="nav-link">
32
+ <span class="icon">💬</span>
33
+ <span>Chat</span>
34
+ </a>
35
+ </li>
36
+ <li class="nav-item">
37
+ <a href="/search" class="nav-link">
38
+ <span class="icon">🔍</span>
39
+ <span>Search</span>
40
+ </a>
41
+ </li>
42
+ <li class="nav-item">
43
+ <a href="/moderation" class="nav-link">
44
+ <span class="icon">🛡️</span>
45
+ <span>Moderation</span>
46
+ </a>
47
+ </li>
48
+ <li class="nav-item">
49
+ <a href="/settings" class="nav-link">
50
+ <span class="icon">⚙️</span>
51
+ <span>Settings</span>
52
+ </a>
53
+ </li>
54
+ </ul>
55
+ <div class="sidebar-footer">
56
+ <div class="export-buttons">
57
+ <button onclick="exportUsers()" class="btn btn-sm">📥 Export Users</button>
58
+ </div>
59
+ </div>
60
+ </nav>
61
+
62
+ <!-- Main Content -->
63
+ <main class="main-content">
64
+ <!-- Header -->
65
+ <header class="header">
66
+ <h1>User Leaderboard</h1>
67
+ <div class="header-controls">
68
+ <select id="timeframe" class="select" onchange="loadUsers()">
69
+ <option value="today">Today</option>
70
+ <option value="yesterday">Yesterday</option>
71
+ <option value="week">This Week</option>
72
+ <option value="month" selected>This Month</option>
73
+ <option value="year">This Year</option>
74
+ <option value="all">All Time</option>
75
+ </select>
76
+ <button onclick="loadUsers()" class="btn btn-primary">🔄 Refresh</button>
77
+ </div>
78
+ </header>
79
+
80
+ <!-- User Stats Summary -->
81
+ <section class="stats-grid">
82
+ <div class="stat-card">
83
+ <div class="stat-icon">👥</div>
84
+ <div class="stat-content">
85
+ <div class="stat-value" id="total-users">-</div>
86
+ <div class="stat-label">Total Members</div>
87
+ </div>
88
+ </div>
89
+ <div class="stat-card">
90
+ <div class="stat-icon">💬</div>
91
+ <div class="stat-content">
92
+ <div class="stat-value" id="total-active">-</div>
93
+ <div class="stat-label">Active Users</div>
94
+ </div>
95
+ </div>
96
+ <div class="stat-card">
97
+ <div class="stat-icon">🏆</div>
98
+ <div class="stat-content">
99
+ <div class="stat-value" id="top-user">-</div>
100
+ <div class="stat-label">Top User</div>
101
+ </div>
102
+ </div>
103
+ <div class="stat-card">
104
+ <div class="stat-icon">📊</div>
105
+ <div class="stat-content">
106
+ <div class="stat-value" id="avg-messages">-</div>
107
+ <div class="stat-label">Avg Messages/User</div>
108
+ </div>
109
+ </div>
110
+ </section>
111
+
112
+ <!-- Users Table -->
113
+ <section class="chart-card full-width">
114
+ <div class="chart-header">
115
+ <h3>All Users</h3>
116
+ <div style="display: flex; gap: 1rem; align-items: center;">
117
+ <input type="search" id="user-search" placeholder="Search users..."
118
+ style="width: 200px;" onkeyup="filterUsers()">
119
+ <span id="showing-count" style="color: var(--text-muted); font-size: 0.875rem;"></span>
120
+ </div>
121
+ </div>
122
+ <div style="overflow-x: auto;">
123
+ <table class="users-table">
124
+ <thead>
125
+ <tr>
126
+ <th style="width: 60px;">Rank</th>
127
+ <th>User</th>
128
+ <th style="width: 80px;">Role</th>
129
+ <th style="width: 120px;">Messages</th>
130
+ <th style="width: 100px;">Share</th>
131
+ <th style="width: 100px;">Links</th>
132
+ <th style="width: 100px;">Media</th>
133
+ <th style="width: 100px;">Active Days</th>
134
+ <th style="width: 100px;">Daily Avg</th>
135
+ </tr>
136
+ </thead>
137
+ <tbody id="users-table-body">
138
+ <tr>
139
+ <td colspan="9" class="loading">
140
+ <div class="spinner"></div>
141
+ </td>
142
+ </tr>
143
+ </tbody>
144
+ </table>
145
+ </div>
146
+ <!-- Pagination -->
147
+ <div class="pagination" id="pagination"></div>
148
+ </section>
149
+ </main>
150
+
151
+ <script src="/static/js/dashboard.js"></script>
152
+ <script>
153
+ // State
154
+ let allUsers = [];
155
+ let currentPage = 1;
156
+ const pageSize = 20;
157
+
158
+ // Initialize
159
+ document.addEventListener('DOMContentLoaded', () => {
160
+ loadUsers();
161
+ });
162
+
163
+ async function loadUsers() {
164
+ const timeframe = document.getElementById('timeframe').value;
165
+ const tbody = document.getElementById('users-table-body');
166
+ tbody.innerHTML = '<tr><td colspan="9" class="loading"><div class="spinner"></div></td></tr>';
167
+
168
+ try {
169
+ const response = await fetch(`/api/users?timeframe=${timeframe}&limit=500&include_inactive=1`);
170
+ const data = await response.json();
171
+ allUsers = data.users;
172
+
173
+ // Update summary stats
174
+ document.getElementById('total-users').textContent = formatNumber(data.total);
175
+ document.getElementById('total-active').textContent = formatNumber(data.total_active);
176
+ if (allUsers.length > 0) {
177
+ const activeUsers = allUsers.filter(u => u.messages > 0);
178
+ if (activeUsers.length > 0) {
179
+ document.getElementById('top-user').textContent = activeUsers[0].name;
180
+ const totalMessages = activeUsers.reduce((sum, u) => sum + u.messages, 0);
181
+ document.getElementById('avg-messages').textContent =
182
+ formatNumber(Math.round(totalMessages / activeUsers.length));
183
+ }
184
+ }
185
+
186
+ currentPage = 1;
187
+ renderUsers();
188
+ } catch (error) {
189
+ tbody.innerHTML = '<tr><td colspan="9" class="empty-state">Error loading users</td></tr>';
190
+ }
191
+ }
192
+
193
+ function filterUsers() {
194
+ currentPage = 1;
195
+ renderUsers();
196
+ }
197
+
198
+ function renderUsers() {
199
+ const search = document.getElementById('user-search').value.toLowerCase();
200
+ const filtered = allUsers.filter(u =>
201
+ u.name.toLowerCase().includes(search) ||
202
+ u.user_id.toLowerCase().includes(search)
203
+ );
204
+
205
+ const start = (currentPage - 1) * pageSize;
206
+ const end = start + pageSize;
207
+ const pageUsers = filtered.slice(start, end);
208
+
209
+ document.getElementById('showing-count').textContent =
210
+ `Showing ${start + 1}-${Math.min(end, filtered.length)} of ${filtered.length}`;
211
+
212
+ const tbody = document.getElementById('users-table-body');
213
+
214
+ if (pageUsers.length === 0) {
215
+ tbody.innerHTML = '<tr><td colspan="9" class="empty-state">No users found</td></tr>';
216
+ return;
217
+ }
218
+
219
+ tbody.innerHTML = pageUsers.map((user, i) => {
220
+ const rank = user.rank || '-';
221
+ const rankClass = rank === 1 ? 'gold' : rank === 2 ? 'silver' : rank === 3 ? 'bronze' : '';
222
+ const initial = user.name.charAt(0).toUpperCase();
223
+ const isInactive = user.messages === 0;
224
+ const rowStyle = isInactive ? 'opacity: 0.6;' : '';
225
+
226
+ let roleBadge = '';
227
+ if (user.role === 'creator') roleBadge = '<span style="background:#ffd700;color:#1a1a2e;padding:2px 6px;border-radius:4px;font-size:0.7rem;font-weight:600;">Creator</span>';
228
+ else if (user.role === 'admin') roleBadge = '<span style="background:#28a745;color:white;padding:2px 6px;border-radius:4px;font-size:0.7rem;font-weight:600;">Admin</span>';
229
+ else if (user.role === 'bot') roleBadge = '<span style="background:#6c757d;color:white;padding:2px 6px;border-radius:4px;font-size:0.7rem;font-weight:600;">Bot</span>';
230
+
231
+ const subtitle = user.username
232
+ ? `@${escapeHtml(user.username)}`
233
+ : `ID: ${user.user_id}`;
234
+
235
+ return `
236
+ <tr onclick="window.location.href='/user/${user.user_id}'" style="cursor: pointer; ${rowStyle}">
237
+ <td><span class="list-rank ${rankClass}">${rank !== '-' ? '#' + rank : '-'}</span></td>
238
+ <td>
239
+ <div class="user-cell">
240
+ <div class="user-avatar">${initial}</div>
241
+ <div>
242
+ <div class="list-name">${escapeHtml(user.name)}</div>
243
+ <div class="list-subtitle">${subtitle}</div>
244
+ </div>
245
+ </div>
246
+ </td>
247
+ <td>${roleBadge}</td>
248
+ <td>
249
+ ${isInactive ? '<span style="color: var(--text-muted);">-</span>' : `
250
+ <strong>${formatNumber(user.messages)}</strong>
251
+ <div class="progress-bar">
252
+ <div class="progress-fill" style="width: ${user.percentage}%"></div>
253
+ </div>`}
254
+ </td>
255
+ <td>${isInactive ? '-' : user.percentage + '%'}</td>
256
+ <td>${isInactive ? '-' : formatNumber(user.links)}</td>
257
+ <td>${isInactive ? '-' : formatNumber(user.media)}</td>
258
+ <td>${isInactive ? '-' : user.active_days}</td>
259
+ <td>${isInactive ? '-' : user.daily_average}</td>
260
+ </tr>
261
+ `;
262
+ }).join('');
263
+
264
+ // Render pagination
265
+ const totalPages = Math.ceil(filtered.length / pageSize);
266
+ renderPagination(totalPages);
267
+ }
268
+
269
+ function renderPagination(totalPages) {
270
+ const pagination = document.getElementById('pagination');
271
+
272
+ if (totalPages <= 1) {
273
+ pagination.innerHTML = '';
274
+ return;
275
+ }
276
+
277
+ let html = '';
278
+
279
+ // Previous button
280
+ html += `<button class="page-btn" onclick="goToPage(${currentPage - 1})"
281
+ ${currentPage === 1 ? 'disabled' : ''}>&laquo;</button>`;
282
+
283
+ // Page numbers
284
+ const maxVisible = 5;
285
+ let startPage = Math.max(1, currentPage - Math.floor(maxVisible / 2));
286
+ let endPage = Math.min(totalPages, startPage + maxVisible - 1);
287
+
288
+ if (endPage - startPage < maxVisible - 1) {
289
+ startPage = Math.max(1, endPage - maxVisible + 1);
290
+ }
291
+
292
+ if (startPage > 1) {
293
+ html += `<button class="page-btn" onclick="goToPage(1)">1</button>`;
294
+ if (startPage > 2) html += `<span style="padding: 0 0.5rem;">...</span>`;
295
+ }
296
+
297
+ for (let i = startPage; i <= endPage; i++) {
298
+ html += `<button class="page-btn ${i === currentPage ? 'active' : ''}"
299
+ onclick="goToPage(${i})">${i}</button>`;
300
+ }
301
+
302
+ if (endPage < totalPages) {
303
+ if (endPage < totalPages - 1) html += `<span style="padding: 0 0.5rem;">...</span>`;
304
+ html += `<button class="page-btn" onclick="goToPage(${totalPages})">${totalPages}</button>`;
305
+ }
306
+
307
+ // Next button
308
+ html += `<button class="page-btn" onclick="goToPage(${currentPage + 1})"
309
+ ${currentPage === totalPages ? 'disabled' : ''}>&raquo;</button>`;
310
+
311
+ pagination.innerHTML = html;
312
+ }
313
+
314
+ function goToPage(page) {
315
+ currentPage = page;
316
+ renderUsers();
317
+ window.scrollTo({ top: 0, behavior: 'smooth' });
318
+ }
319
+
320
+ function openUserProfile(userId) {
321
+ window.location.href = `/user/${userId}`;
322
+ }
323
+
324
+ // Export function
325
+ function exportUsers() {
326
+ const timeframe = document.getElementById('timeframe').value;
327
+ window.location.href = `/api/export/users?timeframe=${timeframe}`;
328
+ }
329
+
330
+ // Helper functions
331
+ function formatNumber(num) {
332
+ if (num >= 1000000) return (num / 1000000).toFixed(1) + 'M';
333
+ if (num >= 1000) return (num / 1000).toFixed(1) + 'K';
334
+ return num.toString();
335
+ }
336
+
337
+ function escapeHtml(text) {
338
+ const div = document.createElement('div');
339
+ div.textContent = text;
340
+ return div.innerHTML;
341
+ }
342
+ </script>
343
+ </body>
344
+ </html>