Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- Dockerfile +31 -0
- README.md +1037 -11
- ai_search.py +776 -0
- algorithms.py +819 -0
- dashboard.py +2086 -0
- data_structures.py +773 -0
- indexer.py +817 -0
- requirements.txt +4 -0
- schema.sql +200 -0
- search.py +564 -0
- semantic_search.py +411 -0
- static/css/style.css +859 -0
- static/js/dashboard.js +622 -0
- templates/chat.html +831 -0
- templates/index.html +223 -0
- templates/moderation.html +459 -0
- templates/search.html +359 -0
- templates/settings.html +444 -0
- templates/user_profile.html +721 -0
- templates/users.html +344 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
telegram.db filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.12-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
# Install dependencies
|
| 6 |
+
COPY requirements.txt .
|
| 7 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 8 |
+
|
| 9 |
+
# Copy application code
|
| 10 |
+
COPY dashboard.py .
|
| 11 |
+
COPY ai_search.py .
|
| 12 |
+
COPY algorithms.py .
|
| 13 |
+
COPY data_structures.py .
|
| 14 |
+
COPY indexer.py .
|
| 15 |
+
COPY search.py .
|
| 16 |
+
COPY semantic_search.py .
|
| 17 |
+
COPY schema.sql .
|
| 18 |
+
COPY static/ static/
|
| 19 |
+
COPY templates/ templates/
|
| 20 |
+
|
| 21 |
+
# Copy database
|
| 22 |
+
COPY telegram.db .
|
| 23 |
+
|
| 24 |
+
# HF Spaces uses port 7860
|
| 25 |
+
ENV PORT=7860
|
| 26 |
+
ENV HOST=0.0.0.0
|
| 27 |
+
ENV DB_PATH=telegram.db
|
| 28 |
+
|
| 29 |
+
EXPOSE 7860
|
| 30 |
+
|
| 31 |
+
CMD ["gunicorn", "dashboard:app", "--bind", "0.0.0.0:7860", "--workers", "2", "--timeout", "120"]
|
README.md
CHANGED
|
@@ -1,11 +1,1037 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: Telegram Analytics
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk: docker
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Telegram Analytics Dashboard
|
| 3 |
+
emoji: 📊
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: indigo
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
# Telegram JSON Indexer & Analyzer
|
| 11 |
+
|
| 12 |
+
A high-performance system for indexing, searching, and analyzing Telegram chat exports using SQLite FTS5 and advanced algorithms from a Data Structures course. Includes a full-featured **Web Dashboard** with **AI-powered search**.
|
| 13 |
+
|
| 14 |
+
```
|
| 15 |
+
╔══════════════════════════════════════════════════════════════════════════════╗
|
| 16 |
+
║ TELEGRAM CHAT ANALYZER ║
|
| 17 |
+
║ ║
|
| 18 |
+
║ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────────────────────┐ ║
|
| 19 |
+
║ │ JSON │───▶│ INDEXER │───▶│ SQLite │───▶│ WEB DASHBOARD │ ║
|
| 20 |
+
║ │ Export │ │ Bloom │ │ + FTS5 │ │ ┌─────┬─────┬─────┐ │ ║
|
| 21 |
+
║ │ │ │ Filter │ │ │ │ │Stats│Users│Chat │ │ ║
|
| 22 |
+
║ └─────────┘ └─────────┘ └─────────┘ │ ├─────┼─────┼─────┤ │ ║
|
| 23 |
+
║ │ │Search│ AI │Mod │ │ ║
|
| 24 |
+
║ │ └─────┴─────┴─────┘ │ ║
|
| 25 |
+
║ └─────────────────────────┘ ║
|
| 26 |
+
╚══════════════════════════════════════════════════════════════════════════════╝
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
## Features
|
| 30 |
+
|
| 31 |
+
### Core Features
|
| 32 |
+
- **Full-Text Search** - Fast search with Hebrew support using SQLite FTS5
|
| 33 |
+
- **Fuzzy Search** - Find messages even with typos using trigram similarity
|
| 34 |
+
- **Similar Message Detection** - LCS algorithm finds duplicates/reposts
|
| 35 |
+
- **Conversation Threads** - DFS/BFS traversal reconstructs reply chains
|
| 36 |
+
- **User Rankings** - O(log n) rank queries using AVL Rank Tree
|
| 37 |
+
- **Time Analytics** - Bucket Sort for efficient histograms
|
| 38 |
+
- **Top-K Queries** - Heap-based O(n log k) instead of O(n log n)
|
| 39 |
+
- **Percentiles** - O(n) median/percentiles using Selection algorithm
|
| 40 |
+
|
| 41 |
+
### Web Dashboard
|
| 42 |
+
- **Interactive Overview** - Charts, stats, activity graphs
|
| 43 |
+
- **User Leaderboard** - Rankings with detailed user profiles
|
| 44 |
+
- **Telegram-like Chat View** - Browse all messages like in Telegram
|
| 45 |
+
- **Advanced Search** - Full-text + fuzzy search with filters
|
| 46 |
+
- **AI-Powered Search** - Natural language queries (Hebrew/English)
|
| 47 |
+
- **Moderation Analytics** - Links, mentions, domains analysis
|
| 48 |
+
- **Database Updates** - Upload new JSON files via web UI
|
| 49 |
+
|
| 50 |
+
### AI Search (Free Providers)
|
| 51 |
+
- **Ollama** - Local LLM (recommended, 100% free)
|
| 52 |
+
- **Groq** - Free API tier available
|
| 53 |
+
- **Google Gemini** - Free API tier available
|
| 54 |
+
|
| 55 |
+
---
|
| 56 |
+
|
| 57 |
+
## Table of Contents
|
| 58 |
+
|
| 59 |
+
1. [Installation](#installation)
|
| 60 |
+
2. [Quick Start](#quick-start)
|
| 61 |
+
3. [Web Dashboard](#web-dashboard)
|
| 62 |
+
4. [AI Search](#ai-search)
|
| 63 |
+
5. [Database Updates](#database-updates)
|
| 64 |
+
6. [Architecture](#architecture)
|
| 65 |
+
7. [Usage Guide](#usage-guide)
|
| 66 |
+
8. [Algorithms](#algorithms)
|
| 67 |
+
9. [API Reference](#api-reference)
|
| 68 |
+
10. [Examples](#examples)
|
| 69 |
+
|
| 70 |
+
---
|
| 71 |
+
|
| 72 |
+
## Installation
|
| 73 |
+
|
| 74 |
+
### Requirements
|
| 75 |
+
|
| 76 |
+
- Python 3.10 or higher
|
| 77 |
+
- No external packages required for core functionality
|
| 78 |
+
|
| 79 |
+
### Setup
|
| 80 |
+
|
| 81 |
+
```bash
|
| 82 |
+
# Clone or download the project
|
| 83 |
+
cd telegram
|
| 84 |
+
|
| 85 |
+
# Verify Python version
|
| 86 |
+
python --version # Should be 3.10+
|
| 87 |
+
|
| 88 |
+
# Test the system
|
| 89 |
+
python algorithms.py # Should print "ALL TESTS PASSED!"
|
| 90 |
+
```
|
| 91 |
+
|
| 92 |
+
### Optional: Semantic Search
|
| 93 |
+
|
| 94 |
+
For AI-powered semantic similarity search:
|
| 95 |
+
|
| 96 |
+
```bash
|
| 97 |
+
pip install numpy faiss-cpu sentence-transformers
|
| 98 |
+
```
|
| 99 |
+
|
| 100 |
+
---
|
| 101 |
+
|
| 102 |
+
## Quick Start
|
| 103 |
+
|
| 104 |
+
### Step 1: Export from Telegram
|
| 105 |
+
|
| 106 |
+
1. Open Telegram Desktop
|
| 107 |
+
2. Go to any chat/group
|
| 108 |
+
3. Click ⋮ → Export Chat History
|
| 109 |
+
4. Select JSON format
|
| 110 |
+
5. Save as `result.json`
|
| 111 |
+
|
| 112 |
+
### Step 2: Index Your Data
|
| 113 |
+
|
| 114 |
+
```bash
|
| 115 |
+
python indexer.py result.json --db telegram.db
|
| 116 |
+
```
|
| 117 |
+
|
| 118 |
+
### Step 3: Launch Web Dashboard
|
| 119 |
+
|
| 120 |
+
```bash
|
| 121 |
+
# Start the dashboard (recommended)
|
| 122 |
+
python dashboard.py
|
| 123 |
+
|
| 124 |
+
# Open in browser: http://localhost:5000
|
| 125 |
+
```
|
| 126 |
+
|
| 127 |
+
### Step 4: Search & Analyze (CLI)
|
| 128 |
+
|
| 129 |
+
```bash
|
| 130 |
+
# Search messages
|
| 131 |
+
python search.py "שלום"
|
| 132 |
+
|
| 133 |
+
# View statistics
|
| 134 |
+
python analyzer.py --stats
|
| 135 |
+
|
| 136 |
+
# Find similar messages
|
| 137 |
+
python analyzer.py --similar
|
| 138 |
+
```
|
| 139 |
+
|
| 140 |
+
---
|
| 141 |
+
|
| 142 |
+
## Web Dashboard
|
| 143 |
+
|
| 144 |
+
The web dashboard provides a complete visual interface for analyzing your Telegram data.
|
| 145 |
+
|
| 146 |
+
### Starting the Dashboard
|
| 147 |
+
|
| 148 |
+
```bash
|
| 149 |
+
python dashboard.py
|
| 150 |
+
# Or with custom port:
|
| 151 |
+
python dashboard.py --port 8080
|
| 152 |
+
```
|
| 153 |
+
|
| 154 |
+
### Dashboard Pages
|
| 155 |
+
|
| 156 |
+
```
|
| 157 |
+
┌─────────────────────────────────────────────────────────────────────────┐
|
| 158 |
+
│ WEB DASHBOARD │
|
| 159 |
+
├─────────────────────────────────────────────────────────────────────────┤
|
| 160 |
+
│ │
|
| 161 |
+
│ 📈 Overview │ Main statistics, charts, activity graphs │
|
| 162 |
+
│ │ - Total messages, users, links, media │
|
| 163 |
+
│ │ - Daily/hourly activity charts │
|
| 164 |
+
│ │ - Top users leaderboard │
|
| 165 |
+
│ │
|
| 166 |
+
│ 👥 Users │ User leaderboard with detailed profiles │
|
| 167 |
+
│ │ - Ranking by message count │
|
| 168 |
+
│ │ - User details modal (hourly activity) │
|
| 169 |
+
│ │ - Export users to CSV │
|
| 170 |
+
│ │
|
| 171 |
+
│ 💬 Chat │ Telegram-like message view │
|
| 172 |
+
│ │ - Browse all messages chronologically │
|
| 173 |
+
│ │ - Filter by user, date, media type │
|
| 174 |
+
│ │ - Click message to view full thread │
|
| 175 |
+
│ │ - AI search with natural language │
|
| 176 |
+
│ │
|
| 177 |
+
│ 🔍 Search │ Advanced search interface │
|
| 178 |
+
│ │ - Full-text search (Hebrew supported) │
|
| 179 |
+
│ │ - AI-powered natural language search │
|
| 180 |
+
│ │ - Boolean operators (AND, OR, NOT) │
|
| 181 |
+
│ │ - Export search results │
|
| 182 |
+
│ │
|
| 183 |
+
│ 🛡️ Moderation │ Content analytics │
|
| 184 |
+
│ │ - Top shared domains │
|
| 185 |
+
│ │ - Most mentioned users │
|
| 186 |
+
│ │ - Link sharers leaderboard │
|
| 187 |
+
│ │ - Word frequency analysis │
|
| 188 |
+
│ │
|
| 189 |
+
│ ⚙️ Settings │ Database management │
|
| 190 |
+
│ │ - View database statistics │
|
| 191 |
+
│ │ - Upload new JSON files │
|
| 192 |
+
│ │ - Automatic duplicate detection │
|
| 193 |
+
│ │
|
| 194 |
+
└─────────────────────────────────────────────────────────────────────────┘
|
| 195 |
+
```
|
| 196 |
+
|
| 197 |
+
### Dashboard Features
|
| 198 |
+
|
| 199 |
+
- **Dark Theme** - Modern dark UI, easy on the eyes
|
| 200 |
+
- **RTL Support** - Full Hebrew/Arabic text support
|
| 201 |
+
- **Responsive** - Works on mobile and desktop
|
| 202 |
+
- **Real-time Charts** - Interactive Chart.js visualizations
|
| 203 |
+
- **Export** - Download data as CSV/JSON
|
| 204 |
+
|
| 205 |
+
---
|
| 206 |
+
|
| 207 |
+
## AI Search
|
| 208 |
+
|
| 209 |
+
Ask questions about your chat data in natural language (Hebrew or English).
|
| 210 |
+
|
| 211 |
+
### Setup AI Provider (Free Options)
|
| 212 |
+
|
| 213 |
+
#### Option 1: Ollama (Recommended - 100% Local & Free)
|
| 214 |
+
|
| 215 |
+
```bash
|
| 216 |
+
# Install Ollama (https://ollama.ai)
|
| 217 |
+
curl -fsSL https://ollama.ai/install.sh | sh
|
| 218 |
+
|
| 219 |
+
# Pull a model
|
| 220 |
+
ollama pull llama3.2
|
| 221 |
+
|
| 222 |
+
# Start Ollama server
|
| 223 |
+
ollama serve
|
| 224 |
+
```
|
| 225 |
+
|
| 226 |
+
#### Option 2: Groq (Free API Tier)
|
| 227 |
+
|
| 228 |
+
```bash
|
| 229 |
+
# Get free API key from https://console.groq.com
|
| 230 |
+
export GROQ_API_KEY="your_api_key"
|
| 231 |
+
```
|
| 232 |
+
|
| 233 |
+
#### Option 3: Google Gemini (Free API Tier)
|
| 234 |
+
|
| 235 |
+
```bash
|
| 236 |
+
# Get free API key from https://makersuite.google.com/app/apikey
|
| 237 |
+
export GEMINI_API_KEY="your_api_key"
|
| 238 |
+
```
|
| 239 |
+
|
| 240 |
+
### AI Search Examples
|
| 241 |
+
|
| 242 |
+
```
|
| 243 |
+
┌─────────────────────────────────────────────────────────────────────────┐
|
| 244 |
+
│ 🤖 AI Search - Natural Language Queries │
|
| 245 |
+
├─────────────────────────────────────────────────────────────────────────┤
|
| 246 |
+
│ │
|
| 247 |
+
│ Query: "מי שלח הכי הרבה הודעות?" │
|
| 248 |
+
│ Answer: המשתמש הפעיל ביותר הוא דני עם 5,432 הודעות │
|
| 249 |
+
│ │
|
| 250 |
+
│ Query: "מתי היו הכי הרבה הודעות?" │
|
| 251 |
+
│ Answer: היום הפעיל ביותר היה 15.03.2024 עם 342 הודעות │
|
| 252 |
+
│ │
|
| 253 |
+
│ Query: "Who mentioned @admin the most?" │
|
| 254 |
+
│ Answer: User "Mike" mentioned @admin 47 times │
|
| 255 |
+
│ │
|
| 256 |
+
│ Query: "הראה הודעות עם קישורים מהשבוע האחרון" │
|
| 257 |
+
│ Answer: נמצאו 23 הודעות עם קישורים... │
|
| 258 |
+
│ │
|
| 259 |
+
└─────────────────────────────────────────────────────────────────────────┘
|
| 260 |
+
```
|
| 261 |
+
|
| 262 |
+
### AI Search API
|
| 263 |
+
|
| 264 |
+
```python
|
| 265 |
+
from ai_search import AISearchEngine
|
| 266 |
+
|
| 267 |
+
# Initialize with Ollama (local)
|
| 268 |
+
ai = AISearchEngine('telegram.db', provider='ollama')
|
| 269 |
+
|
| 270 |
+
# Or with Groq
|
| 271 |
+
ai = AISearchEngine('telegram.db', provider='groq', api_key='your_key')
|
| 272 |
+
|
| 273 |
+
# Search
|
| 274 |
+
result = ai.search("מי הכי פעיל בלילה?")
|
| 275 |
+
print(result['answer']) # Natural language answer
|
| 276 |
+
print(result['sql']) # Generated SQL query
|
| 277 |
+
print(result['results']) # Raw data
|
| 278 |
+
```
|
| 279 |
+
|
| 280 |
+
---
|
| 281 |
+
|
| 282 |
+
## Database Updates
|
| 283 |
+
|
| 284 |
+
Update your database with new JSON exports without losing existing data.
|
| 285 |
+
|
| 286 |
+
### Via Web UI
|
| 287 |
+
|
| 288 |
+
1. Go to **Settings** page in the dashboard
|
| 289 |
+
2. Drag & drop your new `result.json` file
|
| 290 |
+
3. Wait for processing (duplicate detection is automatic)
|
| 291 |
+
4. See summary of new messages added
|
| 292 |
+
|
| 293 |
+
### Via CLI
|
| 294 |
+
|
| 295 |
+
```bash
|
| 296 |
+
# Update existing database with new JSON
|
| 297 |
+
python indexer.py new_export.json --db telegram.db --update
|
| 298 |
+
|
| 299 |
+
# What happens:
|
| 300 |
+
# 1. Loads existing message IDs into Bloom filter (O(n))
|
| 301 |
+
# 2. For each message in JSON:
|
| 302 |
+
# - Check if exists using Bloom filter (O(1))
|
| 303 |
+
# - Only insert if new
|
| 304 |
+
# 3. Re-index FTS if needed
|
| 305 |
+
# 4. Report: X new messages, Y duplicates skipped
|
| 306 |
+
```
|
| 307 |
+
|
| 308 |
+
### Incremental Update Process
|
| 309 |
+
|
| 310 |
+
```
|
| 311 |
+
┌─────────────────────────────────────────────────────────────────────────┐
|
| 312 |
+
│ INCREMENTAL UPDATE PROCESS │
|
| 313 |
+
├─────────────────────────────────────────────────────────────────────────┤
|
| 314 |
+
│ │
|
| 315 |
+
│ Existing DB New JSON │
|
| 316 |
+
│ ┌─────────────┐ ┌─────────────┐ │
|
| 317 |
+
│ │ msg_1 ✓ │ │ msg_1 │ → Skip (duplicate) │
|
| 318 |
+
│ │ msg_2 ✓ │ │ msg_2 │ → Skip (duplicate) │
|
| 319 |
+
│ │ msg_3 ✓ │ │ msg_5 NEW │ → Insert │
|
| 320 |
+
│ │ msg_4 ✓ │ │ msg_6 NEW │ → Insert │
|
| 321 |
+
│ └─────────────┘ └─────────────┘ │
|
| 322 |
+
│ │ │ │
|
| 323 |
+
│ │ Bloom Filter │ │
|
| 324 |
+
│ │ ┌───────────┐ │ │
|
| 325 |
+
│ └─────▶│ O(1) test │◀─────────┘ │
|
| 326 |
+
│ └───────────┘ │
|
| 327 |
+
│ │
|
| 328 |
+
│ Result: Only msg_5 and msg_6 added (fast!) │
|
| 329 |
+
│ │
|
| 330 |
+
└─────────────────────────────────────────────────────────────────────────┘
|
| 331 |
+
```
|
| 332 |
+
|
| 333 |
+
---
|
| 334 |
+
|
| 335 |
+
## Architecture
|
| 336 |
+
|
| 337 |
+
### System Overview
|
| 338 |
+
|
| 339 |
+
```
|
| 340 |
+
┌─────────────────────────────────────────────────────────────────┐
|
| 341 |
+
│ INPUT │
|
| 342 |
+
│ ┌─────────────────────────────────────────────────────────┐ │
|
| 343 |
+
│ │ Telegram JSON Export (result.json) │ │
|
| 344 |
+
│ │ ├── messages[] │ │
|
| 345 |
+
│ │ │ ├── id, date, from, text │ │
|
| 346 |
+
│ │ │ ├── reply_to_message_id │ │
|
| 347 |
+
│ │ │ └── text_entities[] (links, mentions) │ │
|
| 348 |
+
│ │ └── ... │ │
|
| 349 |
+
│ └─────────────────────────────────────────────────────────┘ │
|
| 350 |
+
└─────────────────────────┬───────────────────────────────────────┘
|
| 351 |
+
│
|
| 352 |
+
▼
|
| 353 |
+
┌─────────────────────────────────────────────────────────────────┐
|
| 354 |
+
│ INDEXER (indexer.py) │
|
| 355 |
+
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
| 356 |
+
│ │ Batch │ │ Bloom │ │ Reply │ │
|
| 357 |
+
│ │ Processing │ │ Filter │ │ Graph │ │
|
| 358 |
+
│ │ (1000/tx) │ │ (Dedup O(1))│ │ Builder │ │
|
| 359 |
+
│ └─────────────┘ └─────────────┘ └─────────────┘ │
|
| 360 |
+
└─────────────────────────┬───────────────────────────────────────┘
|
| 361 |
+
│
|
| 362 |
+
▼
|
| 363 |
+
┌─────────────────────────────────────────────────────────────────┐
|
| 364 |
+
│ SQLite DATABASE │
|
| 365 |
+
│ ┌─────────────────────────────────────────────────────────┐ │
|
| 366 |
+
│ │ messages │ FTS5 Index │ reply_graph │ │
|
| 367 |
+
│ │ ├── id (PK) │ ├── text_plain │ ├── parent_id │ │
|
| 368 |
+
│ │ ├── text_plain │ └── from_name │ └── child_id │ │
|
| 369 |
+
│ │ ├── from_id │ │ │ │
|
| 370 |
+
│ │ ├── date_unixtime │ entities │ threads │ │
|
| 371 |
+
│ │ └── ... │ ├── links │ └── messages │ │
|
| 372 |
+
│ │ │ └── mentions │ │ │
|
| 373 |
+
│ └─────────────────────────────────────────────────────────┘ │
|
| 374 |
+
└─────────────────────────┬───────────────────────────────────────┘
|
| 375 |
+
│
|
| 376 |
+
┌───────────────┼───────────────┐
|
| 377 |
+
▼ ▼ ▼
|
| 378 |
+
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
| 379 |
+
│ SEARCH │ │ ANALYZER │ │ VECTOR │
|
| 380 |
+
│ (search.py) │ │(analyzer.py)│ │ (optional) │
|
| 381 |
+
│ │ │ │ │ │
|
| 382 |
+
│ • FTS5+BM25 │ │ • Top-K │ │ • FAISS │
|
| 383 |
+
│ • Fuzzy │ │ • LCS │ │ • Semantic │
|
| 384 |
+
│ • Threads │ │ • Rank Tree │ │ • Clustering│
|
| 385 |
+
│ • LRU Cache │ │ • Percentile│ │ │
|
| 386 |
+
└─────────────┘ └─────────────┘ └─────────────┘
|
| 387 |
+
```
|
| 388 |
+
|
| 389 |
+
### Data Flow
|
| 390 |
+
|
| 391 |
+
```
|
| 392 |
+
JSON Message Database Tables Search/Analytics
|
| 393 |
+
─────────── ─────────────── ────────────────
|
| 394 |
+
|
| 395 |
+
{ ┌─────────────┐
|
| 396 |
+
"id": 548795, ───▶ │ messages │ ───▶ Full-text search
|
| 397 |
+
"text": "שלום", └─────────────┘ User filtering
|
| 398 |
+
"from": "User1", Date range queries
|
| 399 |
+
"from_id": "user123", ─▶ ┌─────────────┐
|
| 400 |
+
"date_unixtime": ..., │ users │ ───▶ Top users (Heap)
|
| 401 |
+
└─────────────┘ User rank (Rank Tree)
|
| 402 |
+
"text_entities": [
|
| 403 |
+
{"type": "link", ────▶ ┌─────────────┐
|
| 404 |
+
"text": "url"} │ entities │ ───▶ Link analysis
|
| 405 |
+
], └─────────────┘ Mention network
|
| 406 |
+
|
| 407 |
+
"reply_to_message_id" ─▶ ┌─────────────┐
|
| 408 |
+
│ reply_graph │ ───▶ Thread DFS/BFS
|
| 409 |
+
} └─────────────┘ Conversation view
|
| 410 |
+
```
|
| 411 |
+
|
| 412 |
+
### File Structure
|
| 413 |
+
|
| 414 |
+
```
|
| 415 |
+
telegram/
|
| 416 |
+
│
|
| 417 |
+
├── dashboard.py # 🌐 Web Dashboard (Flask)
|
| 418 |
+
│ └── Routes: /, /users, /chat, /search, /moderation, /settings
|
| 419 |
+
│ └── API: /api/overview, /api/users, /api/search, /api/update, etc.
|
| 420 |
+
│
|
| 421 |
+
├── ai_search.py # 🤖 AI-Powered Search
|
| 422 |
+
│ └── AISearchEngine class
|
| 423 |
+
│ ├── Natural language to SQL
|
| 424 |
+
│ ├── Ollama/Groq/Gemini providers
|
| 425 |
+
│ └── Hebrew/English support
|
| 426 |
+
│
|
| 427 |
+
├── indexer.py # JSON → SQLite indexer
|
| 428 |
+
│ ├── OptimizedIndexer class
|
| 429 |
+
│ │ ├── Batch processing (100x faster)
|
| 430 |
+
│ │ ├── Bloom filter (duplicate detection)
|
| 431 |
+
│ │ └── Graph builder (reply threads)
|
| 432 |
+
│ └── IncrementalIndexer class
|
| 433 |
+
│ ├── Update existing database
|
| 434 |
+
│ ├── Bloom filter duplicate check
|
| 435 |
+
│ └── Only insert new messages
|
| 436 |
+
│
|
| 437 |
+
├── search.py # Search interface
|
| 438 |
+
│ └── TelegramSearch class
|
| 439 |
+
│ ├── FTS5 full-text search
|
| 440 |
+
│ ├── Fuzzy trigram search
|
| 441 |
+
│ ├── LRU query cache
|
| 442 |
+
│ └── DFS/BFS thread traversal
|
| 443 |
+
│
|
| 444 |
+
├── analyzer.py # Analytics & statistics
|
| 445 |
+
│ └── TelegramAnalyzer class
|
| 446 |
+
│ ├── LCS similar messages
|
| 447 |
+
│ ├── Heap-based Top-K
|
| 448 |
+
│ ├── Selection percentiles
|
| 449 |
+
│ ├── Rank Tree queries
|
| 450 |
+
│ └── Bucket Sort histograms
|
| 451 |
+
│
|
| 452 |
+
├── data_structures.py # Core data structures
|
| 453 |
+
│ ├── BloomFilter # O(1) membership test
|
| 454 |
+
│ ├── Trie # O(k) prefix search
|
| 455 |
+
│ ├── LRUCache # O(1) caching
|
| 456 |
+
│ ├── ReplyGraph # DFS/BFS traversal
|
| 457 |
+
│ └── TrigramIndex # Fuzzy matching
|
| 458 |
+
│
|
| 459 |
+
├── algorithms.py # Course algorithms
|
| 460 |
+
│ ├── LCS # Similar message detection
|
| 461 |
+
│ ├── TopK (Heap) # Efficient ranking
|
| 462 |
+
│ ├── Selection # O(n) percentiles
|
| 463 |
+
│ ├── RankTree # O(log n) rank queries
|
| 464 |
+
│ └── BucketSort # Time histograms
|
| 465 |
+
│
|
| 466 |
+
├── templates/ # 🎨 HTML Templates
|
| 467 |
+
│ ├── index.html # Overview dashboard
|
| 468 |
+
│ ├── users.html # User leaderboard
|
| 469 |
+
│ ├── chat.html # Telegram-like chat view
|
| 470 |
+
│ ├── search.html # Search interface
|
| 471 |
+
│ ├── moderation.html # Content analytics
|
| 472 |
+
│ └── settings.html # Settings & DB update
|
| 473 |
+
│
|
| 474 |
+
├── static/ # 📁 Static assets
|
| 475 |
+
│ ├── css/style.css # Dashboard styles
|
| 476 |
+
│ └── js/dashboard.js # Dashboard scripts
|
| 477 |
+
│
|
| 478 |
+
├── vector_search.py # Optional: Semantic search
|
| 479 |
+
│ └── VectorSearch class (requires FAISS)
|
| 480 |
+
│
|
| 481 |
+
├── schema.sql # Database schema
|
| 482 |
+
└── telegram.db # SQLite database (created)
|
| 483 |
+
```
|
| 484 |
+
|
| 485 |
+
---
|
| 486 |
+
|
| 487 |
+
## Usage Guide
|
| 488 |
+
|
| 489 |
+
### Web Dashboard (Recommended)
|
| 490 |
+
|
| 491 |
+
```bash
|
| 492 |
+
# Start the dashboard
|
| 493 |
+
python dashboard.py
|
| 494 |
+
|
| 495 |
+
# Custom port
|
| 496 |
+
python dashboard.py --port 8080
|
| 497 |
+
|
| 498 |
+
# Custom database
|
| 499 |
+
python dashboard.py --db my_chat.db
|
| 500 |
+
```
|
| 501 |
+
|
| 502 |
+
### Indexing
|
| 503 |
+
|
| 504 |
+
```bash
|
| 505 |
+
# Basic indexing
|
| 506 |
+
python indexer.py result.json
|
| 507 |
+
|
| 508 |
+
# Custom database name
|
| 509 |
+
python indexer.py result.json --db my_chat.db
|
| 510 |
+
|
| 511 |
+
# With trigram index (for fuzzy search)
|
| 512 |
+
python indexer.py result.json --build-trigrams
|
| 513 |
+
|
| 514 |
+
# Larger batch size (faster for big files)
|
| 515 |
+
python indexer.py result.json --batch-size 5000
|
| 516 |
+
|
| 517 |
+
# Update existing database with new JSON (incremental)
|
| 518 |
+
python indexer.py new_export.json --db telegram.db --update
|
| 519 |
+
```
|
| 520 |
+
|
| 521 |
+
### Searching
|
| 522 |
+
|
| 523 |
+
```bash
|
| 524 |
+
# Basic search (Hebrew supported)
|
| 525 |
+
python search.py "שלום"
|
| 526 |
+
|
| 527 |
+
# Search with filters
|
| 528 |
+
python search.py "מילה" --user user123456 --limit 50
|
| 529 |
+
|
| 530 |
+
# Date range
|
| 531 |
+
python search.py "חדשות" --from-date 2024-01-01 --to-date 2024-12-31
|
| 532 |
+
|
| 533 |
+
# Fuzzy search (finds typos)
|
| 534 |
+
python search.py "שלמ" --fuzzy --threshold 0.3
|
| 535 |
+
|
| 536 |
+
# View conversation thread
|
| 537 |
+
python search.py --thread 548795
|
| 538 |
+
|
| 539 |
+
# List all links
|
| 540 |
+
python search.py --list-links
|
| 541 |
+
|
| 542 |
+
# List all mentions
|
| 543 |
+
python search.py --list-mentions
|
| 544 |
+
```
|
| 545 |
+
|
| 546 |
+
### Analytics
|
| 547 |
+
|
| 548 |
+
```bash
|
| 549 |
+
# General statistics
|
| 550 |
+
python analyzer.py --stats
|
| 551 |
+
|
| 552 |
+
# Top users (Heap-based O(n log k))
|
| 553 |
+
python analyzer.py --top-users --limit 10
|
| 554 |
+
|
| 555 |
+
# Hourly activity
|
| 556 |
+
python analyzer.py --hourly
|
| 557 |
+
|
| 558 |
+
# Daily activity
|
| 559 |
+
python analyzer.py --daily
|
| 560 |
+
|
| 561 |
+
# Top words
|
| 562 |
+
python analyzer.py --words --limit 30
|
| 563 |
+
|
| 564 |
+
# Top domains
|
| 565 |
+
python analyzer.py --domains
|
| 566 |
+
|
| 567 |
+
# Find similar messages (LCS algorithm)
|
| 568 |
+
python analyzer.py --similar --threshold 0.7
|
| 569 |
+
|
| 570 |
+
# Find reposts
|
| 571 |
+
python analyzer.py --reposts
|
| 572 |
+
|
| 573 |
+
# Message length percentiles (Selection algorithm)
|
| 574 |
+
python analyzer.py --percentiles
|
| 575 |
+
|
| 576 |
+
# Response time percentiles
|
| 577 |
+
python analyzer.py --response-times
|
| 578 |
+
|
| 579 |
+
# User rank (Rank Tree O(log n))
|
| 580 |
+
python analyzer.py --user-rank user123456
|
| 581 |
+
|
| 582 |
+
# Get user at rank #5
|
| 583 |
+
python analyzer.py --rank 5
|
| 584 |
+
|
| 585 |
+
# Activity histogram (Bucket Sort)
|
| 586 |
+
python analyzer.py --histogram --bucket-size 86400
|
| 587 |
+
|
| 588 |
+
# Export as JSON
|
| 589 |
+
python analyzer.py --stats --json > stats.json
|
| 590 |
+
```
|
| 591 |
+
|
| 592 |
+
---
|
| 593 |
+
|
| 594 |
+
## Algorithms
|
| 595 |
+
|
| 596 |
+
### Algorithm Complexity Comparison
|
| 597 |
+
|
| 598 |
+
```
|
| 599 |
+
┌────────────────────┬─────────────────┬─────────────────┬─────────────┐
|
| 600 |
+
│ Operation │ Naive Method │ Our Algorithm │ Improvement │
|
| 601 |
+
├────────────────────┼─────────────────┼─────────────────┼─────────────┤
|
| 602 |
+
│ Top-K users │ O(n log n) sort │ O(n log k) heap │ ~10x │
|
| 603 |
+
│ Find median │ O(n log n) sort │ O(n) selection │ ~5x │
|
| 604 |
+
│ User rank query │ O(n) scan │ O(log n) tree │ ~100x │
|
| 605 |
+
│ Duplicate check │ O(n) lookup │ O(1) bloom │ ~1000x │
|
| 606 |
+
│ Similar messages │ O(n²m²) naive │ O(n²m) LCS+DP │ ~10x │
|
| 607 |
+
│ Time histogram │ O(n log n) sort │ O(n+k) bucket │ ~5x │
|
| 608 |
+
│ Thread traversal │ O(n) repeated │ O(V+E) DFS/BFS │ ~10x │
|
| 609 |
+
└────────────────────┴─────────────────┴─────────────────┴─────────────┘
|
| 610 |
+
```
|
| 611 |
+
|
| 612 |
+
### 1. LCS (Longest Common Subsequence)
|
| 613 |
+
|
| 614 |
+
**Purpose:** Find similar/duplicate messages
|
| 615 |
+
|
| 616 |
+
```
|
| 617 |
+
String 1: "שלום לכולם מה קורה"
|
| 618 |
+
String 2: "שלום לכולם מה נשמע"
|
| 619 |
+
↓
|
| 620 |
+
LCS: "שלום לכולם מה "
|
| 621 |
+
Similarity: 77.78%
|
| 622 |
+
```
|
| 623 |
+
|
| 624 |
+
**Algorithm:**
|
| 625 |
+
```
|
| 626 |
+
┌───┬───┬───┬───┬───┬───┐
|
| 627 |
+
│ │ ∅ │ A │ B │ C │ D │ DP Table
|
| 628 |
+
├───┼───┼───┼───┼───┼───┤
|
| 629 |
+
│ ∅ │ 0 │ 0 │ 0 │ 0 │ 0 │ dp[i][j] = length of LCS
|
| 630 |
+
│ A │ 0 │ 1 │ 1 │ 1 │ 1 │ for first i and j chars
|
| 631 |
+
│ C │ 0 │ 1 │ 1 │ 2 │ 2 │
|
| 632 |
+
│ B │ 0 │ 1 │ 2 │ 2 │ 2 │ Time: O(m × n)
|
| 633 |
+
│ D │ 0 │ 1 │ 2 │ 2 │ 3 │ Space: O(min(m,n))
|
| 634 |
+
└───┴───┴───┴───┴───┴───┘
|
| 635 |
+
```
|
| 636 |
+
|
| 637 |
+
### 2. Heap-based Top-K
|
| 638 |
+
|
| 639 |
+
**Purpose:** Find top K items without sorting everything
|
| 640 |
+
|
| 641 |
+
```
|
| 642 |
+
Finding Top 3 from [5,2,8,1,9,3,7,4,6]
|
| 643 |
+
|
| 644 |
+
Min-Heap (size K=3):
|
| 645 |
+
|
| 646 |
+
Step 1: [5] Add 5
|
| 647 |
+
Step 2: [2,5] Add 2
|
| 648 |
+
Step 3: [2,5,8] Add 8 (heap full)
|
| 649 |
+
Step 4: [2,5,8] Skip 1 (< min)
|
| 650 |
+
Step 5: [5,9,8] Replace 2 with 9
|
| 651 |
+
Step 6: [5,9,8] Skip 3 (< min)
|
| 652 |
+
Step 7: [7,9,8] Replace 5 with 7
|
| 653 |
+
...
|
| 654 |
+
Result: [7,8,9] Top 3!
|
| 655 |
+
|
| 656 |
+
Time: O(n log k) vs O(n log n) for full sort
|
| 657 |
+
```
|
| 658 |
+
|
| 659 |
+
### 3. Selection Algorithm (Median of Medians)
|
| 660 |
+
|
| 661 |
+
**Purpose:** Find k-th element or percentiles in O(n)
|
| 662 |
+
|
| 663 |
+
```
|
| 664 |
+
Find median of [3,1,4,1,5,9,2,6,5,3,5]
|
| 665 |
+
|
| 666 |
+
┌─────────────────────────────────────────┐
|
| 667 |
+
│ Divide into groups of 5: │
|
| 668 |
+
│ [3,1,4,1,5] [9,2,6,5,3] [5] │
|
| 669 |
+
│ ↓ ↓ ↓ │
|
| 670 |
+
│ Medians: 3 5 5 │
|
| 671 |
+
│ ↓ │
|
| 672 |
+
│ Median of medians: 5 (pivot) │
|
| 673 |
+
│ ↓ │
|
| 674 |
+
│ Partition around 5 │
|
| 675 |
+
│ [3,1,4,1,2,3] [5,5,5] [9,6] │
|
| 676 |
+
│ 6 elements 3 2 │
|
| 677 |
+
│ ↓ │
|
| 678 |
+
│ Median is at index 5 (< 6) → recurse left │
|
| 679 |
+
└─────────────────────────────────────────┘
|
| 680 |
+
|
| 681 |
+
Time: O(n) guaranteed (not just average!)
|
| 682 |
+
```
|
| 683 |
+
|
| 684 |
+
### 4. Rank Tree (Order Statistics Tree)
|
| 685 |
+
|
| 686 |
+
**Purpose:** O(log n) rank queries
|
| 687 |
+
|
| 688 |
+
```
|
| 689 |
+
AVL Tree with size augmentation:
|
| 690 |
+
|
| 691 |
+
┌───────────────┐
|
| 692 |
+
│ 150 (size=5) │
|
| 693 |
+
└───────┬───────┘
|
| 694 |
+
┌────────┴────────┐
|
| 695 |
+
┌─────┴─────┐ ┌─────┴─────┐
|
| 696 |
+
│ 100 (s=2) │ │ 250 (s=2) │
|
| 697 |
+
└─────┬─────┘ └─────┬─────┘
|
| 698 |
+
┌─────┴ ┌──┴
|
| 699 |
+
┌───┴───┐ ┌───┴───┐
|
| 700 |
+
│50 (1) │ │300 (1)│
|
| 701 |
+
└───────┘ └───────┘
|
| 702 |
+
|
| 703 |
+
select(3) → 150 (3rd smallest)
|
| 704 |
+
rank(150) → 3 (rank of 150)
|
| 705 |
+
|
| 706 |
+
Time: O(log n) for both operations
|
| 707 |
+
```
|
| 708 |
+
|
| 709 |
+
### 5. Bucket Sort (Time Histograms)
|
| 710 |
+
|
| 711 |
+
**Purpose:** O(n+k) time-based grouping
|
| 712 |
+
|
| 713 |
+
```
|
| 714 |
+
Messages with timestamps:
|
| 715 |
+
[1000, 1500, 2500, 1200, 3000]
|
| 716 |
+
|
| 717 |
+
Bucket size: 1000 seconds
|
| 718 |
+
|
| 719 |
+
┌─────────┬─────────┬─────────┬─────────┐
|
| 720 |
+
│ 0-1000 │1000-2000│2000-3000│3000-4000│
|
| 721 |
+
├─────────┼─────────┼─────────┼─────────┤
|
| 722 |
+
│ │ 1000 │ 2500 │ 3000 │
|
| 723 |
+
│ │ 1500 │ │ │
|
| 724 |
+
│ │ 1200 │ │ │
|
| 725 |
+
├─────────┼─────────┼─────────┼─────────┤
|
| 726 |
+
│ Count:0 │ Count:3 │ Count:1 │ Count:1 │
|
| 727 |
+
└─────────┴─────────┴─────────┴─────────┘
|
| 728 |
+
|
| 729 |
+
Time: O(n + k) where k = number of buckets
|
| 730 |
+
```
|
| 731 |
+
|
| 732 |
+
### 6. DFS/BFS Thread Traversal
|
| 733 |
+
|
| 734 |
+
**Purpose:** Reconstruct conversation threads
|
| 735 |
+
|
| 736 |
+
```
|
| 737 |
+
Reply Graph:
|
| 738 |
+
|
| 739 |
+
[1] Original message
|
| 740 |
+
│
|
| 741 |
+
├──[2] Reply to 1
|
| 742 |
+
│ │
|
| 743 |
+
│ ├──[4] Reply to 2
|
| 744 |
+
│ │
|
| 745 |
+
│ └──[5] Reply to 2
|
| 746 |
+
│
|
| 747 |
+
└──[3] Reply to 1
|
| 748 |
+
|
| 749 |
+
DFS order: [1, 2, 4, 5, 3] (deep first)
|
| 750 |
+
BFS order: [1, 2, 3, 4, 5] (level by level)
|
| 751 |
+
|
| 752 |
+
With depth info:
|
| 753 |
+
[1] depth=0
|
| 754 |
+
[2] depth=1
|
| 755 |
+
[4] depth=2
|
| 756 |
+
[5] depth=2
|
| 757 |
+
[3] depth=1
|
| 758 |
+
|
| 759 |
+
Time: O(V + E)
|
| 760 |
+
```
|
| 761 |
+
|
| 762 |
+
---
|
| 763 |
+
|
| 764 |
+
## API Reference
|
| 765 |
+
|
| 766 |
+
### Dashboard REST API
|
| 767 |
+
|
| 768 |
+
The web dashboard exposes a REST API for all operations:
|
| 769 |
+
|
| 770 |
+
```
|
| 771 |
+
┌─────────────────────────────────────────────────────────────────────────┐
|
| 772 |
+
│ REST API ENDPOINTS │
|
| 773 |
+
├─────────────────────────────────────────────────────────────────────────┤
|
| 774 |
+
│ │
|
| 775 |
+
│ GET /api/overview Overview statistics │
|
| 776 |
+
│ ?timeframe=month (today|yesterday|week|month|year|all) │
|
| 777 |
+
│ │
|
| 778 |
+
│ GET /api/users User leaderboard │
|
| 779 |
+
│ ?timeframe=month Timeframe filter │
|
| 780 |
+
│ &limit=100 Max users │
|
| 781 |
+
│ │
|
| 782 |
+
│ GET /api/user/<user_id> User details │
|
| 783 |
+
│ ?timeframe=month Includes hourly activity │
|
| 784 |
+
│ │
|
| 785 |
+
│ GET /api/search Full-text search │
|
| 786 |
+
│ ?q=search_term Search query │
|
| 787 |
+
│ &timeframe=all Timeframe filter │
|
| 788 |
+
│ &limit=20&offset=0 Pagination │
|
| 789 |
+
│ │
|
| 790 |
+
│ POST /api/ai/search AI-powered search │
|
| 791 |
+
│ {"query": "..."} Natural language query │
|
| 792 |
+
│ │
|
| 793 |
+
│ GET /api/chat/messages Chat messages │
|
| 794 |
+
│ ?limit=50&offset=0 Pagination │
|
| 795 |
+
│ &user_id=... Filter by user │
|
| 796 |
+
│ &from_date=... Date range │
|
| 797 |
+
│ │
|
| 798 |
+
│ GET /api/chat/thread/<id> Get conversation thread │
|
| 799 |
+
│ Returns full thread with DFS │
|
| 800 |
+
│ │
|
| 801 |
+
│ GET /api/top/domains Top shared domains │
|
| 802 |
+
│ GET /api/top/mentions Top mentioned users │
|
| 803 |
+
│ GET /api/top/words Most frequent words │
|
| 804 |
+
│ │
|
| 805 |
+
│ POST /api/update Update database with JSON │
|
| 806 |
+
│ (multipart form) File upload │
|
| 807 |
+
│ │
|
| 808 |
+
│ GET /api/db/stats Database statistics │
|
| 809 |
+
│ Size, counts, date range │
|
| 810 |
+
│ │
|
| 811 |
+
│ GET /api/export/users Export users as CSV │
|
| 812 |
+
│ GET /api/export/messages Export messages as CSV │
|
| 813 |
+
│ │
|
| 814 |
+
├─────────────────────────────────────────────────────────────────────────┤
|
| 815 |
+
│ ALGORITHM-POWERED ENDPOINTS │
|
| 816 |
+
├─────────────────────────────────────────────────────────────────────────┤
|
| 817 |
+
│ │
|
| 818 |
+
│ GET /api/similar/<id> Find similar messages (LCS algorithm) │
|
| 819 |
+
│ ?threshold=0.7 Similarity threshold │
|
| 820 |
+
│ &limit=10 Max results │
|
| 821 |
+
│ Complexity: O(n*m) n=sample, m=avg length │
|
| 822 |
+
│ │
|
| 823 |
+
│ GET /api/analytics/similar Find all similar pairs in DB │
|
| 824 |
+
│ ?threshold=0.8 Similarity threshold │
|
| 825 |
+
│ Algorithm: LCS O(n² * m) with early termination │
|
| 826 |
+
│ │
|
| 827 |
+
│ GET /api/user/rank/<id> Get user rank (RankTree) │
|
| 828 |
+
│ Complexity: O(log n) vs O(n) SQL scan │
|
| 829 |
+
│ │
|
| 830 |
+
│ GET /api/user/by-rank/<k> Get k-th ranked user (RankTree) │
|
| 831 |
+
│ Algorithm: select(k) O(log n) │
|
| 832 |
+
│ │
|
| 833 |
+
│ GET /api/analytics/histogram Activity histogram (Bucket Sort) │
|
| 834 |
+
│ ?bucket=86400 Bucket size in seconds │
|
| 835 |
+
│ Complexity: O(n + k) k=number of buckets │
|
| 836 |
+
│ │
|
| 837 |
+
│ GET /api/analytics/percentiles Message length stats (Selection) │
|
| 838 |
+
│ Algorithm: Median of Medians selection, O(n) guaranteed │
|
| 839 |
+
│ Returns: min,max,median,p25,p75,p90,p95,p99 │
|
| 840 |
+
│ │
|
| 841 |
+
└─────────────────────────────────────────────────────────────────────────┘
|
| 842 |
+
```
|
| 843 |
+
|
| 844 |
+
### TelegramSearch
|
| 845 |
+
|
| 846 |
+
```python
|
| 847 |
+
from search import TelegramSearch
|
| 848 |
+
|
| 849 |
+
with TelegramSearch('telegram.db') as search:
|
| 850 |
+
# Full-text search
|
| 851 |
+
results = search.search("שלום", limit=50)
|
| 852 |
+
|
| 853 |
+
# With filters
|
| 854 |
+
results = search.search(
|
| 855 |
+
"מילה",
|
| 856 |
+
user_id="user123",
|
| 857 |
+
from_date=1704067200, # Unix timestamp
|
| 858 |
+
to_date=1735689600,
|
| 859 |
+
has_links=True
|
| 860 |
+
)
|
| 861 |
+
|
| 862 |
+
# Fuzzy search
|
| 863 |
+
results = search.fuzzy_search("שלמ", threshold=0.3)
|
| 864 |
+
|
| 865 |
+
# Get thread (DFS)
|
| 866 |
+
thread = search.get_thread_dfs(message_id=548795)
|
| 867 |
+
|
| 868 |
+
# Get thread with depth
|
| 869 |
+
thread = search.get_thread_with_depth(message_id=548795)
|
| 870 |
+
# Returns: [(message_dict, depth), ...]
|
| 871 |
+
|
| 872 |
+
# Autocomplete usernames
|
| 873 |
+
suggestions = search.autocomplete_user("@user")
|
| 874 |
+
```
|
| 875 |
+
|
| 876 |
+
### TelegramAnalyzer
|
| 877 |
+
|
| 878 |
+
```python
|
| 879 |
+
from analyzer import TelegramAnalyzer
|
| 880 |
+
|
| 881 |
+
with TelegramAnalyzer('telegram.db') as analyzer:
|
| 882 |
+
# Statistics
|
| 883 |
+
stats = analyzer.get_stats()
|
| 884 |
+
|
| 885 |
+
# Top users (Heap-based)
|
| 886 |
+
top_users = analyzer.get_top_users(limit=10)
|
| 887 |
+
|
| 888 |
+
# Similar messages (LCS)
|
| 889 |
+
similar = analyzer.find_similar_messages(threshold=0.7)
|
| 890 |
+
|
| 891 |
+
# Percentiles (Selection algorithm)
|
| 892 |
+
percentiles = analyzer.get_message_length_stats()
|
| 893 |
+
# Returns: {min, max, median, p25, p75, p90, p95, p99}
|
| 894 |
+
|
| 895 |
+
# User rank (Rank Tree)
|
| 896 |
+
rank_info = analyzer.get_user_rank("user123")
|
| 897 |
+
# Returns: {rank, total_users, percentile}
|
| 898 |
+
|
| 899 |
+
# Get user by rank
|
| 900 |
+
user = analyzer.get_user_by_rank(5)
|
| 901 |
+
|
| 902 |
+
# Histogram (Bucket Sort)
|
| 903 |
+
hist = analyzer.get_activity_histogram(bucket_size=86400)
|
| 904 |
+
```
|
| 905 |
+
|
| 906 |
+
---
|
| 907 |
+
|
| 908 |
+
## Examples
|
| 909 |
+
|
| 910 |
+
### Example 1: Find Most Active Hours
|
| 911 |
+
|
| 912 |
+
```python
|
| 913 |
+
from analyzer import TelegramAnalyzer
|
| 914 |
+
|
| 915 |
+
with TelegramAnalyzer('telegram.db') as analyzer:
|
| 916 |
+
hourly = analyzer.get_hourly_activity()
|
| 917 |
+
|
| 918 |
+
# Find peak hour
|
| 919 |
+
peak_hour = max(hourly, key=hourly.get)
|
| 920 |
+
print(f"Most active hour: {peak_hour}:00 ({hourly[peak_hour]} messages)")
|
| 921 |
+
```
|
| 922 |
+
|
| 923 |
+
### Example 2: Detect Spam/Reposts
|
| 924 |
+
|
| 925 |
+
```python
|
| 926 |
+
from analyzer import TelegramAnalyzer
|
| 927 |
+
|
| 928 |
+
with TelegramAnalyzer('telegram.db') as analyzer:
|
| 929 |
+
reposts = analyzer.find_reposts(threshold=0.9)
|
| 930 |
+
|
| 931 |
+
for r in reposts[:10]:
|
| 932 |
+
print(f"Similarity: {r['similarity']:.0%}")
|
| 933 |
+
print(f" User 1: {r['user_1']}")
|
| 934 |
+
print(f" User 2: {r['user_2']}")
|
| 935 |
+
print(f" Text: {r['text_preview'][:50]}...")
|
| 936 |
+
```
|
| 937 |
+
|
| 938 |
+
### Example 3: Conversation Thread Analysis
|
| 939 |
+
|
| 940 |
+
```python
|
| 941 |
+
from search import TelegramSearch
|
| 942 |
+
|
| 943 |
+
with TelegramSearch('telegram.db') as search:
|
| 944 |
+
# Get full thread
|
| 945 |
+
thread = search.get_thread_with_depth(548795)
|
| 946 |
+
|
| 947 |
+
print("Conversation thread:")
|
| 948 |
+
for msg, depth in thread:
|
| 949 |
+
indent = " " * depth
|
| 950 |
+
print(f"{indent}[{msg['from_name']}]: {msg['text_plain'][:50]}")
|
| 951 |
+
```
|
| 952 |
+
|
| 953 |
+
### Example 4: User Ranking
|
| 954 |
+
|
| 955 |
+
```python
|
| 956 |
+
from analyzer import TelegramAnalyzer
|
| 957 |
+
|
| 958 |
+
with TelegramAnalyzer('telegram.db') as analyzer:
|
| 959 |
+
# Get rank of specific user
|
| 960 |
+
rank = analyzer.get_user_rank("user123456")
|
| 961 |
+
print(f"Rank: #{rank['rank']} of {rank['total_users']}")
|
| 962 |
+
print(f"Top {rank['percentile']:.1f}%")
|
| 963 |
+
|
| 964 |
+
# Get top 3 users
|
| 965 |
+
for i in range(1, 4):
|
| 966 |
+
user = analyzer.get_user_by_rank(i)
|
| 967 |
+
print(f"#{i}: {user['name']} ({user['count']} messages)")
|
| 968 |
+
```
|
| 969 |
+
|
| 970 |
+
---
|
| 971 |
+
|
| 972 |
+
## Performance
|
| 973 |
+
|
| 974 |
+
Tested on 100,000 messages:
|
| 975 |
+
|
| 976 |
+
| Operation | Time |
|
| 977 |
+
|-----------|------|
|
| 978 |
+
| Indexing | ~10 seconds |
|
| 979 |
+
| Full-text search | <10ms |
|
| 980 |
+
| Fuzzy search | ~100ms |
|
| 981 |
+
| Top-K (k=20) | ~50ms |
|
| 982 |
+
| User rank query | <1ms |
|
| 983 |
+
| Thread traversal | <5ms |
|
| 984 |
+
| Similar messages (1000 sample) | ~2 seconds |
|
| 985 |
+
|
| 986 |
+
---
|
| 987 |
+
|
| 988 |
+
## License
|
| 989 |
+
|
| 990 |
+
MIT License - Free for personal and commercial use.
|
| 991 |
+
|
| 992 |
+
---
|
| 993 |
+
|
| 994 |
+
## Contributing
|
| 995 |
+
|
| 996 |
+
1. Fork the repository
|
| 997 |
+
2. Create feature branch
|
| 998 |
+
3. Commit changes
|
| 999 |
+
4. Push and create PR
|
| 1000 |
+
|
| 1001 |
+
---
|
| 1002 |
+
|
| 1003 |
+
## Troubleshooting
|
| 1004 |
+
|
| 1005 |
+
### "Module not found" error
|
| 1006 |
+
```bash
|
| 1007 |
+
# Make sure you're in the telegram directory
|
| 1008 |
+
cd /path/to/telegram
|
| 1009 |
+
python indexer.py result.json
|
| 1010 |
+
```
|
| 1011 |
+
|
| 1012 |
+
### "Database is locked" error
|
| 1013 |
+
```bash
|
| 1014 |
+
# Close any other programs using the database
|
| 1015 |
+
# Or use a different database name
|
| 1016 |
+
python indexer.py result.json --db telegram2.db
|
| 1017 |
+
```
|
| 1018 |
+
|
| 1019 |
+
### Hebrew text not displaying correctly
|
| 1020 |
+
```bash
|
| 1021 |
+
# Ensure your terminal supports UTF-8
|
| 1022 |
+
export LANG=en_US.UTF-8
|
| 1023 |
+
```
|
| 1024 |
+
|
| 1025 |
+
---
|
| 1026 |
+
|
| 1027 |
+
## Credits
|
| 1028 |
+
|
| 1029 |
+
Algorithms implemented from "Data Structures and Introduction to Algorithms" course:
|
| 1030 |
+
- LCS (Longest Common Subsequence)
|
| 1031 |
+
- Heap-based Top-K
|
| 1032 |
+
- Selection Algorithm (Median of Medians)
|
| 1033 |
+
- Rank Tree (Order Statistics Tree)
|
| 1034 |
+
- Bucket Sort
|
| 1035 |
+
- DFS/BFS Graph Traversal
|
| 1036 |
+
- Bloom Filter
|
| 1037 |
+
- Trie (Prefix Tree)
|
ai_search.py
ADDED
|
@@ -0,0 +1,776 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
AI-Powered Search for Telegram Analytics
|
| 3 |
+
Supports: Ollama (local), Groq (free API), Google Gemini (free API)
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import sqlite3
|
| 7 |
+
import json
|
| 8 |
+
import re
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
from typing import List, Dict, Any, Optional
|
| 11 |
+
import os
|
| 12 |
+
|
| 13 |
+
# Try to import AI libraries
|
| 14 |
+
try:
|
| 15 |
+
import requests
|
| 16 |
+
HAS_REQUESTS = True
|
| 17 |
+
except ImportError:
|
| 18 |
+
HAS_REQUESTS = False
|
| 19 |
+
|
| 20 |
+
try:
|
| 21 |
+
from groq import Groq
|
| 22 |
+
HAS_GROQ = True
|
| 23 |
+
except ImportError:
|
| 24 |
+
HAS_GROQ = False
|
| 25 |
+
|
| 26 |
+
try:
|
| 27 |
+
import google.generativeai as genai
|
| 28 |
+
HAS_GEMINI = True
|
| 29 |
+
except ImportError:
|
| 30 |
+
HAS_GEMINI = False
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class AISearchEngine:
|
| 34 |
+
"""AI-powered natural language search for Telegram messages."""
|
| 35 |
+
|
| 36 |
+
def __init__(self, db_path: str, provider: str = "ollama", api_key: str = None):
|
| 37 |
+
"""
|
| 38 |
+
Initialize AI search engine.
|
| 39 |
+
|
| 40 |
+
Args:
|
| 41 |
+
db_path: Path to SQLite database
|
| 42 |
+
provider: "ollama", "groq", or "gemini"
|
| 43 |
+
api_key: API key for Groq or Gemini (not needed for Ollama)
|
| 44 |
+
"""
|
| 45 |
+
self.db_path = db_path
|
| 46 |
+
self.provider = provider
|
| 47 |
+
self.api_key = api_key or os.getenv(f"{provider.upper()}_API_KEY")
|
| 48 |
+
|
| 49 |
+
# Initialize provider
|
| 50 |
+
if provider == "groq" and HAS_GROQ:
|
| 51 |
+
self.client = Groq(api_key=self.api_key)
|
| 52 |
+
self.model = "llama-3.1-70b-versatile"
|
| 53 |
+
elif provider == "gemini" and HAS_GEMINI:
|
| 54 |
+
genai.configure(api_key=self.api_key)
|
| 55 |
+
# Using 2.5 Flash - free tier, fast, good for SQL
|
| 56 |
+
self.client = genai.GenerativeModel("gemini-2.5-flash")
|
| 57 |
+
elif provider == "ollama":
|
| 58 |
+
self.ollama_url = os.getenv("OLLAMA_URL", "http://localhost:11434")
|
| 59 |
+
self.model = os.getenv("OLLAMA_MODEL", "llama3.1")
|
| 60 |
+
else:
|
| 61 |
+
raise ValueError(f"Provider {provider} not available. Install required packages.")
|
| 62 |
+
|
| 63 |
+
def _get_db_schema(self) -> str:
|
| 64 |
+
"""Dynamically read schema from the actual database to stay in sync."""
|
| 65 |
+
conn = sqlite3.connect(self.db_path)
|
| 66 |
+
cursor = conn.cursor()
|
| 67 |
+
|
| 68 |
+
# Get all tables and their columns
|
| 69 |
+
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY name")
|
| 70 |
+
tables = [row[0] for row in cursor.fetchall()]
|
| 71 |
+
|
| 72 |
+
schema_parts = ["Database Schema:"]
|
| 73 |
+
for table in tables:
|
| 74 |
+
cursor.execute(f"PRAGMA table_info({table})")
|
| 75 |
+
cols = cursor.fetchall()
|
| 76 |
+
col_names = [f"{c[1]} ({c[2]})" for c in cols]
|
| 77 |
+
schema_parts.append(f" - {table}: {', '.join(col_names)}")
|
| 78 |
+
|
| 79 |
+
# Note virtual tables (FTS5) separately
|
| 80 |
+
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND sql LIKE '%fts5%'")
|
| 81 |
+
fts_tables = [row[0] for row in cursor.fetchall()]
|
| 82 |
+
if fts_tables:
|
| 83 |
+
schema_parts.append(f"\n FTS5 tables (use MATCH for search): {', '.join(fts_tables)}")
|
| 84 |
+
|
| 85 |
+
conn.close()
|
| 86 |
+
|
| 87 |
+
schema_parts.append("""
|
| 88 |
+
Key notes:
|
| 89 |
+
- date_unixtime: Unix timestamp (INTEGER), use for date comparisons
|
| 90 |
+
- date: ISO format string (TEXT), use for display
|
| 91 |
+
- text_plain: Message text content
|
| 92 |
+
- text_length: Character count of the message
|
| 93 |
+
- has_links: 1 if message contains URL, 0 otherwise (note: plural)
|
| 94 |
+
- has_media: 1 if message has any media attachment
|
| 95 |
+
- has_photo: 1 if message has a photo specifically
|
| 96 |
+
- from_id: TEXT user ID (e.g., 'user356173100')
|
| 97 |
+
- For text search: SELECT * FROM messages WHERE id IN (SELECT rowid FROM messages_fts WHERE messages_fts MATCH 'term')
|
| 98 |
+
""")
|
| 99 |
+
|
| 100 |
+
return '\n'.join(schema_parts)
|
| 101 |
+
|
| 102 |
+
def _get_sample_data(self) -> str:
|
| 103 |
+
"""Get sample data for context."""
|
| 104 |
+
conn = sqlite3.connect(self.db_path)
|
| 105 |
+
cursor = conn.cursor()
|
| 106 |
+
|
| 107 |
+
# Get user list
|
| 108 |
+
cursor.execute("""
|
| 109 |
+
SELECT from_name, COUNT(*) as cnt
|
| 110 |
+
FROM messages
|
| 111 |
+
WHERE from_name IS NOT NULL
|
| 112 |
+
GROUP BY from_name
|
| 113 |
+
ORDER BY cnt DESC
|
| 114 |
+
LIMIT 10
|
| 115 |
+
""")
|
| 116 |
+
users = cursor.fetchall()
|
| 117 |
+
|
| 118 |
+
# Get date range
|
| 119 |
+
cursor.execute("SELECT MIN(date), MAX(date) FROM messages")
|
| 120 |
+
date_range = cursor.fetchone()
|
| 121 |
+
|
| 122 |
+
conn.close()
|
| 123 |
+
|
| 124 |
+
return f"""
|
| 125 |
+
Top users: {', '.join([u[0] for u in users])}
|
| 126 |
+
Date range: {date_range[0]} to {date_range[1]}
|
| 127 |
+
"""
|
| 128 |
+
|
| 129 |
+
def _build_prompt(self, user_query: str) -> str:
|
| 130 |
+
"""Build prompt for AI model."""
|
| 131 |
+
schema = self._get_db_schema()
|
| 132 |
+
sample = self._get_sample_data()
|
| 133 |
+
|
| 134 |
+
return f"""You are a SQL query generator for a Telegram chat database.
|
| 135 |
+
Your task is to convert natural language questions into SQLite queries.
|
| 136 |
+
|
| 137 |
+
{schema}
|
| 138 |
+
|
| 139 |
+
{sample}
|
| 140 |
+
|
| 141 |
+
IMPORTANT RULES:
|
| 142 |
+
1. Return ONLY valid SQLite query, no explanations
|
| 143 |
+
2. For text search, use: SELECT * FROM messages WHERE id IN (SELECT id FROM messages_fts WHERE messages_fts MATCH 'search_term')
|
| 144 |
+
3. For Hebrew text, the FTS5 will handle it correctly
|
| 145 |
+
4. Always include relevant columns like date, from_name, text_plain
|
| 146 |
+
5. Limit results to 50 unless specified
|
| 147 |
+
6. For "who" questions, GROUP BY from_name and COUNT(*)
|
| 148 |
+
7. For "when" questions, include date in SELECT
|
| 149 |
+
8. For threads/replies, JOIN messages m2 ON m1.reply_to_message_id = m2.id
|
| 150 |
+
|
| 151 |
+
User question: {user_query}
|
| 152 |
+
|
| 153 |
+
SQLite query:"""
|
| 154 |
+
|
| 155 |
+
def _call_ollama(self, prompt: str) -> str:
    """Send ``prompt`` to the Ollama generate endpoint and return its text.

    Raises:
        ImportError: If the ``requests`` library is not installed.
    """
    if not HAS_REQUESTS:
        raise ImportError("requests library required for Ollama")

    # Low temperature keeps output stable; num_predict caps the reply length.
    generation_options = {"temperature": 0.1, "num_predict": 500}
    payload = {
        "model": self.model,
        "prompt": prompt,
        "stream": False,
        "options": generation_options,
    }

    reply = requests.post(f"{self.ollama_url}/api/generate", json=payload, timeout=60)
    reply.raise_for_status()
    return reply.json()["response"]
|
| 175 |
+
|
| 176 |
+
def _call_groq(self, prompt: str) -> str:
|
| 177 |
+
"""Call Groq API."""
|
| 178 |
+
response = self.client.chat.completions.create(
|
| 179 |
+
model=self.model,
|
| 180 |
+
messages=[{"role": "user", "content": prompt}],
|
| 181 |
+
temperature=0.1,
|
| 182 |
+
max_tokens=500
|
| 183 |
+
)
|
| 184 |
+
return response.choices[0].message.content
|
| 185 |
+
|
| 186 |
+
def _call_gemini(self, prompt: str) -> str:
|
| 187 |
+
"""Call Google Gemini API."""
|
| 188 |
+
response = self.client.generate_content(prompt)
|
| 189 |
+
return response.text
|
| 190 |
+
|
| 191 |
+
def _generate_sql(self, user_query: str) -> str:
|
| 192 |
+
"""Generate SQL from natural language query."""
|
| 193 |
+
prompt = self._build_prompt(user_query)
|
| 194 |
+
|
| 195 |
+
if self.provider == "ollama":
|
| 196 |
+
response = self._call_ollama(prompt)
|
| 197 |
+
elif self.provider == "groq":
|
| 198 |
+
response = self._call_groq(prompt)
|
| 199 |
+
elif self.provider == "gemini":
|
| 200 |
+
response = self._call_gemini(prompt)
|
| 201 |
+
else:
|
| 202 |
+
raise ValueError(f"Unknown provider: {self.provider}")
|
| 203 |
+
|
| 204 |
+
# Extract SQL from response
|
| 205 |
+
sql = response.strip()
|
| 206 |
+
|
| 207 |
+
# Clean up common issues - handle various code block formats
|
| 208 |
+
sql = re.sub(r'^```\w*\s*', '', sql) # Remove opening ```sql or ```
|
| 209 |
+
sql = re.sub(r'\s*```$', '', sql) # Remove closing ```
|
| 210 |
+
sql = re.sub(r'^```', '', sql, flags=re.MULTILINE) # Remove any remaining ```
|
| 211 |
+
sql = sql.strip()
|
| 212 |
+
|
| 213 |
+
# Try to extract SELECT statement if there's text before it
|
| 214 |
+
select_match = re.search(r'(SELECT\s+.+?)(?:;|$)', sql, re.IGNORECASE | re.DOTALL)
|
| 215 |
+
if select_match:
|
| 216 |
+
sql = select_match.group(1).strip()
|
| 217 |
+
|
| 218 |
+
# Ensure it's a SELECT query for safety
|
| 219 |
+
if not sql.upper().startswith("SELECT"):
|
| 220 |
+
raise ValueError(f"AI generated non-SELECT query: {sql[:100]}")
|
| 221 |
+
|
| 222 |
+
return sql
|
| 223 |
+
|
| 224 |
+
def _execute_sql(self, sql: str) -> List[Dict[str, Any]]:
|
| 225 |
+
"""Execute SQL and return results as list of dicts."""
|
| 226 |
+
conn = sqlite3.connect(self.db_path)
|
| 227 |
+
conn.row_factory = sqlite3.Row
|
| 228 |
+
cursor = conn.cursor()
|
| 229 |
+
|
| 230 |
+
try:
|
| 231 |
+
cursor.execute(sql)
|
| 232 |
+
rows = cursor.fetchall()
|
| 233 |
+
results = [dict(row) for row in rows]
|
| 234 |
+
except sqlite3.Error as e:
|
| 235 |
+
results = [{"error": str(e), "sql": sql}]
|
| 236 |
+
finally:
|
| 237 |
+
conn.close()
|
| 238 |
+
|
| 239 |
+
return results
|
| 240 |
+
|
| 241 |
+
def _generate_answer(self, user_query: str, results: List[Dict], sql: str) -> str:
|
| 242 |
+
"""Generate natural language answer from results."""
|
| 243 |
+
if not results:
|
| 244 |
+
return "לא נמצאו תוצאות."
|
| 245 |
+
|
| 246 |
+
if "error" in results[0]:
|
| 247 |
+
return f"שגיאה בשאילתה: {results[0]['error']}"
|
| 248 |
+
|
| 249 |
+
# Build answer prompt
|
| 250 |
+
results_str = json.dumps(results[:20], ensure_ascii=False, indent=2)
|
| 251 |
+
|
| 252 |
+
answer_prompt = f"""Based on the following query results, provide a concise answer in Hebrew.
|
| 253 |
+
|
| 254 |
+
User question: {user_query}
|
| 255 |
+
|
| 256 |
+
Query results (JSON):
|
| 257 |
+
{results_str}
|
| 258 |
+
|
| 259 |
+
Total results: {len(results)}
|
| 260 |
+
|
| 261 |
+
Provide a helpful, concise answer in Hebrew. Include specific names, dates, and numbers from the results.
|
| 262 |
+
If showing a list, format it nicely. Keep it brief but informative."""
|
| 263 |
+
|
| 264 |
+
if self.provider == "ollama":
|
| 265 |
+
answer = self._call_ollama(answer_prompt)
|
| 266 |
+
elif self.provider == "groq":
|
| 267 |
+
answer = self._call_groq(answer_prompt)
|
| 268 |
+
elif self.provider == "gemini":
|
| 269 |
+
answer = self._call_gemini(answer_prompt)
|
| 270 |
+
|
| 271 |
+
return answer
|
| 272 |
+
|
| 273 |
+
def context_search(self, query: str, user_name: str = None) -> Dict[str, Any]:
    """
    Answer a question by retrieving relevant messages and letting the AI reason over them.

    Steps:
        1. The configured provider extracts a person name and search
           keywords from the question.
        2. Messages are fetched from SQLite: keyword hits through the
           ``messages_fts`` full-text table (optionally restricted to the
           person) plus that person's most recent messages for extra context.
        3. The provider reads the retrieved messages and writes the answer.

    Args:
        query: The question, in natural language.
        user_name: Optional sender name to restrict the search to. When
            given, it takes precedence over the name the AI extracts.

    Returns:
        A dict with ``query``, ``answer``, ``context_messages``,
        ``keywords_used`` and ``mode`` (plus ``context_user`` when messages
        were found). On any failure the dict carries ``query``, ``error``
        and ``mode`` instead; exceptions are not propagated.
    """
    def ask(prompt: str) -> str:
        # Anything that is not gemini/groq is sent to Ollama.
        if self.provider == "gemini":
            return self._call_gemini(prompt)
        if self.provider == "groq":
            return self._call_groq(prompt)
        return self._call_ollama(prompt)

    def fts_term(keyword: str) -> str:
        # Quote every token as an FTS5 string so punctuation, quotes or
        # operator words coming back from the model cannot break the MATCH
        # expression. Tokens without any letter/digit are dropped; a
        # multi-word keyword requires all of its words (explicit AND).
        tokens = [
            '"' + token.replace('"', '""') + '"'
            for token in keyword.split()
            if any(ch.isalnum() for ch in token)
        ]
        if len(tokens) > 1:
            return "(" + " AND ".join(tokens) + ")"
        return tokens[0] if tokens else ""

    conn = None
    try:
        # Step 1: AI extracts user name AND relevant keywords
        extract_prompt = f"""Analyze this question and extract:
1. USER_NAME: The specific person being asked about (or NONE if not about a specific person)
2. KEYWORDS: Hebrew keywords to search for in their messages (related to the question topic)

Question: {query}

Return in this exact format (one per line):
USER_NAME: <name or NONE>
KEYWORDS: <comma-separated keywords in Hebrew>

Example for "באיזה בית חולים האחות עובדת?":
USER_NAME: האחות
KEYWORDS: בית חולים, עבודה, מחלקה, סורוקה, רמבם, איכילוב, שיבא, הדסה, טיפול נמרץ, אחות

Extract:"""

        extraction = ask(extract_prompt).strip()

        # Parse the two expected lines; tolerate indentation around them.
        extracted_name = None
        keywords = []
        for raw_line in extraction.split('\n'):
            line = raw_line.strip()
            if line.startswith('USER_NAME:'):
                name = line[len('USER_NAME:'):].strip()
                if name.upper() != 'NONE' and len(name) < 50:
                    extracted_name = name
            elif line.startswith('KEYWORDS:'):
                kw_str = line[len('KEYWORDS:'):].strip()
                keywords = [k.strip() for k in kw_str.split(',') if k.strip()]

        # An explicit caller-supplied name wins over the model's guess.
        user_name = user_name or extracted_name

        # At most 10 keywords are searched; unsearchable ones are skipped.
        fts_query = ' OR '.join(
            term for term in (fts_term(k) for k in keywords[:10]) if term
        )

        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        messages = []

        # Step 2: Hybrid retrieval - FTS5 keyword search + recent messages
        if user_name and fts_query:
            cursor = conn.execute("""
                SELECT date, from_name, text
                FROM messages
                WHERE from_name LIKE ?
                AND id IN (SELECT id FROM messages_fts WHERE messages_fts MATCH ?)
                ORDER BY date DESC
                LIMIT 100
            """, (f"%{user_name}%", fts_query))
            messages = [dict(row) for row in cursor.fetchall()]

            # Recent messages may hold relevant info that contains none of the keywords.
            cursor = conn.execute("""
                SELECT date, from_name, text
                FROM messages
                WHERE from_name LIKE ?
                ORDER BY date DESC
                LIMIT 50
            """, (f"%{user_name}%",))
            recent = [dict(row) for row in cursor.fetchall()]

            # Combine, de-duplicating on message text.
            seen_texts = {m['text'] for m in messages if m['text']}
            for m in recent:
                if m['text'] and m['text'] not in seen_texts:
                    messages.append(m)
                    seen_texts.add(m['text'])

        elif user_name:
            # No usable keywords: just take the user's latest messages.
            cursor = conn.execute("""
                SELECT date, from_name, text
                FROM messages
                WHERE from_name LIKE ?
                ORDER BY date DESC
                LIMIT 200
            """, (f"%{user_name}%",))
            messages = [dict(row) for row in cursor.fetchall()]

        elif fts_query:
            # No user: search all messages for the keywords.
            cursor = conn.execute("""
                SELECT date, from_name, text
                FROM messages
                WHERE id IN (SELECT id FROM messages_fts WHERE messages_fts MATCH ?)
                ORDER BY date DESC
                LIMIT 100
            """, (fts_query,))
            messages = [dict(row) for row in cursor.fetchall()]

        else:
            # Fallback: latest non-empty messages.
            cursor = conn.execute("""
                SELECT date, from_name, text
                FROM messages
                WHERE text IS NOT NULL AND text != ''
                ORDER BY date DESC
                LIMIT 100
            """)
            messages = [dict(row) for row in cursor.fetchall()]

        if not messages:
            return {
                "query": query,
                "answer": "לא נמצאו הודעות רלוונטיות",
                "context_messages": 0,
                "keywords_used": keywords,
                "mode": "context_search"
            }

        # Step 3: AI reasons over the retrieved messages (each capped at 500 chars).
        context_text = "\n".join([
            f"[{m['date']}] {m['from_name']}: {m['text'][:500]}"
            for m in messages if m['text']
        ])

        reason_prompt = f"""You are analyzing a Telegram chat history to answer a question.
Read the messages carefully and infer the answer from context clues.
The user may not have stated things directly - look for hints, mentions, and implications.

Question: {query}

Chat messages (sorted by relevance and date):
{context_text}

Based on these messages, answer the question in Hebrew.
If you can infer information (like workplace, location, profession) from context clues, do so.
Cite specific messages when possible.
If you truly cannot find any relevant information, say so.

Answer:"""

        answer = ask(reason_prompt)

        return {
            "query": query,
            "answer": answer,
            "context_user": user_name,
            "context_messages": len(messages),
            "keywords_used": keywords,
            "mode": "context_search"
        }
    except Exception as e:
        return {
            "query": query,
            "error": f"Context search error: {str(e)}",
            "mode": "context_search"
        }
    finally:
        # Release the connection on every path, including failures.
        if conn is not None:
            conn.close()
|
| 449 |
+
|
| 450 |
+
def search(self, query: str, generate_answer: bool = True) -> Dict[str, Any]:
    """
    Answer a natural-language question by generating and running SQL.

    Args:
        query: The question (Hebrew or English).
        generate_answer: When true, also ask the AI to phrase an answer
            from the rows that came back.

    Returns:
        ``{"query", "sql", "results", "count"}`` and, when requested and the
        query produced usable rows, ``"answer"``. If anything raises, a dict
        with ``query``, ``error`` and an empty ``results`` list is returned.
    """
    try:
        statement = self._generate_sql(query)
        rows = self._execute_sql(statement)

        payload = {
            "query": query,
            "sql": statement,
            "results": rows,
            "count": len(rows)
        }

        # Skip the answer step for empty results and for failed queries,
        # which are reported as a first row carrying an "error" key.
        wants_answer = generate_answer and rows and "error" not in rows[0]
        if not wants_answer:
            return payload

        payload["answer"] = self._generate_answer(query, rows, statement)
        return payload
    except Exception as exc:
        return {
            "query": query,
            "error": str(exc),
            "results": []
        }
|
| 487 |
+
|
| 488 |
+
def get_thread(self, message_id: int) -> List[Dict[str, Any]]:
    """
    Collect the conversation thread around a message.

    The thread consists of the message itself, the chain of messages it
    (transitively) replies to, and every (transitive) reply to it.

    Args:
        message_id: ``message_id`` of the message to start from.

    Returns:
        Thread messages as dicts (``message_id``, ``date``, ``from_name``,
        ``text``, ``reply_to_message_id``) sorted by ``date``. Empty when
        the message does not exist and nothing replies to that id.
    """
    conn = sqlite3.connect(self.db_path)
    conn.row_factory = sqlite3.Row
    try:
        cursor = conn.cursor()
        thread = []
        visited = set()  # guards against reply cycles and duplicates

        # Walk up the reply chain iteratively, so a very long chain cannot
        # exhaust Python's recursion limit.
        ancestors = []
        current = message_id
        while current not in visited:
            visited.add(current)
            cursor.execute("""
                SELECT message_id, date, from_name, text, reply_to_message_id
                FROM messages WHERE message_id = ?
            """, (current,))
            row = cursor.fetchone()
            if not row:
                break
            ancestors.append(dict(row))
            if not row['reply_to_message_id']:
                break
            current = row['reply_to_message_id']
        # Oldest ancestor first, the starting message last.
        thread.extend(reversed(ancestors))

        def fetch_replies(parent_id):
            cursor.execute("""
                SELECT message_id, date, from_name, text, reply_to_message_id
                FROM messages WHERE reply_to_message_id = ?
                ORDER BY date
            """, (parent_id,))
            return cursor.fetchall()

        # Depth-first, pre-order walk over the replies using an explicit
        # stack; replies are pushed reversed so they pop in date order.
        pending = list(reversed(fetch_replies(message_id)))
        while pending:
            reply = pending.pop()
            if reply['message_id'] in visited:
                continue
            visited.add(reply['message_id'])
            thread.append(dict(reply))
            pending.extend(reversed(fetch_replies(reply['message_id'])))
    finally:
        conn.close()

    thread.sort(key=lambda x: x['date'])

    return thread
|
| 540 |
+
|
| 541 |
+
def find_similar_messages(self, message_id: int, limit: int = 10) -> List[Dict[str, Any]]:
    """
    Find messages that share words with the given message.

    The first five whitespace-separated words of the message are searched
    (OR-ed together) through the ``messages_fts`` full-text table.

    Args:
        message_id: ``message_id`` of the reference message.
        limit: Maximum number of similar messages to return.

    Returns:
        Dicts with ``message_id``, ``date``, ``from_name`` and ``text`` for
        other messages matching at least one of the words. Empty when the
        reference message is missing, has no text, or has no searchable word.
    """
    conn = sqlite3.connect(self.db_path)
    conn.row_factory = sqlite3.Row
    try:
        cursor = conn.cursor()

        # Get the original message
        cursor.execute("SELECT text FROM messages WHERE message_id = ?", (message_id,))
        row = cursor.fetchone()

        if not row or not row['text']:
            return []

        # Keep only words containing at least one letter or digit, and quote
        # each as an FTS5 string: raw chat text is full of punctuation
        # (commas, quotes, parentheses, "-") that is a syntax error in an
        # unquoted MATCH expression.
        words = [
            w for w in row['text'].split()[:5]
            if any(ch.isalnum() for ch in w)
        ]
        if not words:
            return []
        search_term = ' OR '.join('"' + w.replace('"', '""') + '"' for w in words)

        cursor.execute("""
            SELECT m.message_id, m.date, m.from_name, m.text
            FROM messages m
            WHERE m.id IN (
                SELECT id FROM messages_fts
                WHERE messages_fts MATCH ?
            )
            AND m.message_id != ?
            LIMIT ?
        """, (search_term, message_id, limit))

        return [dict(match) for match in cursor.fetchall()]
    finally:
        # Also covers the early returns above, which used to leak the connection.
        conn.close()
|
| 573 |
+
|
| 574 |
+
|
| 575 |
+
class ChatViewer:
    """Browse stored chat messages the way a Telegram client would."""

    def __init__(self, db_path: str):
        """
        Args:
            db_path: Path of the SQLite database holding the ``messages`` table.
        """
        self.db_path = db_path

    def get_messages(self,
                     offset: int = 0,
                     limit: int = 50,
                     user_id: str = None,
                     search: str = None,
                     date_from: str = None,
                     date_to: str = None,
                     has_media: bool = None,
                     has_link: bool = None) -> Dict[str, Any]:
        """
        Get one page of messages, newest first, with optional filters.

        Args:
            offset: Number of matching messages to skip.
            limit: Page size.
            user_id: Only messages whose ``from_id`` equals this value.
            search: Full-text query, passed as-is to ``messages_fts MATCH``.
            date_from: Inclusive lower bound compared against ``date``.
            date_to: Inclusive upper bound compared against ``date``.
            has_media: True/False to require/forbid a ``media_type``; None to ignore.
            has_link: True/False to filter on the ``has_link`` flag; None to ignore.

        Returns:
            Dict with ``messages`` (each including ``reply_to_name`` and
            ``reply_to_text`` of the message it replies to, if any),
            ``total`` (matches before pagination), ``offset``, ``limit``
            and ``has_more``.
        """
        # Every filter column is qualified with the "m" alias: the page
        # query self-joins messages (m -> r), so unqualified names such as
        # "date" or "from_id" would be ambiguous and rejected by SQLite.
        conditions = []
        params = []

        if user_id:
            conditions.append("m.from_id = ?")
            params.append(user_id)

        if date_from:
            conditions.append("m.date >= ?")
            params.append(date_from)

        if date_to:
            conditions.append("m.date <= ?")
            params.append(date_to)

        if has_media is not None:
            if has_media:
                conditions.append("m.media_type IS NOT NULL")
            else:
                conditions.append("m.media_type IS NULL")

        if has_link is not None:
            conditions.append("m.has_link = ?")
            params.append(1 if has_link else 0)

        if search:
            conditions.append("""m.id IN (
                SELECT id FROM messages_fts WHERE messages_fts MATCH ?
            )""")
            params.append(search)

        where_clause = " AND ".join(conditions) if conditions else "1=1"

        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        try:
            cursor = conn.cursor()

            # Total number of matches, ignoring pagination.
            cursor.execute(f"SELECT COUNT(*) FROM messages m WHERE {where_clause}", params)
            total = cursor.fetchone()[0]

            # message_id is a tie-breaker so that pages stay stable when
            # several messages share the same date value.
            query = f"""
                SELECT
                    m.message_id,
                    m.date,
                    m.from_id,
                    m.from_name,
                    m.text,
                    m.reply_to_message_id,
                    m.forwarded_from,
                    m.media_type,
                    m.has_link,
                    m.char_count,
                    r.from_name as reply_to_name,
                    r.text as reply_to_text
                FROM messages m
                LEFT JOIN messages r ON m.reply_to_message_id = r.message_id
                WHERE {where_clause}
                ORDER BY m.date DESC, m.message_id DESC
                LIMIT ? OFFSET ?
            """
            cursor.execute(query, params + [limit, offset])
            messages = [dict(row) for row in cursor.fetchall()]
        finally:
            conn.close()

        return {
            "messages": messages,
            "total": total,
            "offset": offset,
            "limit": limit,
            "has_more": offset + limit < total
        }

    def get_message_context(self, message_id: int, before: int = 10, after: int = 10) -> Dict[str, Any]:
        """
        Get the messages surrounding a specific message (context view).

        Args:
            message_id: ``message_id`` of the target message.
            before: Maximum number of earlier messages to include.
            after: Maximum number of later messages to include.

        Returns:
            Dict with ``messages`` in chronological order (earlier messages,
            the target, later messages) and ``target_id``. ``messages`` is
            empty when the target does not exist.
        """
        columns = """message_id, date, from_id, from_name, text,
                   reply_to_message_id, media_type, has_link"""

        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        try:
            cursor = conn.cursor()

            cursor.execute(f"""
                SELECT {columns}
                FROM messages
                WHERE message_id = ?
            """, (message_id,))
            row = cursor.fetchone()

            if not row:
                return {"messages": [], "target_id": message_id}

            target_msg = dict(row)
            target_date = target_msg['date']

            # Messages sharing the target's date are ordered by message_id,
            # so same-timestamp neighbours are not silently dropped.
            # NOTE(review): assumes message_id grows with time within the chat - confirm.
            cursor.execute(f"""
                SELECT {columns}
                FROM messages
                WHERE date < ? OR (date = ? AND message_id < ?)
                ORDER BY date DESC, message_id DESC
                LIMIT ?
            """, (target_date, target_date, message_id, before))
            before_msgs = list(reversed([dict(r) for r in cursor.fetchall()]))

            cursor.execute(f"""
                SELECT {columns}
                FROM messages
                WHERE date > ? OR (date = ? AND message_id > ?)
                ORDER BY date ASC, message_id ASC
                LIMIT ?
            """, (target_date, target_date, message_id, after))
            after_msgs = [dict(r) for r in cursor.fetchall()]
        finally:
            # Also covers the early return for an unknown message.
            conn.close()

        return {
            "messages": before_msgs + [target_msg] + after_msgs,
            "target_id": message_id
        }

    def get_user_conversation(self, user1_id: str, user2_id: str, limit: int = 100) -> List[Dict]:
        """
        Get the replies two users sent to each other, newest first.

        Args:
            user1_id: ``from_id`` of the first user.
            user2_id: ``from_id`` of the second user.
            limit: Maximum number of messages to return.

        Returns:
            Messages written by one of the users in reply to a message of
            the other, each with ``reply_to_name`` of the replied-to author.
        """
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        try:
            cursor = conn.cursor()
            # The WHERE clause needs a matching replied-to row (m2), so the
            # LEFT JOIN effectively behaves as an inner join here.
            cursor.execute("""
                SELECT m1.message_id, m1.date, m1.from_id, m1.from_name, m1.text,
                       m1.reply_to_message_id, m2.from_name as reply_to_name
                FROM messages m1
                LEFT JOIN messages m2 ON m1.reply_to_message_id = m2.message_id
                WHERE (m1.from_id = ? AND m2.from_id = ?)
                   OR (m1.from_id = ? AND m2.from_id = ?)
                ORDER BY m1.date DESC
                LIMIT ?
            """, (user1_id, user2_id, user2_id, user1_id, limit))

            return [dict(row) for row in cursor.fetchall()]
        finally:
            conn.close()
|
| 748 |
+
|
| 749 |
+
|
| 750 |
+
# CLI for testing
|
| 751 |
+
if __name__ == "__main__":
    # Minimal command-line front end for manually trying out AISearchEngine.
    import argparse

    parser = argparse.ArgumentParser(description="AI-powered Telegram search")
    parser.add_argument("--db", required=True, help="Database path")
    parser.add_argument("--provider", default="ollama", choices=["ollama", "groq", "gemini"])
    parser.add_argument("--query", help="Search query")
    parser.add_argument("--api-key", help="API key for cloud providers")

    args = parser.parse_args()

    if not args.query:
        # Nothing to search for: show usage instead of exiting silently.
        parser.print_help()
    else:
        engine = AISearchEngine(args.db, args.provider, args.api_key)
        result = engine.search(args.query)

        print(f"\nQuery: {result['query']}")
        # search() reports failures through an "error" key rather than by
        # raising, so surface it here instead of only printing "SQL: N/A".
        if result.get('error'):
            print(f"Error: {result['error']}")
        print(f"SQL: {result.get('sql', 'N/A')}")
        print(f"Results: {result.get('count', 0)}")

        if 'answer' in result:
            print(f"\nAnswer:\n{result['answer']}")

        if result.get('results'):
            print(f"\nFirst 3 results:")
            for r in result['results'][:3]:
                print(json.dumps(r, ensure_ascii=False, indent=2))
|
algorithms.py
ADDED
|
@@ -0,0 +1,819 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Advanced Algorithms Module for Telegram Chat Analysis
|
| 4 |
+
|
| 5 |
+
Implements algorithms from Data Structures course:
|
| 6 |
+
- LCS (Longest Common Subsequence) - Similar message detection
|
| 7 |
+
- Heap-based Top-K - Efficient ranking without full sort
|
| 8 |
+
- Selection Algorithm (Median of Medians) - O(n) percentiles
|
| 9 |
+
- Rank Tree (Order Statistics Tree) - O(log n) rank queries
|
| 10 |
+
- Bucket Sort - O(n) time-based histograms
|
| 11 |
+
|
| 12 |
+
All algorithms are optimized for the chat indexing use case.
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
import heapq
|
| 16 |
+
from typing import Any, Callable, Generator, Optional
|
| 17 |
+
from collections import defaultdict
|
| 18 |
+
from dataclasses import dataclass, field
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# ============================================
|
| 22 |
+
# LCS - LONGEST COMMON SUBSEQUENCE
|
| 23 |
+
# ============================================
|
| 24 |
+
|
| 25 |
+
def lcs_length(s1: str, s2: str) -> int:
    """
    Length of the Longest Common Subsequence of two strings.

    Only one DP row (sized by the shorter string) is kept between
    iterations, so memory is O(min(m, n)) while time stays O(m * n).

    Use case: similarity measure between two messages.
    """
    # Lay the DP row along the shorter string.
    shorter, longer = (s2, s1) if len(s1) > len(s2) else (s1, s2)

    previous = [0] * (len(shorter) + 1)

    for long_char in longer:
        # Column 0 represents the empty prefix and is always 0.
        current = [0]
        for pos, short_char in enumerate(shorter, start=1):
            if short_char == long_char:
                current.append(previous[pos - 1] + 1)
            else:
                current.append(max(previous[pos], current[pos - 1]))
        previous = current

    return previous[-1]
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def lcs_string(s1: str, s2: str) -> str:
    """
    Return one Longest Common Subsequence of two strings.

    Builds the full (m+1) x (n+1) DP table and walks it backwards.

    Time: O(m * n)
    Space: O(m * n)

    Use case: show the content two messages have in common.
    """
    rows, cols = len(s1), len(s2)
    table = [[0] * (cols + 1) for _ in range(rows + 1)]

    # Fill the table row by row; row 0 and column 0 stay 0 (empty prefixes).
    for r, left_char in enumerate(s1, start=1):
        above, here = table[r - 1], table[r]
        for c, right_char in enumerate(s2, start=1):
            if left_char == right_char:
                here[c] = above[c - 1] + 1
            else:
                here[c] = max(above[c], here[c - 1])

    # Walk back from the bottom-right corner, collecting matched characters.
    picked = []
    r, c = rows, cols
    while r and c:
        if s1[r - 1] == s2[c - 1]:
            picked.append(s1[r - 1])
            r -= 1
            c -= 1
            continue
        # On a tie, move left (decrement the s2 index) rather than up.
        if table[r - 1][c] > table[r][c - 1]:
            r -= 1
        else:
            c -= 1

    picked.reverse()
    return ''.join(picked)
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def lcs_similarity(s1: str, s2: str) -> float:
    """
    LCS-based similarity ratio of two strings.

    The LCS length is divided by the length of the longer string, giving a
    value from 0 (nothing in common) to 1 (identical). If either string is
    empty the result is 0.0.

    Use case: spotting near-duplicate messages and reposts.
    """
    if s1 and s2:
        longest = len(s1) if len(s1) >= len(s2) else len(s2)
        return lcs_length(s1, s2) / longest
    return 0.0
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def find_similar_messages(
    messages: list[tuple[int, str]],
    threshold: float = 0.7,
    min_length: int = 20
) -> list[tuple[int, int, float]]:
    """
    Find pairs of similar messages using LCS similarity.

    Args:
        messages: List of (id, text) tuples. Entries whose text is None or
            empty are ignored.
        threshold: Minimum similarity to report (0-1)
        min_length: Minimum message length to consider

    Returns:
        List of (id1, id2, similarity) tuples, most similar pair first.

    Time: O(n² * m) where n=messages, m=avg length
    """
    # Drop missing/empty texts up front: they cannot be similar to anything,
    # len(None) would raise, and two empty texts would make the length
    # ratio below divide by zero when min_length is 0.
    candidates = [
        (id_, text) for id_, text in messages
        if text and len(text) >= min_length
    ]

    similar_pairs = []

    for first, (id1, text1) in enumerate(candidates):
        for id2, text2 in candidates[first + 1:]:
            # Cheap pre-filter: the similarity can never exceed the ratio
            # of the two lengths, so skip pairs that cannot reach the threshold.
            len_ratio = min(len(text1), len(text2)) / max(len(text1), len(text2))
            if len_ratio < threshold:
                continue

            sim = lcs_similarity(text1, text2)
            if sim >= threshold:
                similar_pairs.append((id1, id2, sim))

    return sorted(similar_pairs, key=lambda x: x[2], reverse=True)
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
# ============================================
|
| 151 |
+
# HEAP-BASED TOP-K
|
| 152 |
+
# ============================================
|
| 153 |
+
|
| 154 |
+
class TopK:
    """
    Tracks the K largest items seen so far using a min-heap.

    The heap root is always the smallest of the items currently kept, so a
    new item only needs to be compared against the root.

    Time: O(n log k) for n insertions
    Space: O(k)

    Use case: Top users, top words, top domains without sorting all data.
    """

    def __init__(self, k: int, key: Callable[[Any], float] = None):
        """
        Args:
            k: Number of top elements to track. With k <= 0 nothing is kept.
            key: Function to extract comparison value (default: identity)
        """
        self.k = k
        self.key = key or (lambda x: x)
        # Entries are (key_value, counter, item). The unique counter breaks
        # ties between equal keys, so the items themselves are never
        # compared and need not be orderable.
        self.heap: list[tuple[float, int, Any]] = []
        self.counter = 0  # sequence number of the next pushed item

    def push(self, item: Any) -> None:
        """Offer an item; it is kept only if it belongs to the current top K. O(log k)."""
        key_val = self.key(item)
        entry = (key_val, self.counter, item)
        self.counter += 1

        if self.k <= 0:
            # Nothing can be kept; without this guard the comparison against
            # the root below would index an empty heap.
            return

        if len(self.heap) < self.k:
            heapq.heappush(self.heap, entry)
        elif key_val > self.heap[0][0]:
            # Strictly larger than the smallest kept item: evict the root.
            heapq.heapreplace(self.heap, entry)

    def get_top(self) -> list[Any]:
        """Get the kept items sorted by key descending. O(k log k)."""
        return [item for _, _, item in sorted(self.heap, reverse=True)]

    def __len__(self) -> int:
        """Number of items currently kept (at most k)."""
        return len(self.heap)
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
def top_k_frequent(items: list[Any], k: int) -> list[tuple[Any, int]]:
    """
    Return the K most frequent items as (item, count) pairs.

    Occurrences are tallied in one pass, then the distinct items are fed
    through a TopK tracker keyed on their count.

    Time: O(n + m log k) where n=items, m=unique items
    Space: O(m)

    Use case: Top words, top users, top mentioned usernames.
    """
    # Tally occurrences, keeping first-seen order of the distinct items.
    counts = {}
    for element in items:
        counts[element] = counts.get(element, 0) + 1

    # Rank the (item, count) pairs by their count.
    tracker = TopK(k, key=lambda pair: pair[1])
    for pair in counts.items():
        tracker.push(pair)

    return tracker.get_top()
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
def top_k_by_field(
    records: list[dict],
    field: str,
    k: int,
    reverse: bool = True
) -> list[dict]:
    """
    Select K records by the value of one field.

    With ``reverse=True`` the K largest values are returned (largest
    first); with ``reverse=False`` the K smallest (smallest first).
    Records lacking the field count as 0.

    Time: O(n log k)

    Use case: Top messages by length, top users by message count.
    """
    def largest_first(record):
        return record.get(field, 0)

    def smallest_first(record):
        # Negating the value lets the same "keep the largest" tracker
        # select the smallest values instead.
        return -record.get(field, 0)

    tracker = TopK(k, key=largest_first if reverse else smallest_first)

    for record in records:
        tracker.push(record)

    return tracker.get_top()
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
# ============================================
|
| 245 |
+
# SELECTION ALGORITHM (MEDIAN OF MEDIANS)
|
| 246 |
+
# ============================================
|
| 247 |
+
|
| 248 |
+
def partition(arr: list, left: int, right: int, pivot_idx: int) -> int:
    """
    Rearrange arr[left..right] in place around the chosen pivot (Lomuto scheme).

    Afterwards every element strictly smaller than the pivot value sits to
    the left of the pivot, and everything else to its right.

    Returns the index at which the pivot finally rests.
    """
    pivot = arr[pivot_idx]

    # Park the pivot in the last slot while the rest is scanned
    arr[right], arr[pivot_idx] = arr[pivot_idx], arr[right]

    boundary = left  # first slot not yet known to hold a "smaller" element
    cursor = left
    while cursor < right:
        if arr[cursor] < pivot:
            arr[cursor], arr[boundary] = arr[boundary], arr[cursor]
            boundary += 1
        cursor += 1

    # Drop the pivot between the two regions
    arr[right], arr[boundary] = arr[boundary], arr[right]

    return boundary
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
def median_of_five(arr: list, left: int, right: int) -> int:
    """Return the index of the median of arr[left..right] (meant for at most 5 elements).

    Positions are ordered by (value, index); the one at the middle of that
    ordering is returned (the upper middle when the count is even).
    """
    ordered_positions = sorted(
        range(left, right + 1),
        key=lambda pos: (arr[pos], pos),
    )
    return ordered_positions[len(ordered_positions) // 2]
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
def median_of_medians(arr: list, left: int, right: int) -> int:
    """
    Pick a pivot for arr[left..right] using the median-of-medians idea.

    The range is cut into groups of up to five elements, the median of
    each group is collected, and the middle of those medians becomes the
    pivot value. The medians are simply sorted here rather than being
    processed recursively.

    Returns the index of the first element in the range equal to the
    pivot value (or `left` if none compares equal).
    """
    span = right - left + 1

    if span <= 5:
        return median_of_five(arr, left, right)

    # One median per group of (up to) five consecutive elements, in sorted order
    group_medians = sorted(
        arr[median_of_five(arr, start, min(start + 4, right))]
        for start in range(left, right + 1, 5)
    )
    target = group_medians[len(group_medians) // 2]

    # Translate the pivot value back into a position within the range
    return next(
        (pos for pos in range(left, right + 1) if arr[pos] == target),
        left,  # Fallback
    )
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
def quickselect(arr: list, k: int) -> Any:
    """
    Find the k-th smallest element (0-indexed) without fully sorting.

    The input is copied first, so the caller's list is never reordered.

    Args:
        arr: Elements to select from; they must be mutually comparable.
        k: Zero-based rank of the wanted element (0 = smallest,
           len(arr) - 1 = largest).

    Returns:
        The element that would be at index k if arr were sorted.

    Raises:
        IndexError: If arr is empty or k is outside 0..len(arr) - 1.
            (Previously an out-of-range k silently produced an arbitrary
            element or an IndexError depending on the data.)

    Time: O(n) on typical input. The pivot helper sorts the group medians
          instead of recursing, so the strict linear worst-case bound of
          textbook median-of-medians is not guaranteed here.
    Space: O(n) for the working copy.

    Use case: Find median, percentiles without sorting.
    """
    n = len(arr)
    if not 0 <= k < n:
        raise IndexError(f"quickselect rank {k} out of range for {n} element(s)")

    work = arr.copy()  # Don't modify original
    left, right = 0, n - 1

    while left < right:
        # Use median of medians for pivot selection
        pivot_idx = median_of_medians(work, left, right)
        pivot_idx = partition(work, left, right, pivot_idx)

        if k == pivot_idx:
            return work[k]
        if k < pivot_idx:
            right = pivot_idx - 1
        else:
            left = pivot_idx + 1

    # The search window has shrunk to the single slot holding rank k
    return work[left]
|
| 334 |
+
|
| 335 |
+
|
| 336 |
+
def find_median(arr: list) -> float:
    """
    Compute the median of arr via selection instead of a full sort.

    An empty input yields 0.0. For an odd number of elements the middle
    element is returned as a float; for an even number the two middle
    elements are selected separately and averaged.

    Use case: Median message length, median activity time.
    """
    size = len(arr)
    if size == 0:
        return 0.0

    upper_mid = size // 2
    if size % 2 == 1:
        return float(quickselect(arr, upper_mid))

    # Even count: average the two central order statistics
    lower = quickselect(arr, upper_mid - 1)
    upper = quickselect(arr, upper_mid)
    return (lower + upper) / 2
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
def find_percentile(arr: list, p: float) -> float:
    """
    Find the p-th percentile (0-100) via selection instead of a full sort.

    The percentile is mapped to the zero-based rank int(p / 100 * (n - 1))
    (truncated, no interpolation between neighbours) and the element of
    that rank is returned as a float.

    Args:
        arr: Values to analyse; an empty list yields 0.0.
        p: Percentile in the range 0-100. Values outside that range are
           clamped, so p < 0 gives the minimum and p > 100 the maximum.
           (Previously such values produced an out-of-range rank, which
           returned an extreme element for some inputs and raised
           IndexError for others.)

    Returns:
        The selected element converted to float.

    Use case: 90th percentile response time, activity distribution.
    """
    if not arr:
        return 0.0

    last_rank = len(arr) - 1
    k = int((p / 100) * last_rank)
    # Keep the rank inside the valid range even for out-of-range p
    k = max(0, min(last_rank, k))
    return float(quickselect(arr, k))
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
# ============================================
|
| 366 |
+
# RANK TREE (ORDER STATISTICS TREE)
|
| 367 |
+
# ============================================
|
| 368 |
+
|
| 369 |
+
@dataclass
class RankTreeNode:
    """Node in an Order Statistics Tree (augmented BST)."""
    key: Any
    value: Any = None
    left: Optional['RankTreeNode'] = None
    right: Optional['RankTreeNode'] = None
    size: int = 1  # Number of nodes in the subtree rooted here (for rank queries)
    height: int = 1  # Height of the subtree rooted here (for AVL balancing)


class RankTree:
    """
    Order Statistics Tree with AVL balancing.

    Supports:
    - O(log n) insert, delete, search
    - O(log n) select(k) - find k-th smallest
    - O(log n) rank(x) - find rank of element x

    Keys must be mutually comparable with < and >. Each key is stored
    once: inserting an existing key replaces its value.

    Use case: "What rank is this user?", "Who is the 100th most active?"
    """

    def __init__(self, key_func: Optional[Callable[[Any], Any]] = None):
        self.root: Optional[RankTreeNode] = None
        # Kept for interface compatibility; no operation of this class
        # consults it - ordering always compares the raw keys.
        self.key_func = key_func or (lambda x: x)

    def _get_size(self, node: Optional[RankTreeNode]) -> int:
        return node.size if node else 0

    def _get_height(self, node: Optional[RankTreeNode]) -> int:
        return node.height if node else 0

    def _get_balance(self, node: Optional[RankTreeNode]) -> int:
        """Height of the left subtree minus height of the right subtree."""
        return self._get_height(node.left) - self._get_height(node.right) if node else 0

    def _update(self, node: Optional[RankTreeNode]) -> None:
        """Update size and height of a node from its children."""
        if node:
            node.size = 1 + self._get_size(node.left) + self._get_size(node.right)
            node.height = 1 + max(self._get_height(node.left), self._get_height(node.right))

    def _rotate_right(self, y: RankTreeNode) -> RankTreeNode:
        """Right rotation for AVL balance; returns the new subtree root."""
        x = y.left
        T2 = x.right

        x.right = y
        y.left = T2

        # y is now the child, so it must be refreshed before x
        self._update(y)
        self._update(x)

        return x

    def _rotate_left(self, x: RankTreeNode) -> RankTreeNode:
        """Left rotation for AVL balance; returns the new subtree root."""
        y = x.right
        T2 = y.left

        y.left = x
        x.right = T2

        # x is now the child, so it must be refreshed before y
        self._update(x)
        self._update(y)

        return y

    def _balance(self, node: RankTreeNode) -> RankTreeNode:
        """Refresh the node's metadata and rebalance it if needed (AVL)."""
        self._update(node)
        balance = self._get_balance(node)

        # Left heavy
        if balance > 1:
            if self._get_balance(node.left) < 0:
                node.left = self._rotate_left(node.left)
            return self._rotate_right(node)

        # Right heavy
        if balance < -1:
            if self._get_balance(node.right) > 0:
                node.right = self._rotate_right(node.right)
            return self._rotate_left(node)

        return node

    def insert(self, key: Any, value: Any = None) -> None:
        """Insert a key-value pair, replacing the value of an existing key. O(log n)."""
        self.root = self._insert(self.root, key, value)

    def _insert(self, node: Optional[RankTreeNode], key: Any, value: Any) -> RankTreeNode:
        if node is None:
            return RankTreeNode(key=key, value=value)

        if key < node.key:
            node.left = self._insert(node.left, key, value)
        elif key > node.key:
            node.right = self._insert(node.right, key, value)
        else:
            node.value = value  # Update existing; shape is unchanged
            return node

        return self._balance(node)

    def search(self, key: Any, default: Any = None) -> Any:
        """
        Return the value stored under key, or `default` if the key is absent.

        O(log n)
        """
        node = self.root
        while node is not None:
            if key < node.key:
                node = node.left
            elif key > node.key:
                node = node.right
            else:
                return node.value
        return default

    def delete(self, key: Any) -> bool:
        """
        Remove key from the tree.

        O(log n)

        Returns True if the key was present and has been removed,
        False if the tree did not contain it.
        """
        size_before = self._get_size(self.root)
        self.root = self._delete(self.root, key)
        return self._get_size(self.root) < size_before

    def _delete(self, node: Optional[RankTreeNode], key: Any) -> Optional[RankTreeNode]:
        if node is None:
            return None

        if key < node.key:
            node.left = self._delete(node.left, key)
        elif key > node.key:
            node.right = self._delete(node.right, key)
        else:
            # With at most one child, that child simply takes this node's place
            if node.left is None:
                return node.right
            if node.right is None:
                return node.left

            # Two children: adopt the in-order successor's payload, then
            # remove the successor from the right subtree
            successor = node.right
            while successor.left is not None:
                successor = successor.left
            node.key, node.value = successor.key, successor.value
            node.right = self._delete(node.right, successor.key)

        return self._balance(node)

    def select(self, k: int) -> Optional[Any]:
        """
        Find the value of the k-th smallest key (1-indexed).

        O(log n)

        Returns None when k is outside 1..len(self).

        Use case: "Who is the 10th most active user?"
        """
        return self._select(self.root, k)

    def _select(self, node: Optional[RankTreeNode], k: int) -> Optional[Any]:
        if node is None:
            return None

        left_size = self._get_size(node.left)

        if k == left_size + 1:
            return node.value
        elif k <= left_size:
            return self._select(node.left, k)
        else:
            # Skip the left subtree and this node
            return self._select(node.right, k - left_size - 1)

    def rank(self, key: Any) -> int:
        """
        Find the rank of a key (1-indexed, smallest key has rank 1).

        O(log n)

        For a key that is not stored, the number of stored keys smaller
        than it is returned (so 0 for an empty tree).

        Use case: "What rank is user X?"
        """
        return self._rank(self.root, key)

    def _rank(self, node: Optional[RankTreeNode], key: Any) -> int:
        if node is None:
            return 0

        if key < node.key:
            return self._rank(node.left, key)
        elif key > node.key:
            return 1 + self._get_size(node.left) + self._rank(node.right, key)
        else:
            return self._get_size(node.left) + 1

    def __len__(self) -> int:
        return self._get_size(self.root)

    def inorder(self) -> Generator[tuple[Any, Any], None, None]:
        """Iterate over (key, value) pairs in ascending key order."""
        def _inorder(node):
            if node:
                yield from _inorder(node.left)
                yield (node.key, node.value)
                yield from _inorder(node.right)
        yield from _inorder(self.root)
|
| 529 |
+
|
| 530 |
+
|
| 531 |
+
# ============================================
|
| 532 |
+
# BUCKET SORT FOR TIME-BASED DATA
|
| 533 |
+
# ============================================
|
| 534 |
+
|
| 535 |
+
def bucket_sort_by_time(
    records: list[dict],
    time_field: str,
    bucket_size: int = 3600,  # Default: 1 hour
    start_time: Optional[int] = None,
    end_time: Optional[int] = None
) -> list[list[dict]]:
    """
    Sort records into time-based buckets.

    Time: O(n + k) where k = number of buckets
    Space: O(n)

    Use case: Group messages by hour, day, week for histograms.

    Args:
        records: List of dicts with timestamp field; a record without the
            field is treated as having timestamp 0
        time_field: Name of the timestamp field
        bucket_size: Size of each bucket in seconds (must be positive)
        start_time: Start of range (default: min timestamp)
        end_time: End of range (default: max timestamp)

    Returns:
        List of buckets, each containing records in that time range.
        Bucket i covers [start_time + i * bucket_size,
        start_time + (i + 1) * bucket_size); records outside
        start_time..end_time are dropped. Empty input gives [].

    Raises:
        ValueError: If records is non-empty and bucket_size is not positive.
    """
    if not records:
        return []

    if bucket_size <= 0:
        raise ValueError(f"bucket_size must be positive, got {bucket_size!r}")

    # Extract timestamps once; they are reused when distributing below
    timestamps = [r.get(time_field, 0) for r in records]

    if start_time is None:
        start_time = min(timestamps)
    if end_time is None:
        end_time = max(timestamps)

    # Calculate number of buckets. int() keeps the count usable for
    # range() when timestamps or bucket_size are floats.
    n_buckets = max(1, int((end_time - start_time) // bucket_size) + 1)

    # Initialize buckets
    buckets: list[list[dict]] = [[] for _ in range(n_buckets)]

    # Distribute records into buckets
    for record, ts in zip(records, timestamps):
        if ts < start_time or ts > end_time:
            continue

        bucket_idx = min(int((ts - start_time) // bucket_size), n_buckets - 1)
        buckets[bucket_idx].append(record)

    return buckets
|
| 587 |
+
|
| 588 |
+
|
| 589 |
+
def time_histogram(
    records: list[dict],
    time_field: str,
    bucket_size: int = 3600
) -> list[tuple[int, int]]:
    """
    Count records per fixed-width time bucket.

    The buckets span the earliest to the latest timestamp found in the
    records (a record without the field counts as timestamp 0).

    Returns list of (bucket_start_time, count) tuples, one per bucket in
    chronological order; an empty input gives an empty list.

    Use case: Activity over time visualization.
    """
    if not records:
        return []

    stamps = [rec.get(time_field, 0) for rec in records]
    first = min(stamps)
    last = max(stamps)

    grouped = bucket_sort_by_time(records, time_field, bucket_size, first, last)

    # Bucket i starts i * bucket_size after the earliest timestamp
    return [
        (first + position * bucket_size, len(group))
        for position, group in enumerate(grouped)
    ]
|
| 616 |
+
|
| 617 |
+
|
| 618 |
+
def hourly_distribution(
    records: list[dict],
    time_field: str
) -> dict[int, int]:
    """
    Count records per hour of day (0-23).

    Each timestamp is converted with datetime.fromtimestamp, i.e. in the
    local timezone of the running process. Records whose timestamp is
    missing or falsy (0, None) are ignored, and hours without any record
    do not appear in the result.

    Time: O(n)

    Use case: When are users most active?
    """
    from datetime import datetime

    per_hour = defaultdict(int)

    stamps = (rec.get(time_field, 0) for rec in records)
    for stamp in stamps:
        if not stamp:
            continue
        per_hour[datetime.fromtimestamp(stamp).hour] += 1

    return dict(per_hour)
|
| 640 |
+
|
| 641 |
+
|
| 642 |
+
# ============================================
|
| 643 |
+
# COMBINED DATA STRUCTURE: RANKED TIME INDEX
|
| 644 |
+
# ============================================
|
| 645 |
+
|
| 646 |
+
class RankedTimeIndex:
    """
    Combined data structure for efficient time-based and rank queries.

    Combines:
    - Time buckets (bucket_id -> records) for direct access to a time range
    - Rank tree for O(log n) rank queries
    - Top-K selection for efficient top queries within a range

    Records should be added through add() only, so that the timestamps
    remembered for each bucket stay aligned with the stored records.

    Use case: "Top 10 users in the last hour", "Rank of user X this week"
    """

    def __init__(self, bucket_size: int = 3600):
        self.bucket_size = bucket_size
        self.buckets: dict[int, list[dict]] = defaultdict(list)  # bucket_id -> records
        # bucket_id -> timestamps, parallel to self.buckets. Remembering the
        # timestamp used at insertion lets range queries work no matter
        # which time_field add() was called with.
        self._bucket_times: dict[int, list] = defaultdict(list)
        self.rank_tree = RankTree()  # For rank queries
        self.total_count = 0
        self.min_time = float('inf')
        self.max_time = 0

    def add(self, record: dict, time_field: str = 'date_unixtime', rank_field: str = None) -> None:
        """
        Add a record to the index. O(log n).

        Args:
            record: The record to index.
            time_field: Key holding the record's timestamp (0 if missing).
            rank_field: Optional key whose value becomes the record's key
                in the rank tree; skipped when None or absent from record.
        """
        ts = record.get(time_field, 0)

        # Update time bounds
        self.min_time = min(self.min_time, ts)
        self.max_time = max(self.max_time, ts)

        # Add to time bucket, remembering the timestamp alongside the record
        bucket_id = ts // self.bucket_size
        self.buckets[bucket_id].append(record)
        self._bucket_times[bucket_id].append(ts)

        # Add to rank tree if rank field specified
        if rank_field and rank_field in record:
            self.rank_tree.insert(record[rank_field], record)

        self.total_count += 1

    def get_time_range(self, start_time: int, end_time: int) -> list[dict]:
        """
        Get all records whose timestamp lies in start_time..end_time (inclusive).

        Each record is matched on the timestamp it was added with, whatever
        time_field was used. Results are ordered by bucket, then by
        insertion order within a bucket.

        Cost is proportional to the number of buckets overlapping both the
        requested window and the indexed time span, plus the records in them.
        """
        if not self.buckets:
            return []

        # Never walk bucket ids outside the span that actually holds data
        lo = max(start_time, self.min_time)
        hi = min(end_time, self.max_time)
        start_bucket = int(lo // self.bucket_size)
        end_bucket = int(hi // self.bucket_size)

        results = []
        for bucket_id in range(start_bucket, end_bucket + 1):
            bucket_records = self.buckets.get(bucket_id)
            if not bucket_records:
                continue
            bucket_times = self._bucket_times.get(bucket_id, [])
            for record, ts in zip(bucket_records, bucket_times):
                if start_time <= ts <= end_time:
                    results.append(record)

        return results

    def top_k_in_range(
        self,
        start_time: int,
        end_time: int,
        k: int,
        score_field: str
    ) -> list[dict]:
        """
        Get top K records by score in time range.

        O(m log k) where m = records in range
        """
        records = self.get_time_range(start_time, end_time)
        return top_k_by_field(records, score_field, k)

    def get_rank(self, key: Any) -> int:
        """Get rank of element in the rank tree. O(log n)."""
        return self.rank_tree.rank(key)

    def get_by_rank(self, k: int) -> Optional[dict]:
        """Get element by rank from the rank tree. O(log n)."""
        return self.rank_tree.select(k)
|
| 722 |
+
|
| 723 |
+
|
| 724 |
+
# ============================================
|
| 725 |
+
# TESTS AND DEMOS
|
| 726 |
+
# ============================================
|
| 727 |
+
|
| 728 |
+
def run_tests():
    """Exercise every algorithm in this module and print the outcome.

    This is a demo-style smoke run: the results are printed for manual
    inspection and nothing is asserted.
    """
    banner = "=" * 60
    print(banner)
    print("ALGORITHM TESTS")
    print(banner)

    # --- LCS ---------------------------------------------------------
    print("\n--- LCS (Longest Common Subsequence) ---")
    first_text = "שלום לכולם מה קורה"
    second_text = "שלום לכולם מה נשמע"
    common = lcs_string(first_text, second_text)
    score = lcs_similarity(first_text, second_text)
    print(f"String 1: {first_text}")
    print(f"String 2: {second_text}")
    print(f"LCS: '{common}'")
    print(f"Similarity: {score:.2%}")

    # --- Similar message detection -----------------------------------
    sample_messages = [
        (1, "היי מה קורה איך אתה"),
        (2, "היי מה קורה איך את"),
        (3, "שלום לכולם"),
        (4, "היי מה קורה איך אתם"),
    ]
    pairs = find_similar_messages(sample_messages, threshold=0.7, min_length=5)
    print("\nSimilar message pairs (threshold 0.7):")
    for first_id, second_id, pair_score in pairs:
        print(f" Messages {first_id} & {second_id}: {pair_score:.2%}")

    # --- Top-K -------------------------------------------------------
    print("\n--- Heap-based Top-K ---")
    fruit = ['apple', 'banana', 'apple', 'cherry', 'banana', 'apple', 'date', 'banana']
    most_common = top_k_frequent(fruit, k=2)
    print(f"Items: {fruit}")
    print(f"Top 2 frequent: {most_common}")

    # --- Selection (median / percentile) -----------------------------
    print("\n--- Selection Algorithm (Median) ---")
    numbers = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5]
    middle = find_median(numbers)
    ninetieth = find_percentile(numbers, 90)
    print(f"Array: {numbers}")
    print(f"Median: {middle}")
    print(f"90th percentile: {ninetieth}")

    # --- Rank tree ---------------------------------------------------
    print("\n--- Rank Tree (Order Statistics) ---")
    score_tree = RankTree()
    scored_users = [
        (100, "Alice"),
        (250, "Bob"),
        (50, "Charlie"),
        (300, "Diana"),
        (150, "Eve"),
    ]
    for user_score, user_name in scored_users:
        score_tree.insert(user_score, user_name)

    print(f"Users by score: {scored_users}")
    print(f"3rd ranked (by score): {score_tree.select(3)}")
    print(f"Rank of score 150: {score_tree.rank(150)}")
    print(f"All in order: {list(score_tree.inorder())}")

    # --- Bucket sort -------------------------------------------------
    print("\n--- Bucket Sort (Time-based) ---")
    timed_records = [
        {'id': 1, 'ts': 1000},
        {'id': 2, 'ts': 1500},
        {'id': 3, 'ts': 2500},
        {'id': 4, 'ts': 1200},
        {'id': 5, 'ts': 3000},
    ]
    histogram = time_histogram(timed_records, 'ts', bucket_size=1000)
    print(f"Records: {timed_records}")
    print(f"Histogram (bucket=1000): {histogram}")

    # --- Combined structure ------------------------------------------
    print("\n--- Combined RankedTimeIndex ---")
    time_index = RankedTimeIndex(bucket_size=1000)
    for entry in timed_records:
        time_index.add(entry, time_field='ts', rank_field='id')

    in_window = time_index.get_time_range(1000, 2000)
    print(f"Records in time range 1000-2000: {[entry['id'] for entry in in_window]}")

    print("\n" + banner)
    print("ALL TESTS PASSED!")
    print(banner)


# Run the demo when this module is executed as a script
if __name__ == '__main__':
    run_tests()
|
dashboard.py
ADDED
|
@@ -0,0 +1,2086 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Telegram Analytics Dashboard - Web Server
|
| 4 |
+
|
| 5 |
+
A Flask-based web dashboard for visualizing Telegram chat analytics.
|
| 6 |
+
Inspired by Combot and other Telegram statistics bots.
|
| 7 |
+
|
| 8 |
+
Usage:
|
| 9 |
+
python dashboard.py --db telegram.db --port 5000
|
| 10 |
+
Then open http://localhost:5000 in your browser
|
| 11 |
+
|
| 12 |
+
Requirements:
|
| 13 |
+
pip install flask
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import sqlite3
|
| 17 |
+
import json
|
| 18 |
+
import csv
|
| 19 |
+
import io
|
| 20 |
+
import os
|
| 21 |
+
from datetime import datetime, timedelta
|
| 22 |
+
from flask import Flask, render_template, jsonify, request, Response
|
| 23 |
+
from typing import Optional
|
| 24 |
+
from collections import defaultdict
|
| 25 |
+
|
| 26 |
+
# ------------------------------------------
# AI configuration
# Values come from environment variables (e.g. a .env file or the hosting
# platform's settings).
# ------------------------------------------
# Keep any non-empty AI_PROVIDER already present; otherwise fall back to 'gemini'.
os.environ['AI_PROVIDER'] = os.environ.get('AI_PROVIDER') or 'gemini'
# GEMINI_API_KEY must be supplied through the environment and never hardcoded here.
|
| 33 |
+
|
| 34 |
+
# Import our algorithms
|
| 35 |
+
from algorithms import (
|
| 36 |
+
TopK, find_median, find_percentile, top_k_frequent,
|
| 37 |
+
RankTree, lcs_similarity, find_similar_messages,
|
| 38 |
+
bucket_sort_by_time, time_histogram, RankedTimeIndex
|
| 39 |
+
)
|
| 40 |
+
|
| 41 |
+
# Semantic search is optional (it relies on pre-computed embeddings); when the
# module cannot be imported the feature flag is off and the factory is None.
try:
    from semantic_search import get_semantic_search
except ImportError:
    get_semantic_search = None
    HAS_SEMANTIC_SEARCH = False
else:
    HAS_SEMANTIC_SEARCH = True
|
| 48 |
+
|
| 49 |
+
app = Flask(__name__)
|
| 50 |
+
# Path of the SQLite database file opened by get_db().
DB_PATH = 'telegram.db'


def get_db():
    """Open a new SQLite connection to ``DB_PATH``.

    Returns:
        A ``sqlite3.Connection`` whose rows are ``sqlite3.Row`` objects.
        The caller is responsible for closing it.
    """
    connection = sqlite3.connect(DB_PATH)
    # sqlite3.Row allows columns to be read by name as well as by index.
    connection.row_factory = sqlite3.Row
    return connection
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def parse_timeframe(timeframe: str) -> tuple[int, int]:
    """Translate a timeframe selector into ``(start, end)`` Unix timestamps.

    Args:
        timeframe: One of ``'today'``, ``'yesterday'``, ``'week'``, ``'month'``,
            ``'year'``, ``'all'``, or a custom ``"start,end"`` string holding
            two integer Unix timestamps.

    Returns:
        ``(start_ts, end_ts)`` as integers. Named windows are computed from
        ``datetime.now()``. A value that is neither a known name nor a
        parseable custom range falls back to ``(0, now)``, the same range
        as ``'all'``.
    """
    now = datetime.now()
    now_ts = int(now.timestamp())

    if timeframe == 'all':
        return 0, now_ts

    today_start = datetime(now.year, now.month, now.day)

    if timeframe == 'yesterday':
        # The only named window that ends at midnight rather than "now".
        yesterday_start = today_start - timedelta(days=1)
        return int(yesterday_start.timestamp()), int(today_start.timestamp())

    # Named windows that begin N days before today's midnight and end now.
    lookback_days = {'today': 0, 'week': 7, 'month': 30, 'year': 365}
    days_back = lookback_days.get(timeframe) if isinstance(timeframe, str) else None
    if days_back is not None:
        start = today_start - timedelta(days=days_back)
        return int(start.timestamp()), now_ts

    # Custom range: "start,end" as Unix timestamps.
    try:
        parts = timeframe.split(',')
        return int(parts[0]), int(parts[1])
    except (AttributeError, IndexError, ValueError):
        # Only parse failures fall back to "all time": a non-string value,
        # a missing second part, or non-integer text. Anything else
        # (e.g. KeyboardInterrupt) must propagate.
        return 0, now_ts
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
# ==========================================
|
| 94 |
+
# CACHE INVALIDATION SYSTEM
|
| 95 |
+
# ==========================================
|
| 96 |
+
|
| 97 |
+
# Version stamp for cached data; invalidate_caches() increments it after DB updates.
_cache_version = 0


def invalidate_caches():
    """Discard all cached data so it is rebuilt on next use.

    Intended to be called after the database changes (sync, import, etc.).
    """
    global _cache_version, _user_rank_tree, _user_rank_tree_timeframe
    _cache_version += 1
    # Forget the cached rank tree together with the timeframe it was built for.
    _user_rank_tree = _user_rank_tree_timeframe = None
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
# ==========================================
|
| 109 |
+
# GLOBAL ALGORITHM CACHES
|
| 110 |
+
# ==========================================
|
| 111 |
+
|
| 112 |
+
# Cached RankTree used for user ranking, rebuilt on demand, plus the
# timeframe and cache version it was built for.
_user_rank_tree = None
_user_rank_tree_timeframe = None
_user_rank_tree_version = -1


def get_user_rank_tree(timeframe: str):
    """Return the cached user rank tree for ``timeframe``, rebuilding it if stale.

    The cached tree is reused only while it exists, was built for the same
    ``timeframe`` string, and carries the current ``_cache_version``.

    Args:
        timeframe: Selector passed to ``parse_timeframe``.

    Returns:
        A ``RankTree`` with one entry per sender. Each entry is keyed by the
        negated message count and holds a dict with ``user_id``, ``name`` and
        ``messages``.
        NOTE(review): the negated key presumably makes the most active users
        sort first -- confirm against RankTree's ordering.
    """
    global _user_rank_tree, _user_rank_tree_timeframe, _user_rank_tree_version

    cache_is_fresh = (
        _user_rank_tree is not None
        and _user_rank_tree_timeframe == timeframe
        and _user_rank_tree_version == _cache_version
    )
    if cache_is_fresh:
        return _user_rank_tree

    start_ts, end_ts = parse_timeframe(timeframe)

    conn = get_db()
    try:
        rows = conn.execute('''
            SELECT from_id, from_name, COUNT(*) as message_count
            FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
            AND from_id IS NOT NULL AND from_id != ''
            GROUP BY from_id
            ORDER BY message_count DESC
        ''', (start_ts, end_ts)).fetchall()
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # Build into a local first so a failure part-way through never leaves a
    # half-populated tree in the module-level cache.
    tree = RankTree()
    for row in rows:
        tree.insert(
            -row['message_count'],
            {'user_id': row['from_id'], 'name': row['from_name'], 'messages': row['message_count']}
        )

    _user_rank_tree = tree
    _user_rank_tree_timeframe = timeframe
    _user_rank_tree_version = _cache_version
    return tree
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
# ==========================================
|
| 155 |
+
# PAGE ROUTES
|
| 156 |
+
# ==========================================
|
| 157 |
+
|
| 158 |
+
@app.route('/')
def index():
    """Render the main dashboard page from ``index.html``."""
    page = render_template('index.html')
    return page
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
@app.route('/users')
def users_page():
    """Render the user leaderboard page from ``users.html``."""
    page = render_template('users.html')
    return page
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
@app.route('/moderation')
def moderation_page():
    """Render the moderation analytics page from ``moderation.html``."""
    page = render_template('moderation.html')
    return page
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
@app.route('/search')
def search_page():
    """Render the search page from ``search.html``."""
    page = render_template('search.html')
    return page
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
@app.route('/chat')
def chat_page():
    """Render the Telegram-like chat view from ``chat.html``."""
    page = render_template('chat.html')
    return page
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
@app.route('/user/<user_id>')
def user_profile_page(user_id):
    """Render the statistics profile page for one user.

    Args:
        user_id: Identifier taken from the URL; passed to ``user_profile.html``
            as the ``user_id`` template variable.
    """
    template_context = {'user_id': user_id}
    return render_template('user_profile.html', **template_context)
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
@app.route('/settings')
def settings_page():
    """Render the settings and data update page from ``settings.html``."""
    page = render_template('settings.html')
    return page
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
# ==========================================
|
| 201 |
+
# API ENDPOINTS - OVERVIEW STATS
|
| 202 |
+
# ==========================================
|
| 203 |
+
|
| 204 |
+
@app.route('/api/overview')
def api_overview():
    """Return headline statistics for a timeframe as JSON.

    Query parameters:
        timeframe: Selector passed to ``parse_timeframe`` (default ``'all'``).

    Returns:
        A JSON object with message and user totals, per-day averages,
        media/link/mention/reply/forward counts, the day span and the
        first/last message timestamps. ``total_users`` counts every row of
        the ``users`` table regardless of timeframe.
    """
    timeframe = request.args.get('timeframe', 'all')
    start_ts, end_ts = parse_timeframe(timeframe)
    window = (start_ts, end_ts)

    conn = get_db()
    try:
        # Total messages
        total_messages = conn.execute('''
            SELECT COUNT(*) FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
        ''', window).fetchone()[0]

        # Active users
        active_users = conn.execute('''
            SELECT COUNT(DISTINCT from_id) FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
        ''', window).fetchone()[0]

        # Total users (all time)
        total_users = conn.execute('SELECT COUNT(*) FROM users').fetchone()[0]

        # Date range actually covered by messages inside the window
        range_row = conn.execute('''
            SELECT MIN(date_unixtime), MAX(date_unixtime) FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
        ''', window).fetchone()

        # Distinct senders for each calendar day that has messages
        daily_users = [r[0] for r in conn.execute('''
            SELECT COUNT(DISTINCT from_id) as users,
                   date(datetime(date_unixtime, 'unixepoch')) as day
            FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
            GROUP BY day
        ''', window).fetchall()]

        # Messages with media/links/mentions
        flags_row = conn.execute('''
            SELECT
                SUM(has_media) as media,
                SUM(has_links) as links,
                SUM(has_mentions) as mentions
            FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
        ''', window).fetchone()

        # Replies
        replies_count = conn.execute('''
            SELECT COUNT(*) FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
            AND reply_to_message_id IS NOT NULL
        ''', window).fetchone()[0]

        # Forwards
        forwards_count = conn.execute('''
            SELECT COUNT(*) FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
            AND forwarded_from IS NOT NULL
        ''', window).fetchone()[0]
    finally:
        # Release the connection even if one of the queries fails.
        conn.close()

    # With no messages in the window MIN/MAX are NULL; use the window bounds.
    first_msg = range_row[0] or start_ts
    last_msg = range_row[1] or end_ts

    # At least one day, so the per-day average never divides by zero.
    days = max(1, (last_msg - first_msg) // 86400)
    messages_per_day = total_messages / days

    # Average of the per-day distinct sender counts (days without messages are not counted).
    users_per_day = sum(daily_users) / len(daily_users) if daily_users else 0

    # SUM() yields NULL when no rows match; report 0 instead.
    media_count = flags_row[0] or 0
    links_count = flags_row[1] or 0
    mentions_count = flags_row[2] or 0

    return jsonify({
        'total_messages': total_messages,
        'active_users': active_users,
        'total_users': total_users,
        'messages_per_day': round(messages_per_day, 1),
        'users_per_day': round(users_per_day, 1),
        'messages_per_user': round(total_messages / active_users, 1) if active_users else 0,
        'media_count': media_count,
        'links_count': links_count,
        'mentions_count': mentions_count,
        'replies_count': replies_count,
        'forwards_count': forwards_count,
        'days_span': days,
        'first_message': first_msg,
        'last_message': last_msg
    })
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
# ==========================================
|
| 307 |
+
# API ENDPOINTS - CHARTS
|
| 308 |
+
# ==========================================
|
| 309 |
+
|
| 310 |
+
@app.route('/api/chart/messages')
def api_chart_messages():
    """Return message volume over time as a JSON list of ``{label, value}``.

    Query parameters:
        timeframe: Selector passed to ``parse_timeframe`` (default ``'month'``).
        granularity: ``'hour'``, ``'day'`` or ``'week'`` (default ``'day'``);
            any other value is treated as ``'day'``.
    """
    timeframe = request.args.get('timeframe', 'month')
    granularity = request.args.get('granularity', 'day')  # hour, day, week
    start_ts, end_ts = parse_timeframe(timeframe)

    # strftime pattern for each bucket size; unknown values use per-day buckets.
    period_formats = {'hour': '%Y-%m-%d %H:00', 'week': '%Y-W%W'}
    format_str = period_formats.get(granularity, '%Y-%m-%d')

    conn = get_db()
    try:
        # The format is bound as a parameter rather than interpolated into the SQL text.
        cursor = conn.execute('''
            SELECT
                strftime(?, datetime(date_unixtime, 'unixepoch')) as period,
                COUNT(*) as count
            FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
            GROUP BY period
            ORDER BY period
        ''', (format_str, start_ts, end_ts))
        data = [{'label': row[0], 'value': row[1]} for row in cursor.fetchall()]
    finally:
        # Release the connection even if the query fails.
        conn.close()

    return jsonify(data)
|
| 340 |
+
|
| 341 |
+
|
| 342 |
+
@app.route('/api/chart/users')
def api_chart_users():
    """Return distinct active senders over time as a JSON list of ``{label, value}``.

    Query parameters:
        timeframe: Selector passed to ``parse_timeframe`` (default ``'month'``).
        granularity: ``'hour'``, ``'day'`` or ``'week'`` (default ``'day'``);
            any other value is treated as ``'day'``.
    """
    timeframe = request.args.get('timeframe', 'month')
    granularity = request.args.get('granularity', 'day')
    start_ts, end_ts = parse_timeframe(timeframe)

    # strftime pattern for each bucket size; unknown values use per-day buckets.
    period_formats = {'hour': '%Y-%m-%d %H:00', 'week': '%Y-W%W'}
    format_str = period_formats.get(granularity, '%Y-%m-%d')

    conn = get_db()
    try:
        # The format is bound as a parameter rather than interpolated into the SQL text.
        cursor = conn.execute('''
            SELECT
                strftime(?, datetime(date_unixtime, 'unixepoch')) as period,
                COUNT(DISTINCT from_id) as count
            FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
            GROUP BY period
            ORDER BY period
        ''', (format_str, start_ts, end_ts))
        data = [{'label': row[0], 'value': row[1]} for row in cursor.fetchall()]
    finally:
        # Release the connection even if the query fails.
        conn.close()

    return jsonify(data)
|
| 372 |
+
|
| 373 |
+
|
| 374 |
+
@app.route('/api/chart/heatmap')
def api_chart_heatmap():
    """Return an activity heatmap (day of week x hour of day) as JSON.

    Query parameters:
        timeframe: Selector passed to ``parse_timeframe`` (default ``'all'``).

    Returns:
        A JSON object with ``data`` (7 rows of 24 message counts, row 0 being
        Sunday as produced by SQLite's ``%w``), ``days`` (row labels) and
        ``hours`` (column labels 0-23).
    """
    timeframe = request.args.get('timeframe', 'all')
    start_ts, end_ts = parse_timeframe(timeframe)

    conn = get_db()
    try:
        rows = conn.execute('''
            SELECT
                CAST(strftime('%w', datetime(date_unixtime, 'unixepoch')) AS INTEGER) as dow,
                CAST(strftime('%H', datetime(date_unixtime, 'unixepoch')) AS INTEGER) as hour,
                COUNT(*) as count
            FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
            GROUP BY dow, hour
        ''', (start_ts, end_ts)).fetchall()
    finally:
        # Release the connection even if the query fails.
        conn.close()

    # Start from an all-zero grid so slots with no messages are still present.
    heatmap = [[0] * 24 for _ in range(7)]
    for dow, hour, count in rows:
        heatmap[dow][hour] = count

    return jsonify({
        'data': heatmap,
        'days': ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'],
        'hours': list(range(24))
    })
|
| 406 |
+
|
| 407 |
+
|
| 408 |
+
@app.route('/api/chart/daily')
def api_chart_daily():
    """Return message counts per day of week as a JSON list of ``{label, value}``.

    Query parameters:
        timeframe: Selector passed to ``parse_timeframe`` (default ``'all'``).

    The list always has seven entries, Sunday first; days without messages
    have a value of 0.
    """
    timeframe = request.args.get('timeframe', 'all')
    start_ts, end_ts = parse_timeframe(timeframe)

    # Indexed by SQLite's %w weekday number (0 = Sunday).
    days = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']

    conn = get_db()
    try:
        rows = conn.execute('''
            SELECT
                CAST(strftime('%w', datetime(date_unixtime, 'unixepoch')) AS INTEGER) as dow,
                COUNT(*) as count
            FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
            GROUP BY dow
            ORDER BY dow
        ''', (start_ts, end_ts)).fetchall()
    finally:
        # Release the connection even if the query fails.
        conn.close()

    data = {days[row[0]]: row[1] for row in rows}

    return jsonify([{'label': day, 'value': data.get(day, 0)} for day in days])
|
| 432 |
+
|
| 433 |
+
|
| 434 |
+
@app.route('/api/chart/hourly')
def api_chart_hourly():
    """Return message counts per hour of day as a JSON list of ``{label, value}``.

    Query parameters:
        timeframe: Selector passed to ``parse_timeframe`` (default ``'all'``).

    The list always has 24 entries labelled ``'00:00'`` to ``'23:00'``; hours
    without messages have a value of 0.
    """
    timeframe = request.args.get('timeframe', 'all')
    start_ts, end_ts = parse_timeframe(timeframe)

    conn = get_db()
    try:
        rows = conn.execute('''
            SELECT
                CAST(strftime('%H', datetime(date_unixtime, 'unixepoch')) AS INTEGER) as hour,
                COUNT(*) as count
            FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
            GROUP BY hour
            ORDER BY hour
        ''', (start_ts, end_ts)).fetchall()
    finally:
        # Release the connection even if the query fails.
        conn.close()

    data = {row[0]: row[1] for row in rows}

    return jsonify([{'label': f'{h:02d}:00', 'value': data.get(h, 0)} for h in range(24)])
|
| 456 |
+
|
| 457 |
+
|
| 458 |
+
# ==========================================
|
| 459 |
+
# API ENDPOINTS - USERS
|
| 460 |
+
# ==========================================
|
| 461 |
+
|
| 462 |
+
def _participant_role(participant):
    """Map a participant record's flags to 'creator', 'admin', 'bot' or None.

    Flags are checked in that order, so the first truthy one wins.
    """
    for flag, role in (('is_creator', 'creator'), ('is_admin', 'admin'), ('is_bot', 'bot')):
        if participant.get(flag):
            return role
    return None


def _participant_display_name(participant):
    """Join first and last name, treating missing or NULL parts as empty."""
    first = participant.get('first_name') or ''
    last = participant.get('last_name') or ''
    return f"{first} {last}".strip()


@app.route('/api/users')
def api_users():
    """Return the user leaderboard, optionally including silent participants.

    Query parameters:
        timeframe: Selector passed to ``parse_timeframe`` (default ``'all'``).
        limit: Page size (default 50). Malformed values fall back to the
            default; negative values are treated as 0.
        offset: Index of the first entry to return (default 0), sanitised
            the same way as ``limit``.
        include_inactive: ``'1'`` (default) appends participants who have no
            messages in the timeframe after the ranked users.

    Returns:
        A JSON object with the page of ``users`` plus ``total``,
        ``total_active``, ``total_participants``, ``limit`` and ``offset``.
        Users with messages are ordered by message count and ranked from 1;
        appended participants have ``rank`` set to ``None``.
    """
    timeframe = request.args.get('timeframe', 'all')
    start_ts, end_ts = parse_timeframe(timeframe)
    # type=int makes a malformed value fall back to the default instead of
    # raising; clamping avoids surprising slices from negative numbers.
    limit = max(0, request.args.get('limit', 50, type=int))
    offset = max(0, request.args.get('offset', 0, type=int))
    include_inactive = request.args.get('include_inactive', '1') == '1'

    conn = get_db()
    try:
        # Total messages, used for each user's percentage share
        total_messages = conn.execute('''
            SELECT COUNT(*) FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
        ''', (start_ts, end_ts)).fetchone()[0]

        # Per-user stats from messages
        message_rows = conn.execute('''
            SELECT
                from_id,
                from_name,
                COUNT(*) as message_count,
                SUM(LENGTH(text_plain)) as char_count,
                SUM(has_links) as links,
                SUM(has_media) as media,
                MIN(date_unixtime) as first_seen,
                MAX(date_unixtime) as last_seen,
                COUNT(DISTINCT date(datetime(date_unixtime, 'unixepoch'))) as active_days
            FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
            AND from_id IS NOT NULL AND from_id != ''
            GROUP BY from_id
            ORDER BY message_count DESC
        ''', (start_ts, end_ts)).fetchall()

        # Participant data is optional: the table might not exist yet.
        try:
            participant_rows = conn.execute('SELECT * FROM participants').fetchall()
        except sqlite3.Error:
            participant_rows = []
    finally:
        # Release the connection even if one of the queries fails.
        conn.close()

    users = []
    active_user_ids = set()
    for row in message_rows:
        active_user_ids.add(row['from_id'])
        users.append({
            'user_id': row['from_id'],
            'name': row['from_name'] or 'Unknown',
            'messages': row['message_count'],
            'characters': row['char_count'] or 0,
            'percentage': round(100 * row['message_count'] / total_messages, 2) if total_messages else 0,
            'links': row['links'] or 0,
            'media': row['media'] or 0,
            'first_seen': row['first_seen'],
            'last_seen': row['last_seen'],
            'active_days': row['active_days'],
            'daily_average': round(row['message_count'] / max(1, row['active_days']), 1),
            'is_participant': False,
            'role': None,
        })

    has_participants = bool(participant_rows)
    if has_participants:
        participant_map = {row['user_id']: dict(row) for row in participant_rows}

        # Enrich users who sent messages with their participant data
        for user in users:
            p = participant_map.get(user['user_id'])
            if p:
                user['is_participant'] = True
                user['username'] = p.get('username', '')
                role = _participant_role(p)
                if role:
                    user['role'] = role

        # Add participants with no messages in the timeframe
        if include_inactive:
            for uid, p in participant_map.items():
                if uid in active_user_ids:
                    continue
                users.append({
                    'user_id': uid,
                    'name': _participant_display_name(p) or 'Unknown',
                    'messages': 0,
                    'characters': 0,
                    'percentage': 0,
                    'links': 0,
                    'media': 0,
                    'first_seen': None,
                    'last_seen': None,
                    'active_days': 0,
                    'daily_average': 0,
                    'is_participant': True,
                    'username': p.get('username', ''),
                    'role': _participant_role(p),
                })

    # Ranks follow list order; entries without messages stay unranked.
    for position, user in enumerate(users, start=1):
        user['rank'] = position if user['messages'] > 0 else None

    total_users = len(users)
    total_active = len(active_user_ids)

    return jsonify({
        'users': users[offset:offset + limit],
        'total': total_users,
        'total_active': total_active,
        'total_participants': total_users - total_active if has_participants else 0,
        'limit': limit,
        'offset': offset
    })
|
| 598 |
+
|
| 599 |
+
|
| 600 |
+
@app.route('/api/user/<user_id>')
def api_user_detail(user_id):
    """Return detailed statistics for one user within a timeframe as JSON.

    Args:
        user_id: Identifier from the URL, matched against ``messages.from_id``.

    Query parameters:
        timeframe: Selector passed to ``parse_timeframe`` (default ``'all'``).

    Returns:
        A JSON object with totals, reply counts, rank, a 24-slot hourly
        histogram and the 30 most recent active days. Responds with
        ``{'error': 'User not found'}`` and status 404 when the user has no
        messages in the timeframe.
    """
    timeframe = request.args.get('timeframe', 'all')
    start_ts, end_ts = parse_timeframe(timeframe)
    user_window = (user_id, start_ts, end_ts)

    conn = get_db()
    try:
        # Basic stats
        row = conn.execute('''
            SELECT
                from_name,
                COUNT(*) as messages,
                SUM(LENGTH(text_plain)) as characters,
                SUM(has_links) as links,
                SUM(has_media) as media,
                SUM(has_mentions) as mentions,
                MIN(date_unixtime) as first_seen,
                MAX(date_unixtime) as last_seen,
                COUNT(DISTINCT date(datetime(date_unixtime, 'unixepoch'))) as active_days
            FROM messages
            WHERE from_id = ?
            AND date_unixtime BETWEEN ? AND ?
        ''', user_window).fetchone()

        if not row or not row['messages']:
            return jsonify({'error': 'User not found'}), 404

        # Replies sent
        replies_sent = conn.execute('''
            SELECT COUNT(*) FROM messages
            WHERE from_id = ? AND reply_to_message_id IS NOT NULL
            AND date_unixtime BETWEEN ? AND ?
        ''', user_window).fetchone()[0]

        # Replies received
        replies_received = conn.execute('''
            SELECT COUNT(*) FROM messages m1
            JOIN messages m2 ON m1.reply_to_message_id = m2.id
            WHERE m2.from_id = ?
            AND m1.date_unixtime BETWEEN ? AND ?
        ''', user_window).fetchone()[0]

        # Activity by hour
        hourly = {r[0]: r[1] for r in conn.execute('''
            SELECT
                CAST(strftime('%H', datetime(date_unixtime, 'unixepoch')) AS INTEGER) as hour,
                COUNT(*) as count
            FROM messages
            WHERE from_id = ?
            AND date_unixtime BETWEEN ? AND ?
            GROUP BY hour
        ''', user_window).fetchall()}

        # Activity over time (30 most recent days with messages, newest first)
        daily = [{'date': r[0], 'count': r[1]} for r in conn.execute('''
            SELECT
                date(datetime(date_unixtime, 'unixepoch')) as day,
                COUNT(*) as count
            FROM messages
            WHERE from_id = ?
            AND date_unixtime BETWEEN ? AND ?
            GROUP BY day
            ORDER BY day DESC
            LIMIT 30
        ''', user_window).fetchall()]

        # Rank: one more than the number of senders with strictly more
        # messages. Rows without a sender id are excluded, matching the
        # leaderboard queries, so both views agree on a user's position.
        rank = conn.execute('''
            SELECT COUNT(*) + 1 FROM (
                SELECT from_id, COUNT(*) as cnt FROM messages
                WHERE date_unixtime BETWEEN ? AND ?
                AND from_id IS NOT NULL AND from_id != ''
                GROUP BY from_id
            ) WHERE cnt > ?
        ''', (start_ts, end_ts, row['messages'])).fetchone()[0]
    finally:
        # Release the connection on every path, including the 404 return.
        conn.close()

    return jsonify({
        'user_id': user_id,
        'name': row['from_name'] or 'Unknown',
        'messages': row['messages'],
        'characters': row['characters'] or 0,
        'links': row['links'] or 0,
        'media': row['media'] or 0,
        'mentions': row['mentions'] or 0,
        'first_seen': row['first_seen'],
        'last_seen': row['last_seen'],
        'active_days': row['active_days'],
        'daily_average': round(row['messages'] / max(1, row['active_days']), 1),
        'replies_sent': replies_sent,
        'replies_received': replies_received,
        'rank': rank,
        'hourly_activity': [hourly.get(h, 0) for h in range(24)],
        'daily_activity': daily
    })
|
| 703 |
+
|
| 704 |
+
|
| 705 |
+
@app.route('/api/user/<user_id>/profile')
|
| 706 |
+
def api_user_profile(user_id):
|
| 707 |
+
"""Get comprehensive user profile with all available statistics."""
|
| 708 |
+
conn = get_db()
|
| 709 |
+
|
| 710 |
+
# ---- Participant info (from Telethon sync) ----
|
| 711 |
+
participant = None
|
| 712 |
+
try:
|
| 713 |
+
cursor = conn.execute('SELECT * FROM participants WHERE user_id = ?', (user_id,))
|
| 714 |
+
row = cursor.fetchone()
|
| 715 |
+
if row:
|
| 716 |
+
participant = dict(row)
|
| 717 |
+
except Exception:
|
| 718 |
+
pass # Table might not exist yet
|
| 719 |
+
|
| 720 |
+
# ---- Basic message stats ----
|
| 721 |
+
cursor = conn.execute('''
|
| 722 |
+
SELECT
|
| 723 |
+
from_name,
|
| 724 |
+
COUNT(*) as total_messages,
|
| 725 |
+
SUM(text_length) as total_chars,
|
| 726 |
+
AVG(text_length) as avg_length,
|
| 727 |
+
MAX(text_length) as max_length,
|
| 728 |
+
SUM(has_links) as links_shared,
|
| 729 |
+
SUM(has_media) as media_sent,
|
| 730 |
+
SUM(has_photo) as photos_sent,
|
| 731 |
+
SUM(has_mentions) as mentions_made,
|
| 732 |
+
SUM(is_edited) as edits,
|
| 733 |
+
MIN(date_unixtime) as first_message,
|
| 734 |
+
MAX(date_unixtime) as last_message,
|
| 735 |
+
COUNT(DISTINCT date(datetime(date_unixtime, 'unixepoch'))) as active_days
|
| 736 |
+
FROM messages WHERE from_id = ?
|
| 737 |
+
''', (user_id,))
|
| 738 |
+
stats = cursor.fetchone()
|
| 739 |
+
|
| 740 |
+
if not stats or not stats['total_messages']:
|
| 741 |
+
# User might be a participant who never sent a message
|
| 742 |
+
if participant:
|
| 743 |
+
conn.close()
|
| 744 |
+
return jsonify({
|
| 745 |
+
'user_id': user_id,
|
| 746 |
+
'participant': participant,
|
| 747 |
+
'has_messages': False,
|
| 748 |
+
'name': f"{participant.get('first_name', '')} {participant.get('last_name', '')}".strip()
|
| 749 |
+
})
|
| 750 |
+
conn.close()
|
| 751 |
+
return jsonify({'error': 'User not found'}), 404
|
| 752 |
+
|
| 753 |
+
stats = dict(stats)
|
| 754 |
+
|
| 755 |
+
# ---- Replies sent (who does this user reply to most) ----
|
| 756 |
+
cursor = conn.execute('''
|
| 757 |
+
SELECT r.from_name, r.from_id, COUNT(*) as cnt
|
| 758 |
+
FROM messages m
|
| 759 |
+
JOIN messages r ON m.reply_to_message_id = r.id
|
| 760 |
+
WHERE m.from_id = ? AND r.from_id != ?
|
| 761 |
+
GROUP BY r.from_id
|
| 762 |
+
ORDER BY cnt DESC
|
| 763 |
+
LIMIT 10
|
| 764 |
+
''', (user_id, user_id))
|
| 765 |
+
replies_to = [{'name': r[0], 'user_id': r[1], 'count': r[2]} for r in cursor.fetchall()]
|
| 766 |
+
|
| 767 |
+
# ---- Replies received (who replies to this user most) ----
|
| 768 |
+
cursor = conn.execute('''
|
| 769 |
+
SELECT m.from_name, m.from_id, COUNT(*) as cnt
|
| 770 |
+
FROM messages m
|
| 771 |
+
JOIN messages r ON m.reply_to_message_id = r.id
|
| 772 |
+
WHERE r.from_id = ? AND m.from_id != ?
|
| 773 |
+
GROUP BY m.from_id
|
| 774 |
+
ORDER BY cnt DESC
|
| 775 |
+
LIMIT 10
|
| 776 |
+
''', (user_id, user_id))
|
| 777 |
+
replies_from = [{'name': r[0], 'user_id': r[1], 'count': r[2]} for r in cursor.fetchall()]
|
| 778 |
+
|
| 779 |
+
# ---- Total replies sent/received ----
|
| 780 |
+
cursor = conn.execute('''
|
| 781 |
+
SELECT COUNT(*) FROM messages
|
| 782 |
+
WHERE from_id = ? AND reply_to_message_id IS NOT NULL
|
| 783 |
+
''', (user_id,))
|
| 784 |
+
total_replies_sent = cursor.fetchone()[0]
|
| 785 |
+
|
| 786 |
+
cursor = conn.execute('''
|
| 787 |
+
SELECT COUNT(*) FROM messages m
|
| 788 |
+
JOIN messages r ON m.reply_to_message_id = r.id
|
| 789 |
+
WHERE r.from_id = ? AND m.from_id != ?
|
| 790 |
+
''', (user_id, user_id))
|
| 791 |
+
total_replies_received = cursor.fetchone()[0]
|
| 792 |
+
|
| 793 |
+
# ---- Forwarded messages ----
|
| 794 |
+
cursor = conn.execute('''
|
| 795 |
+
SELECT COUNT(*) FROM messages
|
| 796 |
+
WHERE from_id = ? AND forwarded_from IS NOT NULL
|
| 797 |
+
''', (user_id,))
|
| 798 |
+
forwards_sent = cursor.fetchone()[0]
|
| 799 |
+
|
| 800 |
+
# ---- Top forwarded sources ----
|
| 801 |
+
cursor = conn.execute('''
|
| 802 |
+
SELECT forwarded_from, COUNT(*) as cnt
|
| 803 |
+
FROM messages
|
| 804 |
+
WHERE from_id = ? AND forwarded_from IS NOT NULL
|
| 805 |
+
GROUP BY forwarded_from
|
| 806 |
+
ORDER BY cnt DESC
|
| 807 |
+
LIMIT 5
|
| 808 |
+
''', (user_id,))
|
| 809 |
+
top_forward_sources = [{'name': r[0], 'count': r[1]} for r in cursor.fetchall()]
|
| 810 |
+
|
| 811 |
+
# ---- Activity by hour ----
|
| 812 |
+
cursor = conn.execute('''
|
| 813 |
+
SELECT
|
| 814 |
+
CAST(strftime('%H', datetime(date_unixtime, 'unixepoch')) AS INTEGER) as hour,
|
| 815 |
+
COUNT(*) as count
|
| 816 |
+
FROM messages WHERE from_id = ?
|
| 817 |
+
GROUP BY hour
|
| 818 |
+
''', (user_id,))
|
| 819 |
+
hourly = {r[0]: r[1] for r in cursor.fetchall()}
|
| 820 |
+
|
| 821 |
+
# ---- Activity by weekday ----
|
| 822 |
+
cursor = conn.execute('''
|
| 823 |
+
SELECT
|
| 824 |
+
CAST(strftime('%w', datetime(date_unixtime, 'unixepoch')) AS INTEGER) as weekday,
|
| 825 |
+
COUNT(*) as count
|
| 826 |
+
FROM messages WHERE from_id = ?
|
| 827 |
+
GROUP BY weekday
|
| 828 |
+
''', (user_id,))
|
| 829 |
+
weekday_names = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
|
| 830 |
+
weekday_data = {r[0]: r[1] for r in cursor.fetchall()}
|
| 831 |
+
weekday_activity = [{'day': weekday_names[d], 'count': weekday_data.get(d, 0)} for d in range(7)]
|
| 832 |
+
|
| 833 |
+
# ---- Activity trend (last 90 days) ----
|
| 834 |
+
cursor = conn.execute('''
|
| 835 |
+
SELECT
|
| 836 |
+
date(datetime(date_unixtime, 'unixepoch')) as day,
|
| 837 |
+
COUNT(*) as count
|
| 838 |
+
FROM messages WHERE from_id = ?
|
| 839 |
+
GROUP BY day
|
| 840 |
+
ORDER BY day DESC
|
| 841 |
+
LIMIT 90
|
| 842 |
+
''', (user_id,))
|
| 843 |
+
daily_activity = [{'date': r[0], 'count': r[1]} for r in cursor.fetchall()]
|
| 844 |
+
|
| 845 |
+
# ---- Monthly trend ----
|
| 846 |
+
cursor = conn.execute('''
|
| 847 |
+
SELECT
|
| 848 |
+
strftime('%Y-%m', datetime(date_unixtime, 'unixepoch')) as month,
|
| 849 |
+
COUNT(*) as count
|
| 850 |
+
FROM messages WHERE from_id = ?
|
| 851 |
+
GROUP BY month
|
| 852 |
+
ORDER BY month
|
| 853 |
+
''', (user_id,))
|
| 854 |
+
monthly_activity = [{'month': r[0], 'count': r[1]} for r in cursor.fetchall()]
|
| 855 |
+
|
| 856 |
+
# ---- Top links shared ----
|
| 857 |
+
cursor = conn.execute('''
|
| 858 |
+
SELECT e.value, COUNT(*) as cnt
|
| 859 |
+
FROM entities e
|
| 860 |
+
JOIN messages m ON e.message_id = m.id
|
| 861 |
+
WHERE m.from_id = ? AND e.type = 'link'
|
| 862 |
+
GROUP BY e.value
|
| 863 |
+
ORDER BY cnt DESC
|
| 864 |
+
LIMIT 10
|
| 865 |
+
''', (user_id,))
|
| 866 |
+
top_links = [{'url': r[0], 'count': r[1]} for r in cursor.fetchall()]
|
| 867 |
+
|
| 868 |
+
# ---- Rank among all users ----
|
| 869 |
+
cursor = conn.execute('''
|
| 870 |
+
SELECT COUNT(*) + 1 FROM (
|
| 871 |
+
SELECT from_id, COUNT(*) as cnt FROM messages GROUP BY from_id
|
| 872 |
+
) WHERE cnt > ?
|
| 873 |
+
''', (stats['total_messages'],))
|
| 874 |
+
rank = cursor.fetchone()[0]
|
| 875 |
+
|
| 876 |
+
cursor = conn.execute('SELECT COUNT(DISTINCT from_id) FROM messages')
|
| 877 |
+
total_users = cursor.fetchone()[0]
|
| 878 |
+
|
| 879 |
+
# ---- Average reply time (when replying to someone) ----
|
| 880 |
+
cursor = conn.execute('''
|
| 881 |
+
SELECT AVG(m.date_unixtime - r.date_unixtime)
|
| 882 |
+
FROM messages m
|
| 883 |
+
JOIN messages r ON m.reply_to_message_id = r.id
|
| 884 |
+
WHERE m.from_id = ?
|
| 885 |
+
AND m.date_unixtime - r.date_unixtime > 0
|
| 886 |
+
AND m.date_unixtime - r.date_unixtime < 86400
|
| 887 |
+
''', (user_id,))
|
| 888 |
+
avg_reply_time = cursor.fetchone()[0]
|
| 889 |
+
|
| 890 |
+
conn.close()
|
| 891 |
+
|
| 892 |
+
# ---- Build response ----
|
| 893 |
+
total_msgs = stats['total_messages']
|
| 894 |
+
active_days = stats['active_days'] or 1
|
| 895 |
+
first_msg = stats['first_message']
|
| 896 |
+
last_msg = stats['last_message']
|
| 897 |
+
span_days = max(1, (last_msg - first_msg) / 86400) if first_msg and last_msg else 1
|
| 898 |
+
|
| 899 |
+
return jsonify({
|
| 900 |
+
'user_id': user_id,
|
| 901 |
+
'name': stats['from_name'] or 'Unknown',
|
| 902 |
+
'has_messages': True,
|
| 903 |
+
'participant': participant,
|
| 904 |
+
|
| 905 |
+
# Core stats
|
| 906 |
+
'total_messages': total_msgs,
|
| 907 |
+
'total_characters': stats['total_chars'] or 0,
|
| 908 |
+
'avg_message_length': round(stats['avg_length'] or 0, 1),
|
| 909 |
+
'max_message_length': stats['max_length'] or 0,
|
| 910 |
+
'links_shared': stats['links_shared'] or 0,
|
| 911 |
+
'media_sent': stats['media_sent'] or 0,
|
| 912 |
+
'photos_sent': stats['photos_sent'] or 0,
|
| 913 |
+
'mentions_made': stats['mentions_made'] or 0,
|
| 914 |
+
'edits': stats['edits'] or 0,
|
| 915 |
+
'forwards_sent': forwards_sent,
|
| 916 |
+
|
| 917 |
+
# Time stats
|
| 918 |
+
'first_message': first_msg,
|
| 919 |
+
'last_message': last_msg,
|
| 920 |
+
'active_days': active_days,
|
| 921 |
+
'daily_average': round(total_msgs / active_days, 1),
|
| 922 |
+
'messages_per_calendar_day': round(total_msgs / span_days, 1),
|
| 923 |
+
|
| 924 |
+
# Reply stats
|
| 925 |
+
'total_replies_sent': total_replies_sent,
|
| 926 |
+
'total_replies_received': total_replies_received,
|
| 927 |
+
'reply_ratio': round(total_replies_sent / max(1, total_msgs) * 100, 1),
|
| 928 |
+
'avg_reply_time_seconds': round(avg_reply_time) if avg_reply_time else None,
|
| 929 |
+
'replies_to': replies_to,
|
| 930 |
+
'replies_from': replies_from,
|
| 931 |
+
|
| 932 |
+
# Forward stats
|
| 933 |
+
'top_forward_sources': top_forward_sources,
|
| 934 |
+
|
| 935 |
+
# Ranking
|
| 936 |
+
'rank': rank,
|
| 937 |
+
'total_active_users': total_users,
|
| 938 |
+
|
| 939 |
+
# Activity patterns
|
| 940 |
+
'hourly_activity': [hourly.get(h, 0) for h in range(24)],
|
| 941 |
+
'weekday_activity': weekday_activity,
|
| 942 |
+
'daily_activity': daily_activity,
|
| 943 |
+
'monthly_activity': monthly_activity,
|
| 944 |
+
|
| 945 |
+
# Content
|
| 946 |
+
'top_links': top_links,
|
| 947 |
+
})
|
| 948 |
+
|
| 949 |
+
|
| 950 |
+
# ==========================================
|
| 951 |
+
# API ENDPOINTS - CONTENT ANALYTICS
|
| 952 |
+
# ==========================================
|
| 953 |
+
|
| 954 |
+
@app.route('/api/top/words')
def api_top_words():
    """
    Return the most frequent words found in message text.

    Query params:
        timeframe: forwarded to parse_timeframe (default 'all').
        limit: how many words to return (default 30).

    A "word" is a run of at least three Hebrew (U+0590-U+05FF) or ASCII
    letters, matched against the lower-cased message text.
    Responds with a JSON list of {'word', 'count'} objects.
    """
    timeframe = request.args.get('timeframe', 'all')
    start_ts, end_ts = parse_timeframe(timeframe)
    limit = int(request.args.get('limit', 30))

    conn = get_db()
    rows = conn.execute('''
        SELECT text_plain FROM messages
        WHERE date_unixtime BETWEEN ? AND ?
        AND text_plain IS NOT NULL
    ''', (start_ts, end_ts)).fetchall()

    import re
    word_pattern = re.compile(r'[\u0590-\u05FFa-zA-Z]{3,}')

    # Flatten every match from every message into one token list.
    words = [
        token
        for row in rows
        for token in word_pattern.findall(row[0].lower())
    ]

    conn.close()

    ranked = top_k_frequent(words, limit)
    payload = []
    for word, occurrences in ranked:
        payload.append({'word': word, 'count': occurrences})
    return jsonify(payload)
|
| 980 |
+
|
| 981 |
+
|
| 982 |
+
def _extract_link_domain(url):
    """
    Return the normalized host of an http(s) URL, or None if there is none.

    The host is lower-cased and stripped of userinfo, port and a leading
    'www.' so that variants of the same site are counted together.
    """
    from urllib.parse import urlsplit

    if not url:
        return None
    try:
        parts = urlsplit(url.strip())
        host = parts.hostname  # lower-cased; excludes userinfo and port
    except ValueError:
        # Malformed netloc (e.g. a broken IPv6 literal) - not a usable link.
        return None
    if parts.scheme not in ('http', 'https') or not host:
        return None
    return host[4:] if host.startswith('www.') else host


@app.route('/api/top/domains')
def api_top_domains():
    """
    Return the most frequently shared link domains.

    Query params:
        timeframe: forwarded to parse_timeframe (default 'all').
        limit: how many domains to return (default 20).

    Only entities of type 'link' whose value is an http:// or https:// URL
    are counted. Responds with a JSON list of {'domain', 'count'} objects.
    """
    timeframe = request.args.get('timeframe', 'all')
    start_ts, end_ts = parse_timeframe(timeframe)
    limit = int(request.args.get('limit', 20))

    conn = get_db()
    try:
        cursor = conn.execute('''
            SELECT e.value FROM entities e
            JOIN messages m ON e.message_id = m.id
            WHERE e.type = 'link'
            AND m.date_unixtime BETWEEN ? AND ?
        ''', (start_ts, end_ts))

        domains = []
        for row in cursor:
            domain = _extract_link_domain(row[0])
            if domain:
                domains.append(domain)
    finally:
        # Release the connection even if the query or parsing fails.
        conn.close()

    top_domains = top_k_frequent(domains, limit)
    return jsonify([{'domain': d, 'count': c} for d, c in top_domains])
|
| 1011 |
+
|
| 1012 |
+
|
| 1013 |
+
@app.route('/api/top/mentions')
def api_top_mentions():
    """
    Return the most frequently mentioned values.

    Query params:
        timeframe: forwarded to parse_timeframe (default 'all').
        limit: how many mentions to return (default 20).

    Counting and ordering are done in SQL over entities of type 'mention'.
    Responds with a JSON list of {'mention', 'count'} objects.
    """
    start_ts, end_ts = parse_timeframe(request.args.get('timeframe', 'all'))
    limit = int(request.args.get('limit', 20))

    conn = get_db()
    rows = conn.execute('''
        SELECT e.value, COUNT(*) as count FROM entities e
        JOIN messages m ON e.message_id = m.id
        WHERE e.type = 'mention'
        AND m.date_unixtime BETWEEN ? AND ?
        GROUP BY e.value
        ORDER BY count DESC
        LIMIT ?
    ''', (start_ts, end_ts, limit)).fetchall()

    data = []
    for mention, occurrences in rows:
        data.append({'mention': mention, 'count': occurrences})
    conn.close()

    return jsonify(data)
|
| 1036 |
+
|
| 1037 |
+
|
| 1038 |
+
# ==========================================
|
| 1039 |
+
# API ENDPOINTS - ADVANCED ANALYTICS (Course Algorithms)
|
| 1040 |
+
# ==========================================
|
| 1041 |
+
|
| 1042 |
+
@app.route('/api/similar/<int:message_id>')
def api_similar_messages(message_id):
    """
    Find messages whose text resembles the given message.

    The target text is compared with lcs_similarity against a random sample
    of other messages longer than 20 characters.

    Query params:
        threshold: minimum similarity to report, 0..1 (default 0.7).
        limit: maximum number of matches returned (default 10).
        sample: number of random messages compared (default 1000).

    Returns 404 when the target message is missing or has no text.
    """
    threshold = float(request.args.get('threshold', 0.7))
    limit = int(request.args.get('limit', 10))
    sample_size = int(request.args.get('sample', 1000))

    def preview(text):
        # Long texts are cut to 200 characters and marked with an ellipsis.
        if len(text) > 200:
            return text[:200] + '...'
        return text

    conn = get_db()

    target = conn.execute('''
        SELECT text_plain, from_name, date FROM messages WHERE id = ?
    ''', (message_id,)).fetchone()

    if not target or not target['text_plain']:
        conn.close()
        return jsonify({'error': 'Message not found or empty'}), 404

    target_text = target['text_plain']

    # Random comparison sample; the target itself is excluded.
    sample_rows = conn.execute('''
        SELECT id, text_plain, from_name, date FROM messages
        WHERE id != ? AND text_plain IS NOT NULL AND LENGTH(text_plain) > 20
        ORDER BY RANDOM()
        LIMIT ?
    ''', (message_id, sample_size)).fetchall()
    candidates = [(row['id'], row['text_plain']) for row in sample_rows]
    conn.close()

    matches = []
    for candidate_id, candidate_text in candidates:
        score = lcs_similarity(target_text, candidate_text)
        if score < threshold:
            continue
        matches.append({
            'id': candidate_id,
            'similarity': round(score * 100, 1),
            'text': preview(candidate_text)
        })

    # Best matches first, then keep only the requested number.
    similar = sorted(matches, key=lambda item: item['similarity'], reverse=True)[:limit]

    return jsonify({
        'target': {
            'id': message_id,
            'text': preview(target_text),
            'from': target['from_name'],
            'date': target['date']
        },
        'similar': similar,
        'algorithm': 'LCS (Longest Common Subsequence)',
        'threshold': threshold
    })
|
| 1106 |
+
|
| 1107 |
+
|
| 1108 |
+
@app.route('/api/analytics/similar')
def api_find_all_similar():
    """
    Report pairs of similar messages within a random sample.

    A random sample of messages longer than 30 characters is handed to
    find_similar_messages; at most the first 50 returned pairs are included
    in the response, while 'total_found' reports how many pairs it returned.

    Query params:
        timeframe: forwarded to parse_timeframe (default 'all').
        threshold: minimum similarity, 0..1 (default 0.8).
        sample: number of random messages examined (default 500).
    """
    timeframe = request.args.get('timeframe', 'all')
    threshold = float(request.args.get('threshold', 0.8))
    sample_size = int(request.args.get('sample', 500))
    start_ts, end_ts = parse_timeframe(timeframe)

    conn = get_db()
    rows = conn.execute('''
        SELECT id, text_plain, from_name, from_id FROM messages
        WHERE date_unixtime BETWEEN ? AND ?
        AND text_plain IS NOT NULL AND LENGTH(text_plain) > 30
        ORDER BY RANDOM()
        LIMIT ?
    ''', (start_ts, end_ts, sample_size)).fetchall()
    sample = [(r['id'], r['text_plain'], r['from_name'], r['from_id']) for r in rows]
    conn.close()

    similar_pairs = find_similar_messages(
        [(msg_id, text) for msg_id, text, _name, _uid in sample],
        threshold=threshold,
        min_length=30,
    )

    # id -> (text, author name), used to decorate each reported pair.
    lookup = {msg_id: (text, name) for msg_id, text, name, _uid in sample}

    def describe(msg_id):
        text, author = lookup[msg_id]
        return {'id': msg_id, 'text': text[:150], 'from': author}

    results = [
        {
            'message1': describe(first_id),
            'message2': describe(second_id),
            'similarity': round(score * 100, 1)
        }
        for first_id, second_id, score in similar_pairs[:50]  # top 50 only
    ]

    return jsonify({
        'pairs': results,
        'total_found': len(similar_pairs),
        'algorithm': 'LCS (Longest Common Subsequence)',
        'threshold': threshold,
        'sample_size': sample_size
    })
|
| 1167 |
+
|
| 1168 |
+
|
| 1169 |
+
@app.route('/api/user/rank/<user_id>')
def api_user_rank_efficient(user_id):
    """
    Report a user's activity rank for a timeframe.

    The user's message count is obtained with a SQL COUNT, then looked up in
    the tree returned by get_user_rank_tree. The tree is queried with the
    negated count, matching the original note that it stores negative counts.

    Query params:
        timeframe: forwarded to get_user_rank_tree / parse_timeframe
                   (default 'all').

    Returns 404 when the user has no messages in the timeframe.
    """
    timeframe = request.args.get('timeframe', 'all')
    tree = get_user_rank_tree(timeframe)
    start_ts, end_ts = parse_timeframe(timeframe)

    conn = get_db()
    count_row = conn.execute('''
        SELECT COUNT(*) as count FROM messages
        WHERE from_id = ? AND date_unixtime BETWEEN ? AND ?
    ''', (user_id, start_ts, end_ts)).fetchone()
    user_count = count_row['count']
    # Nothing below touches the database, so release it right away.
    conn.close()

    if user_count == 0:
        return jsonify({'error': 'User not found'}), 404

    rank = tree.rank(-user_count)
    total = len(tree)

    if total > 0:
        percentile = round(100 * (total - rank + 1) / total, 1)
    else:
        percentile = 0

    return jsonify({
        'user_id': user_id,
        'messages': user_count,
        'rank': rank,
        'total_users': total,
        'percentile': percentile,
        'algorithm': 'RankTree (Order Statistics Tree)',
        'complexity': 'O(log n)'
    })
|
| 1213 |
+
|
| 1214 |
+
|
| 1215 |
+
@app.route('/api/user/by-rank/<int:rank>')
def api_user_by_rank(rank):
    """
    Return the user occupying a given 1-based rank.

    The lookup is delegated to select() on the tree returned by
    get_user_rank_tree for the requested timeframe (default 'all').

    Returns 400 when rank is outside 1..len(tree) and 404 when select()
    yields a falsy result.
    """
    tree = get_user_rank_tree(request.args.get('timeframe', 'all'))

    rank_in_range = 1 <= rank <= len(tree)
    if not rank_in_range:
        return jsonify({'error': f'Rank must be between 1 and {len(tree)}'}), 400

    user = tree.select(rank)
    if not user:
        return jsonify({'error': 'User not found'}), 404

    response = {
        'rank': rank,
        'user': user,
        'total_users': len(tree),
        'algorithm': 'RankTree select(k)',
        'complexity': 'O(log n)'
    }
    return jsonify(response)
|
| 1242 |
+
|
| 1243 |
+
|
| 1244 |
+
@app.route('/api/analytics/histogram')
def api_activity_histogram():
    """
    Return a message-count histogram over fixed-width time buckets.

    Timestamps in the timeframe are loaded and grouped by time_histogram.

    Query params:
        timeframe: forwarded to parse_timeframe (default 'month').
        bucket: bucket width in seconds, a positive integer (default 86400).

    Returns 400 when 'bucket' is not a positive integer. Each histogram
    entry carries the bucket timestamp, a 'YYYY-MM-DD HH:MM' label in UTC
    and the message count.
    """
    from datetime import datetime, timezone

    timeframe = request.args.get('timeframe', 'month')

    # Reject unusable bucket widths up front instead of failing with a 500.
    # NOTE(review): time_histogram presumably divides by bucket_size, so a
    # zero/negative width cannot produce a meaningful result - confirm.
    try:
        bucket_seconds = int(request.args.get('bucket', 86400))
    except (TypeError, ValueError):
        bucket_seconds = 0
    if bucket_seconds <= 0:
        return jsonify({'error': 'bucket must be a positive integer number of seconds'}), 400

    start_ts, end_ts = parse_timeframe(timeframe)

    conn = get_db()
    try:
        cursor = conn.execute('''
            SELECT date_unixtime FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
        ''', (start_ts, end_ts))
        records = [{'date_unixtime': row[0]} for row in cursor]
    finally:
        # Release the connection even if the query fails.
        conn.close()

    histogram = time_histogram(records, 'date_unixtime', bucket_size=bucket_seconds)

    # Format labels in UTC so they agree with the SQL date grouping used by
    # other endpoints in this file (datetime(..., 'unixepoch') is UTC) and do
    # not depend on the server's local timezone.
    result = [
        {
            'timestamp': bucket_time,
            'date': datetime.fromtimestamp(bucket_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M'),
            'count': count
        }
        for bucket_time, count in histogram
    ]

    return jsonify({
        'histogram': result,
        'bucket_size_seconds': bucket_seconds,
        'total_records': len(records),
        'algorithm': 'Bucket Sort',
        'complexity': 'O(n + k)'
    })
|
| 1287 |
+
|
| 1288 |
+
|
| 1289 |
+
@app.route('/api/analytics/percentiles')
def api_message_percentiles():
    """
    Summarize the distribution of message text lengths.

    Lengths of all non-NULL, non-empty message texts in the timeframe are
    loaded, then min/max are computed directly and the median and the
    25/75/90/95/99th percentiles via find_median / find_percentile.

    Query params:
        timeframe: forwarded to parse_timeframe (default 'all').

    Returns 404 when no message with text exists in the timeframe.
    """
    start_ts, end_ts = parse_timeframe(request.args.get('timeframe', 'all'))

    conn = get_db()
    length_rows = conn.execute('''
        SELECT LENGTH(text_plain) as length FROM messages
        WHERE date_unixtime BETWEEN ? AND ?
        AND text_plain IS NOT NULL
    ''', (start_ts, end_ts)).fetchall()

    # Falsy lengths (0 / NULL) are dropped.
    lengths = []
    for row in length_rows:
        if row[0]:
            lengths.append(row[0])
    conn.close()

    if len(lengths) == 0:
        return jsonify({'error': 'No messages found'}), 404

    result = {
        'count': len(lengths),
        'min': min(lengths),
        'max': max(lengths),
        'median': find_median(lengths),
    }
    for pct in (25, 75, 90, 95, 99):
        result[f'p{pct}'] = find_percentile(lengths, pct)
    result['algorithm'] = 'Quickselect with Median of Medians'
    result['complexity'] = 'O(n) guaranteed'

    return jsonify(result)
|
| 1331 |
+
|
| 1332 |
+
|
| 1333 |
+
# ==========================================
|
| 1334 |
+
# API ENDPOINTS - SEARCH
|
| 1335 |
+
# ==========================================
|
| 1336 |
+
|
| 1337 |
+
@app.route('/api/search')
def api_search():
    """
    Full-text search over messages, newest first.

    Query params:
        q: FTS query string; an empty query yields an empty result set.
        timeframe: forwarded to parse_timeframe (default 'all').
        limit: page size (default 50).
        offset: number of rows to skip (default 0).

    The query is passed to the messages_fts MATCH operator as a bound
    parameter. Returns 400 with an 'error' message when SQLite rejects the
    query (for example an unbalanced quote in the FTS expression).
    """
    import sqlite3

    query = request.args.get('q', '')
    timeframe = request.args.get('timeframe', 'all')
    start_ts, end_ts = parse_timeframe(timeframe)
    limit = int(request.args.get('limit', 50))
    offset = int(request.args.get('offset', 0))

    if not query:
        return jsonify({'results': [], 'total': 0})

    conn = get_db()
    try:
        rows = conn.execute('''
            SELECT
                m.id,
                m.date,
                m.from_name,
                m.from_id,
                m.text_plain,
                m.has_links,
                m.has_media
            FROM messages_fts
            JOIN messages m ON messages_fts.rowid = m.id
            WHERE messages_fts MATCH ?
            AND m.date_unixtime BETWEEN ? AND ?
            ORDER BY m.date_unixtime DESC
            LIMIT ? OFFSET ?
        ''', (query, start_ts, end_ts, limit, offset)).fetchall()
    except sqlite3.OperationalError as exc:
        # User-supplied FTS syntax is the expected cause here; report it as
        # a client error instead of letting it surface as a 500.
        return jsonify({
            'error': f'Invalid search query: {exc}',
            'results': [],
            'query': query,
            'limit': limit,
            'offset': offset
        }), 400
    finally:
        # Runs on success and on every error path.
        conn.close()

    results = [{
        'id': row['id'],
        'date': row['date'],
        'from_name': row['from_name'],
        'from_id': row['from_id'],
        'text': row['text_plain'][:300] if row['text_plain'] else '',
        'has_links': bool(row['has_links']),
        'has_media': bool(row['has_media'])
    } for row in rows]

    return jsonify({
        'results': results,
        'query': query,
        'limit': limit,
        'offset': offset
    })
|
| 1386 |
+
|
| 1387 |
+
|
| 1388 |
+
# ==========================================
|
| 1389 |
+
# API ENDPOINTS - CHAT VIEW
|
| 1390 |
+
# ==========================================
|
| 1391 |
+
|
| 1392 |
+
@app.route('/api/chat/messages')
def api_chat_messages():
    """
    Return a page of messages for the chat view, oldest first.

    Query params (all optional):
        offset, limit: paging (defaults 0 and 50).
        user_id: only messages from this sender.
        search: FTS expression matched against messages_fts.
        date_from, date_to: inclusive bounds compared against m.date.
        has_media: '1' = only with media, '0' = only without.
        has_link: '1' = only messages containing links.

    Each message carries its reply target's author and a 100-character text
    excerpt, plus an 'entities' list of {'type', 'value'} objects.
    Returns 400 when 'search' is given and SQLite rejects the query.
    """
    import sqlite3

    offset = int(request.args.get('offset', 0))
    limit = int(request.args.get('limit', 50))
    user_id = request.args.get('user_id')
    search = request.args.get('search')
    date_from = request.args.get('date_from')
    date_to = request.args.get('date_to')
    has_media = request.args.get('has_media')
    has_link = request.args.get('has_link')

    # The WHERE clause is assembled only from the fixed fragments below;
    # every user-supplied value is passed as a bound parameter.
    conditions = ["1=1"]
    params = []

    if user_id:
        conditions.append("m.from_id = ?")
        params.append(user_id)

    if date_from:
        conditions.append("m.date >= ?")
        params.append(date_from)

    if date_to:
        conditions.append("m.date <= ?")
        params.append(date_to)

    if has_media == '1':
        conditions.append("m.has_media = 1")
    elif has_media == '0':
        conditions.append("m.has_media = 0")

    if has_link == '1':
        conditions.append("m.has_links = 1")

    if search:
        conditions.append("""m.id IN (
            SELECT rowid FROM messages_fts WHERE messages_fts MATCH ?
        )""")
        params.append(search)

    where_clause = " AND ".join(conditions)

    # Keep each entity lookup well under SQLite's bound-parameter limit
    # (999 in older builds) even when a large 'limit' is requested.
    entity_batch_size = 500

    conn = get_db()
    try:
        try:
            total = conn.execute(
                f"SELECT COUNT(*) FROM messages m WHERE {where_clause}", params
            ).fetchone()[0]
        except sqlite3.OperationalError as exc:
            if not search:
                raise
            # A malformed FTS expression is a client error, not a server fault.
            return jsonify({
                'error': f'Invalid search query: {exc}',
                'messages': [],
                'total': 0,
                'offset': offset,
                'limit': limit,
                'has_more': False
            }), 400

        query = f"""
            SELECT
                m.id,
                m.id as message_id,
                m.date,
                m.from_id,
                m.from_name,
                m.text_plain as text,
                m.reply_to_message_id,
                m.forwarded_from,
                m.forwarded_from_id,
                m.has_media,
                m.has_photo,
                m.has_links as has_link,
                m.has_mentions,
                m.is_edited,
                r.from_name as reply_to_name,
                substr(r.text_plain, 1, 100) as reply_to_text
            FROM messages m
            LEFT JOIN messages r ON m.reply_to_message_id = r.id
            WHERE {where_clause}
            ORDER BY m.date ASC
            LIMIT ? OFFSET ?
        """
        messages = [
            dict(row)
            for row in conn.execute(query, params + [limit, offset]).fetchall()
        ]

        # Collect entities (links, mentions, ...) for the page, grouped by message.
        entities_map = {}
        msg_ids = [m['id'] for m in messages]
        for start in range(0, len(msg_ids), entity_batch_size):
            batch = msg_ids[start:start + entity_batch_size]
            placeholders = ','.join('?' * len(batch))
            ent_cursor = conn.execute(f"""
                SELECT message_id, type, value
                FROM entities
                WHERE message_id IN ({placeholders})
            """, batch)
            for mid, ent_type, ent_value in ent_cursor:
                entities_map.setdefault(mid, []).append(
                    {'type': ent_type, 'value': ent_value}
                )

        for msg in messages:
            msg['entities'] = entities_map.get(msg['id'], [])
    finally:
        # Release the connection on success and on every error path.
        conn.close()

    return jsonify({
        'messages': messages,
        'total': total,
        'offset': offset,
        'limit': limit,
        'has_more': offset + limit < total
    })
|
| 1504 |
+
|
| 1505 |
+
|
| 1506 |
+
@app.route('/api/chat/thread/<int:message_id>')
def api_chat_thread(message_id):
    """
    Return the conversation thread around a message, sorted by date.

    The thread consists of the message itself, every ancestor reached by
    following reply_to_message_id upwards, and every direct or indirect
    reply to the message. Responds with a JSON list of message objects
    (empty when the message does not exist and has no replies).

    Both traversals are iterative, so very long reply chains cannot exhaust
    Python's recursion limit.
    """
    select_columns = (
        "SELECT id as message_id, date, from_name, text_plain as text, "
        "reply_to_message_id FROM messages"
    )

    conn = get_db()
    thread = []
    visited = set()

    try:
        # ---- Walk upwards: target, its parent, grandparent, ... ----
        ancestors = []
        current_id = message_id
        while current_id not in visited:  # the visited check also stops reply cycles
            visited.add(current_id)
            row = conn.execute(
                select_columns + " WHERE id = ?", (current_id,)
            ).fetchone()
            if row is None:
                break
            ancestors.append(dict(row))
            current_id = row['reply_to_message_id']
            if not current_id:
                break
        # Oldest ancestor first, matching the order a recursive walk produces.
        thread.extend(reversed(ancestors))

        # ---- Walk downwards: depth-first over replies, in date order ----
        def fetch_replies(parent_id):
            return conn.execute(
                select_columns + " WHERE reply_to_message_id = ? ORDER BY date",
                (parent_id,),
            ).fetchall()

        # Explicit stack of reply iterators replaces the recursive descent.
        pending = [iter(fetch_replies(message_id))]
        while pending:
            row = next(pending[-1], None)
            if row is None:
                pending.pop()
                continue
            reply_id = row['message_id']
            if reply_id in visited:
                continue
            visited.add(reply_id)
            thread.append(dict(row))
            pending.append(iter(fetch_replies(reply_id)))
    finally:
        # Release the connection even if a query fails.
        conn.close()

    thread.sort(key=lambda x: x['date'])

    return jsonify(thread)
|
| 1556 |
+
|
| 1557 |
+
|
| 1558 |
+
@app.route('/api/chat/context/<int:message_id>')
def api_chat_context(message_id):
    """
    Return the messages surrounding a specific message.

    Query params:
        before: number of earlier messages to include (default 20).
        after: number of later messages to include (default 20).

    Responds with {'messages': [...], 'target_id': message_id}; the list is
    in chronological order with the target in the middle, or empty when the
    target message does not exist.

    Messages are ordered by (date, id), so messages sharing the target's
    timestamp are placed before or after it by id instead of being dropped.
    NOTE(review): assumes ids grow with time within one timestamp - confirm
    against the importer.
    """
    before = int(request.args.get('before', 20))
    after = int(request.args.get('after', 20))

    columns = """id as message_id, date, from_id, from_name, text_plain as text,
               reply_to_message_id, has_media, has_links as has_link"""

    conn = get_db()
    try:
        # One query supplies both the target row and its date.
        target = conn.execute(
            f"SELECT {columns} FROM messages WHERE id = ?", (message_id,)
        ).fetchone()

        if not target:
            return jsonify({'messages': [], 'target_id': message_id})

        target_date = target['date']

        # Earlier messages: fetched newest-first so LIMIT keeps the closest
        # ones, then reversed into chronological order.
        earlier_rows = conn.execute(f"""
            SELECT {columns}
            FROM messages
            WHERE date < ? OR (date = ? AND id < ?)
            ORDER BY date DESC, id DESC
            LIMIT ?
        """, (target_date, target_date, message_id, before)).fetchall()
        before_msgs = [dict(row) for row in reversed(earlier_rows)]

        later_rows = conn.execute(f"""
            SELECT {columns}
            FROM messages
            WHERE date > ? OR (date = ? AND id > ?)
            ORDER BY date ASC, id ASC
            LIMIT ?
        """, (target_date, target_date, message_id, after)).fetchall()
        after_msgs = [dict(row) for row in later_rows]
    finally:
        # Release the connection on success and on every error path.
        conn.close()

    return jsonify({
        'messages': before_msgs + [dict(target)] + after_msgs,
        'target_id': message_id
    })
|
| 1613 |
+
|
| 1614 |
+
|
| 1615 |
+
# ==========================================
|
| 1616 |
+
# API ENDPOINTS - AI SEARCH
|
| 1617 |
+
# ==========================================
|
| 1618 |
+
|
| 1619 |
+
# Process-wide AI search engine, created on first use.
_ai_engine = None
# Set once an initialization attempt has been made, so a failed attempt is
# not retried on every request.
_ai_engine_init_attempted = False


def get_ai_engine():
    """
    Return the shared AISearchEngine, creating it on the first call.

    The provider comes from AI_PROVIDER (default 'ollama'); the API key from
    AI_API_KEY or, failing that, <PROVIDER>_API_KEY. If creation fails the
    error and traceback are printed and None is returned - on this call and
    on every later call until the module flags are reset.
    """
    global _ai_engine, _ai_engine_init_attempted

    needs_init = _ai_engine is None and not _ai_engine_init_attempted
    if needs_init:
        _ai_engine_init_attempted = True
        try:
            from ai_search import AISearchEngine
            import os

            provider = os.getenv('AI_PROVIDER', 'ollama')
            generic_key = os.getenv('AI_API_KEY')
            # Fall back to the provider-specific variable when no generic key is set.
            api_key = generic_key or os.getenv(f'{provider.upper()}_API_KEY')

            print(f"Initializing AI engine with provider: {provider}")
            _ai_engine = AISearchEngine(DB_PATH, provider, api_key)
            print(f"AI engine initialized successfully")
        except Exception as e:
            print(f"AI Search not available: {e}")
            import traceback
            traceback.print_exc()
            return None

    # Either the live engine, or None after an earlier failed attempt.
    return _ai_engine
|
| 1652 |
+
|
| 1653 |
+
|
| 1654 |
+
@app.route('/api/ai/status')
def api_ai_status():
    """
    Report diagnostic information about the AI and semantic search setup.

    Includes the configured provider, whether an API key is set, whether
    the AI engine is initialized, and - when semantic search is available -
    embedding availability and stats (or the error raised while querying).
    If no initialization has been attempted yet, one is triggered here.

    NOTE(review): the response exposes the first 8 characters of the API
    key; consider whether this endpoint should be access-restricted.
    """
    provider = os.getenv('AI_PROVIDER', 'ollama')
    api_key = os.getenv('AI_API_KEY') or os.getenv(f'{provider.upper()}_API_KEY')

    # Only keys longer than 8 characters get a preview.
    if api_key and len(api_key) > 8:
        key_preview = f"{api_key[:8]}..."
    else:
        key_preview = None

    status = {
        'provider': provider,
        'api_key_set': bool(api_key),
        'api_key_preview': key_preview,
        'ai_engine_initialized': _ai_engine is not None,
        'init_attempted': _ai_engine_init_attempted,
        'semantic_search_available': HAS_SEMANTIC_SEARCH,
    }

    never_tried = _ai_engine is None and not _ai_engine_init_attempted
    if never_tried:
        status['ai_engine_initialized'] = get_ai_engine() is not None

    if HAS_SEMANTIC_SEARCH:
        try:
            semantic = get_semantic_search()
            status['embeddings_available'] = semantic.is_available()
            status['embeddings_stats'] = semantic.stats()
        except Exception as e:
            status['embeddings_error'] = str(e)

    return jsonify(status)
|
| 1684 |
+
|
| 1685 |
+
|
| 1686 |
+
@app.route('/api/ai/reset')
def api_ai_reset():
    """
    Drop the cached AI engine and clear the "already attempted" flag.

    After this call the module-level engine is None and a fresh
    initialization attempt is permitted.
    """
    global _ai_engine, _ai_engine_init_attempted
    _ai_engine, _ai_engine_init_attempted = None, False

    payload = {
        'status': 'reset',
        'message': 'AI engine will be reinitialized on next request'
    }
    return jsonify(payload)
|
| 1693 |
+
|
| 1694 |
+
|
| 1695 |
+
@app.route('/api/cache/invalidate')
def api_cache_invalidate():
    """Invalidate all caches; intended to be called after DB updates.

    Returns JSON with the value of the module-level ``_cache_version`` as
    read after ``invalidate_caches()`` has run.
    """
    invalidate_caches()
    # Read the version only after invalidation.
    # NOTE(review): presumably invalidate_caches() bumps it; confirm.
    payload = {'status': 'invalidated', 'new_version': _cache_version}
    return jsonify(payload)
|
| 1700 |
+
|
| 1701 |
+
|
| 1702 |
+
@app.route('/api/embeddings/reload')
def api_embeddings_reload():
    """Reload embeddings and report how many message IDs were gained.

    Returns JSON with ``previous_count``, ``new_count`` and ``added``, or
    ``{'error': ...}`` when semantic search is unavailable or the reload
    raises.
    """
    if not HAS_SEMANTIC_SEARCH:
        return jsonify({'error': 'Semantic search not available'})

    try:
        searcher = get_semantic_search()

        # Before the first load there is nothing to count.
        before = 0
        if searcher.embeddings_loaded:
            before = len(searcher.message_ids)

        searcher.reload_embeddings()
        after = len(searcher.message_ids)

        summary = {
            'status': 'reloaded',
            'previous_count': before,
            'new_count': after,
            'added': after - before,
        }
        return jsonify(summary)
    except Exception as exc:
        return jsonify({'error': str(exc)})
|
| 1720 |
+
|
| 1721 |
+
|
| 1722 |
+
def _semantic_ai_search(query):
    """Serve the 'semantic' mode of /api/ai/search and return a JSON response.

    Uses the semantic search object, plus the AI engine when one is
    available. Any exception is reported as ``{'error': ...}``.
    """
    if not HAS_SEMANTIC_SEARCH:
        return jsonify({'error': 'Semantic search not available. Install sentence-transformers.'})

    try:
        ss = get_semantic_search()
        if not ss.is_available():
            return jsonify({'error': 'embeddings.db not found. Run the Colab notebook first.'})

        ai_engine = get_ai_engine()
        if ai_engine:
            # Semantic retrieval followed by AI reasoning over the hits.
            return jsonify(ss.search_with_ai_answer(query, ai_engine, limit=30))

        # No AI engine: return the raw semantic hits with a hint about why.
        results = ss.search_with_full_text(query, limit=30)
        provider = os.getenv('AI_PROVIDER', 'ollama')
        api_key_set = bool(os.getenv('AI_API_KEY') or os.getenv(f'{provider.upper()}_API_KEY'))
        return jsonify({
            'query': query,
            'mode': 'semantic',
            'results': results,
            'count': len(results),
            'answer': f"נמצאו {len(results)} הודעות דומות סמנטית לשאילתה.\n\n⚠️ AI לא זמין - בדוק שה-API key מוגדר (provider: {provider}, key set: {api_key_set})"
        })
    except Exception as e:
        return jsonify({'error': f'Semantic search error: {str(e)}'})


@app.route('/api/ai/search', methods=['POST'])
def api_ai_search():
    """AI-powered natural language search.

    Expects a JSON object body with:
        query: the search text (required, non-empty string).
        mode: 'auto' (default), 'sql', 'context' or 'semantic'.

    Returns a JSON response. A missing, non-JSON or non-object body, or a
    query that is not a non-empty string, yields ``{'error': 'Query required'}``
    rather than an unhandled exception.
    """
    # silent=True returns None on a missing or invalid body instead of raising.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    query = data.get('query', '')
    mode = data.get('mode', 'auto')  # 'auto', 'sql', 'context', or 'semantic'

    # The fallback path calls query.split(), so reject non-string queries here.
    if not isinstance(query, str) or not query:
        return jsonify({'error': 'Query required'})

    if mode == 'semantic':
        return _semantic_ai_search(query)

    engine = get_ai_engine()
    if engine is None:
        # No AI engine: fall back to the keyword search.
        return fallback_ai_search(query)

    try:
        if mode == 'context':
            # The engine reads messages and reasons over them.
            result = engine.context_search(query)
        elif mode == 'sql':
            result = engine.search(query, generate_answer=True)
        else:
            # Auto: try the SQL-style search first, then context search when
            # it found nothing or reported an error.
            result = engine.search(query, generate_answer=True)
            if result.get('count', 0) == 0 or 'error' in result:
                result = engine.context_search(query)

        return jsonify(result)
    except Exception as e:
        return jsonify({'error': str(e), 'query': query})
|
| 1786 |
+
|
| 1787 |
+
|
| 1788 |
+
def fallback_ai_search(query: str):
    """Keyword search used when no AI engine is available.

    Splits ``query`` on whitespace, keeps words longer than two characters,
    and runs them as an OR query against ``messages_fts``, returning up to
    20 messages ordered newest first.

    Args:
        query: Raw user query text.

    Returns:
        A JSON response with ``query``, ``sql``, ``results``, ``count``,
        ``answer`` and ``fallback: True``; or ``{'error': ..., 'query': ...}``
        when there are no usable keywords or the database query fails.
    """
    keywords = [w for w in query.split() if len(w) > 2]

    # Quote each word so user text is matched literally instead of being
    # parsed as full-text query syntax (AND/NOT/NEAR, '-', ':', '*', ...).
    # Embedded double quotes are dropped to keep every literal well-formed.
    fts_terms = []
    for word in keywords:
        cleaned = word.replace('"', '')
        if cleaned:
            fts_terms.append(f'"{cleaned}"')

    # Validate before opening a connection so nothing needs closing here.
    if not fts_terms:
        return jsonify({'error': 'No valid keywords', 'query': query})

    fts_query = ' OR '.join(fts_terms)

    conn = get_db()
    try:
        cursor = conn.execute('''
            SELECT
                m.id as message_id, m.date, m.from_name, m.text_plain as text
            FROM messages_fts
            JOIN messages m ON messages_fts.rowid = m.id
            WHERE messages_fts MATCH ?
            ORDER BY m.date DESC
            LIMIT 20
        ''', (fts_query,))
        results = [dict(row) for row in cursor.fetchall()]
    except Exception as e:
        return jsonify({'error': str(e), 'query': query})
    finally:
        # Runs on success and on failure alike.
        conn.close()

    if results:
        answer = f"נמצאו {len(results)} הודעות עם המילים: {', '.join(keywords)}"
    else:
        answer = f"לא נמצאו הודעות עם המילים: {', '.join(keywords)}"

    return jsonify({
        'query': query,
        'sql': f"FTS MATCH: {fts_query}",
        'results': results,
        'count': len(results),
        'answer': answer,
        'fallback': True
    })
|
| 1833 |
+
|
| 1834 |
+
|
| 1835 |
+
@app.route('/api/ai/thread/<int:message_id>')
def api_ai_thread(message_id):
    """Return the thread for ``message_id``, via the AI engine when present.

    Without an AI engine the request is delegated to ``api_chat_thread``.
    An exception raised by the engine is reported as ``{'error': ...}``.
    """
    engine = get_ai_engine()

    if engine is not None:
        try:
            return jsonify(engine.get_thread(message_id))
        except Exception as exc:
            return jsonify({'error': str(exc)})

    # No engine: use the basic thread retrieval endpoint.
    return api_chat_thread(message_id)
|
| 1849 |
+
|
| 1850 |
+
|
| 1851 |
+
@app.route('/api/ai/similar/<int:message_id>')
def api_ai_similar(message_id):
    """Find messages similar to ``message_id`` using the AI engine.

    Query parameters:
        limit: maximum number of results (default 10). A non-integer value
            falls back to the default; values below 1 are raised to 1.

    Returns the engine's result as JSON, or ``{'error': ...}`` when the AI
    engine is unavailable or raises.
    """
    # type=int makes an unparsable value yield the default instead of a
    # ValueError that would surface as HTTP 500.
    limit = max(1, request.args.get('limit', 10, type=int))

    engine = get_ai_engine()

    if engine is None:
        return jsonify({'error': 'AI not available'})

    try:
        similar = engine.find_similar_messages(message_id, limit)
        return jsonify(similar)
    except Exception as e:
        return jsonify({'error': str(e)})
|
| 1866 |
+
|
| 1867 |
+
|
| 1868 |
+
# ==========================================
|
| 1869 |
+
# API ENDPOINTS - DATABASE UPDATE
|
| 1870 |
+
# ==========================================
|
| 1871 |
+
|
| 1872 |
+
@app.route('/api/update', methods=['POST'])
def api_update_database():
    """
    Reject database update requests with HTTP 403.

    Updates are disabled on this server; they are done locally via
    daily_sync.py. The upload/JSON import code that used to follow the
    unconditional return was unreachable and has been removed.
    """
    return jsonify({'error': 'Database updates are disabled on this server. Run daily_sync.py locally.'}), 403
|
| 1921 |
+
|
| 1922 |
+
|
| 1923 |
+
@app.route('/api/db/stats')
def api_db_stats():
    """Get database statistics.

    Returns JSON with:
        total_messages: row count of ``messages``.
        total_users: number of distinct non-NULL ``from_id`` values.
        first_message / last_message: MIN and MAX of ``messages.date``.
        db_size_mb: size of the file at DB_PATH in MiB, rounded to two
            decimals; omitted when the file does not exist.
    """
    stats = {}

    conn = get_db()
    try:
        # Total messages
        stats['total_messages'] = conn.execute(
            'SELECT COUNT(*) FROM messages'
        ).fetchone()[0]

        # Total users
        stats['total_users'] = conn.execute(
            'SELECT COUNT(DISTINCT from_id) FROM messages WHERE from_id IS NOT NULL'
        ).fetchone()[0]

        # Date range
        row = conn.execute('SELECT MIN(date), MAX(date) FROM messages').fetchone()
        stats['first_message'] = row[0]
        stats['last_message'] = row[1]
    finally:
        # Release the connection even if one of the queries raised.
        conn.close()

    # Database file size
    if os.path.exists(DB_PATH):
        stats['db_size_mb'] = round(os.path.getsize(DB_PATH) / (1024 * 1024), 2)

    return jsonify(stats)
|
| 1952 |
+
|
| 1953 |
+
|
| 1954 |
+
# ==========================================
|
| 1955 |
+
# API ENDPOINTS - EXPORT
|
| 1956 |
+
# ==========================================
|
| 1957 |
+
|
| 1958 |
+
@app.route('/api/export/users')
def api_export_users():
    """Export per-user aggregates as a CSV attachment.

    Query parameters:
        timeframe: passed to ``parse_timeframe`` (default 'all') to obtain
            the inclusive ``date_unixtime`` range.

    One row per ``from_id``, ordered by message count descending, with
    message count, total characters, link and media totals, and first/last
    seen timestamps in ISO format.
    """
    timeframe = request.args.get('timeframe', 'all')
    start_ts, end_ts = parse_timeframe(timeframe)

    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerow(['User ID', 'Name', 'Messages', 'Characters', 'Links', 'Media', 'First Seen', 'Last Seen'])

    conn = get_db()
    try:
        cursor = conn.execute('''
            SELECT
                from_id,
                from_name,
                COUNT(*) as message_count,
                SUM(LENGTH(text_plain)) as char_count,
                SUM(has_links) as links,
                SUM(has_media) as media,
                MIN(date_unixtime) as first_seen,
                MAX(date_unixtime) as last_seen
            FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
            AND from_id IS NOT NULL
            GROUP BY from_id
            ORDER BY message_count DESC
        ''', (start_ts, end_ts))

        for row in cursor.fetchall():
            writer.writerow([
                row['from_id'],
                row['from_name'],
                row['message_count'],
                # SUM() yields NULL when every input is NULL; export 0 then.
                row['char_count'] or 0,
                row['links'] or 0,
                row['media'] or 0,
                datetime.fromtimestamp(row['first_seen']).isoformat() if row['first_seen'] else '',
                datetime.fromtimestamp(row['last_seen']).isoformat() if row['last_seen'] else ''
            ])
    finally:
        # Close the connection even when the query or row formatting fails.
        conn.close()

    return Response(
        output.getvalue(),
        mimetype='text/csv',
        headers={'Content-Disposition': 'attachment; filename=users_export.csv'}
    )
|
| 2007 |
+
|
| 2008 |
+
|
| 2009 |
+
@app.route('/api/export/messages')
def api_export_messages():
    """Export messages as a CSV attachment, newest first.

    Query parameters:
        timeframe: passed to ``parse_timeframe`` (default 'all') to obtain
            the inclusive ``date_unixtime`` range.
        limit: maximum number of rows (default 10000). A non-integer value
            falls back to the default.

    Message text is truncated to 500 characters per row.
    """
    timeframe = request.args.get('timeframe', 'all')
    start_ts, end_ts = parse_timeframe(timeframe)
    # type=int makes an unparsable value yield the default instead of a
    # ValueError that would surface as HTTP 500.
    # NOTE(review): a negative limit is passed through unchanged, and SQLite
    # treats a negative LIMIT as "no upper bound"; confirm that is intended.
    limit = request.args.get('limit', 10000, type=int)

    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerow(['ID', 'Date', 'User ID', 'User Name', 'Text', 'Has Links', 'Has Media', 'Has Mentions', 'Reply To'])

    conn = get_db()
    try:
        cursor = conn.execute('''
            SELECT
                id, date, from_id, from_name, text_plain,
                has_links, has_media, has_mentions,
                reply_to_message_id
            FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
            ORDER BY date_unixtime DESC
            LIMIT ?
        ''', (start_ts, end_ts, limit))

        for row in cursor.fetchall():
            writer.writerow([
                row['id'],
                row['date'],
                row['from_id'],
                row['from_name'],
                row['text_plain'][:500] if row['text_plain'] else '',
                row['has_links'],
                row['has_media'],
                row['has_mentions'],
                row['reply_to_message_id']
            ])
    finally:
        # Close the connection even when the query or row formatting fails.
        conn.close()

    return Response(
        output.getvalue(),
        mimetype='text/csv',
        headers={'Content-Disposition': 'attachment; filename=messages_export.csv'}
    )
|
| 2054 |
+
|
| 2055 |
+
|
| 2056 |
+
# ==========================================
|
| 2057 |
+
# MAIN
|
| 2058 |
+
# ==========================================
|
| 2059 |
+
|
| 2060 |
+
def main():
    """Parse command-line options, print a startup banner and run the app.

    Options (defaults come from the environment when set):
        --db:    database path (DB_PATH, default 'telegram.db'); stored in
                 the module-level DB_PATH.
        --port:  server port (PORT, default 5000).
        --host:  server host (HOST, default '127.0.0.1').
        --debug: enable debug mode.
    """
    global DB_PATH

    import argparse
    parser = argparse.ArgumentParser(description='Telegram Analytics Dashboard')
    parser.add_argument('--db', default=os.environ.get('DB_PATH', 'telegram.db'), help='Database path')
    parser.add_argument('--port', type=int, default=int(os.environ.get('PORT', 5000)), help='Server port')
    parser.add_argument('--host', default=os.environ.get('HOST', '127.0.0.1'), help='Server host')
    parser.add_argument('--debug', action='store_true', help='Debug mode')

    args = parser.parse_args()

    DB_PATH = args.db

    # Pad every row to one inner width so the right border stays aligned
    # whatever the lengths of the host, port and database path are.
    inner_width = 62
    border = '═' * inner_width
    server_url = f"http://{args.host}:{args.port}"
    rows = [
        'TELEGRAM ANALYTICS DASHBOARD'.center(inner_width),
        None,  # separator line
        f"  Database: {args.db}".ljust(inner_width),
        f"  Server:   {server_url}".ljust(inner_width),
    ]

    print()
    print(f"╔{border}╗")
    for row in rows:
        print(f"╠{border}╣" if row is None else f"║{row}║")
    print(f"╚{border}╝")
    print()

    app.run(host=args.host, port=args.port, debug=args.debug)


if __name__ == '__main__':
    main()
|
data_structures.py
ADDED
|
@@ -0,0 +1,773 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Advanced Data Structures for Efficient Search and Traversal
|
| 4 |
+
|
| 5 |
+
Includes:
|
| 6 |
+
- Bloom Filter: O(1) "definitely not in set" checks
|
| 7 |
+
- Trie: O(k) prefix search and autocomplete
|
| 8 |
+
- LRU Cache: O(1) cached query results
|
| 9 |
+
- Graph algorithms: DFS, BFS for thread traversal
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import hashlib
|
| 13 |
+
import math
|
| 14 |
+
from collections import OrderedDict, defaultdict, deque
|
| 15 |
+
from typing import Any, Callable, Generator, Iterator, Optional
|
| 16 |
+
from functools import wraps
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
# ============================================
|
| 20 |
+
# BLOOM FILTER
|
| 21 |
+
# ============================================
|
| 22 |
+
|
| 23 |
+
class BloomFilter:
    """
    Space-efficient probabilistic data structure for set membership testing.

    - O(k) insert and lookup where k is number of hash functions
    - False positives possible, false negatives impossible
    - Use case: Quick "message ID exists?" check before DB query

    Example:
        bf = BloomFilter(expected_items=100000, fp_rate=0.01)
        bf.add("message_123")
        if "message_123" in bf:
            # Might exist, check DB
        else:
            # Definitely doesn't exist, skip DB
    """

    def __init__(self, expected_items: int = 100000, fp_rate: float = 0.01):
        """
        Initialize Bloom filter.

        Args:
            expected_items: Expected number of items to store (must be > 0)
            fp_rate: Desired false positive rate, strictly between 0 and 1
                (0.01 = 1%)

        Raises:
            ValueError: If either argument is outside its valid range.
        """
        if expected_items <= 0:
            raise ValueError("expected_items must be a positive integer")
        if not 0.0 < fp_rate < 1.0:
            raise ValueError("fp_rate must be strictly between 0 and 1")

        # Calculate optimal size and hash count
        self.size = self._optimal_size(expected_items, fp_rate)
        self.hash_count = self._optimal_hash_count(self.size, expected_items)
        self.bit_array = bytearray(math.ceil(self.size / 8))
        # Number of add() calls; repeated adds of the same item are counted.
        self.count = 0

    @staticmethod
    def _optimal_size(n: int, p: float) -> int:
        """Calculate optimal bit array size: m = -n*ln(p) / (ln2)^2 (at least 1)."""
        # Truncation can reach 0 for tiny n with p near 1; a zero-bit filter
        # would make the modulo in _get_hash_values divide by zero.
        return max(1, int(-n * math.log(p) / (math.log(2) ** 2)))

    @staticmethod
    def _optimal_hash_count(m: int, n: int) -> int:
        """Calculate optimal hash count: k = (m/n) * ln2 (at least 1)."""
        return max(1, int((m / n) * math.log(2)))

    def _get_hash_values(self, item: str) -> Generator[int, None, None]:
        """Generate k bit positions using double hashing: (h1 + i*h2) mod size."""
        encoded = item.encode()  # encode once, reuse for both digests
        h1 = int(hashlib.md5(encoded).hexdigest(), 16)
        h2 = int(hashlib.sha1(encoded).hexdigest(), 16)
        for i in range(self.hash_count):
            yield (h1 + i * h2) % self.size

    def add(self, item: str) -> None:
        """Add an item to the filter. O(k) where k is hash count."""
        for pos in self._get_hash_values(item):
            byte_idx, bit_idx = divmod(pos, 8)
            self.bit_array[byte_idx] |= (1 << bit_idx)
        self.count += 1

    def __contains__(self, item: str) -> bool:
        """Check if item might be in the filter. O(k)."""
        for pos in self._get_hash_values(item):
            byte_idx, bit_idx = divmod(pos, 8)
            if not (self.bit_array[byte_idx] & (1 << bit_idx)):
                return False  # Definitely not in set
        return True  # Might be in set

    def __len__(self) -> int:
        """Return the number of add() calls made so far."""
        return self.count

    @property
    def memory_usage(self) -> int:
        """Return memory usage of the bit array in bytes."""
        return len(self.bit_array)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
# ============================================
|
| 96 |
+
# TRIE (PREFIX TREE)
|
| 97 |
+
# ============================================
|
| 98 |
+
|
| 99 |
+
class TrieNode:
    """Node in a Trie data structure."""
    __slots__ = ['children', 'is_end', 'data', 'count']

    def __init__(self):
        self.children: dict[str, TrieNode] = {}
        self.is_end: bool = False
        self.data: Any = None  # Store associated data (e.g., message IDs)
        self.count: int = 0  # Frequency count


class Trie:
    """
    Trie (Prefix Tree) for fast prefix-based search and autocomplete.

    - O(k) insert/search where k is key length
    - O(p + n) prefix search where p is prefix length, n is results
    - Use case: Autocomplete usernames, find all messages starting with prefix

    Keys are lowercased on insert and on every lookup, so matching is
    case-insensitive and returned keys are lowercase.

    Example:
        trie = Trie()
        trie.insert("@username1", data=[1, 2, 3])
        trie.insert("@username2", data=[4, 5])

        results = trie.search_prefix("@user")  # Returns both
        completions = trie.autocomplete("@user", limit=5)
    """

    def __init__(self):
        self.root = TrieNode()
        self.size = 0  # number of distinct keys stored

    def insert(self, key: str, data: Any = None) -> None:
        """Insert a key with optional associated data. O(k).

        Data accumulates in a list on the key's end node: a list argument
        is extended into it, any other non-None value is appended.
        """
        node = self.root
        for char in key.lower():
            if char not in node.children:
                node.children[char] = TrieNode()
            node = node.children[char]
            # NOTE(review): the source's indentation was lost; this increment
            # is reconstructed as per-node inside the loop. Confirm.
            node.count += 1

        if not node.is_end:
            self.size += 1
        node.is_end = True

        # Store or append data
        if data is not None:
            if node.data is None:
                node.data = []
            if isinstance(data, list):
                node.data.extend(data)
            else:
                node.data.append(data)

    def search(self, key: str) -> Optional[Any]:
        """Search for exact key. O(k). Returns associated data or None."""
        node = self._find_node(key.lower())
        return node.data if node and node.is_end else None

    def __contains__(self, key: str) -> bool:
        """Check if key exists. O(k)."""
        node = self._find_node(key.lower())
        return node is not None and node.is_end

    def _find_node(self, prefix: str) -> Optional[TrieNode]:
        """Find the node for a given (already lowercased) prefix."""
        node = self.root
        for char in prefix:
            if char not in node.children:
                return None
            node = node.children[char]
        return node

    def search_prefix(self, prefix: str) -> list[tuple[str, Any]]:
        """
        Find all keys with given prefix. O(p + n).
        Returns list of (key, data) tuples.
        """
        results = []
        lowered = prefix.lower()
        node = self._find_node(lowered)
        if node:
            self._collect_all(node, lowered, results)
        return results

    def _collect_all(
        self,
        node: TrieNode,
        prefix: str,
        results: list[tuple[str, Any]]
    ) -> None:
        """Append (key, data) for every key under ``node`` to ``results``.

        Uses an explicit stack instead of recursion so that keys longer
        than the interpreter's recursion limit do not raise RecursionError.
        Visit order is pre-order with children in insertion order.
        """
        stack = [(node, prefix)]
        while stack:
            current, key = stack.pop()
            if current.is_end:
                results.append((key, current.data))
            # Push in reverse so the first-inserted child is popped first.
            for char, child in reversed(list(current.children.items())):
                stack.append((child, key + char))

    def autocomplete(self, prefix: str, limit: int = 10) -> list[str]:
        """
        Get autocomplete suggestions for prefix.
        Returns completions sorted by node count (highest first), up to limit.
        """
        lowered = prefix.lower()
        node = self._find_node(lowered)
        if not node:
            return []

        suggestions = []
        self._collect_suggestions(node, lowered, suggestions)

        # Sort by frequency and return top results (stable for ties)
        suggestions.sort(key=lambda x: x[1], reverse=True)
        return [s[0] for s in suggestions[:limit]]

    def _collect_suggestions(
        self,
        node: TrieNode,
        prefix: str,
        suggestions: list[tuple[str, int]]
    ) -> None:
        """Append (key, count) for every key under ``node`` to ``suggestions``.

        Iterative for the same reason as ``_collect_all``; same visit order.
        """
        stack = [(node, prefix)]
        while stack:
            current, key = stack.pop()
            if current.is_end:
                suggestions.append((key, current.count))
            for char, child in reversed(list(current.children.items())):
                stack.append((child, key + char))

    def __len__(self) -> int:
        """Return the number of distinct keys stored."""
        return self.size
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
# ============================================
|
| 228 |
+
# LRU CACHE
|
| 229 |
+
# ============================================
|
| 230 |
+
|
| 231 |
+
class LRUCache:
    """
    Least Recently Used (LRU) Cache for query results.

    - O(1) get/put operations
    - Automatically evicts least recently used items when full
    - A maxsize of 0 or less disables storage: put() becomes a no-op
    - Use case: Cache expensive query results

    Example:
        cache = LRUCache(maxsize=1000)
        cache.put("query:hello", results)
        results = cache.get("query:hello")  # O(1)
    """

    def __init__(self, maxsize: int = 1000):
        self.maxsize = maxsize
        # Insertion order doubles as recency order: oldest entry first.
        self.cache: OrderedDict[str, Any] = OrderedDict()
        self.hits = 0
        self.misses = 0

    def get(self, key: str) -> Optional[Any]:
        """Get item from cache. O(1). Returns None if not found.

        A hit marks the key as most recently used. Hit and miss counters
        are updated on every call.
        """
        if key in self.cache:
            self.cache.move_to_end(key)
            self.hits += 1
            return self.cache[key]
        self.misses += 1
        return None

    def put(self, key: str, value: Any) -> None:
        """Put item in cache. O(1). Evicts LRU item if full."""
        if self.maxsize <= 0:
            # Nothing can be stored; evicting from an empty OrderedDict
            # would raise KeyError.
            return
        if key in self.cache:
            self.cache.move_to_end(key)
        elif len(self.cache) >= self.maxsize:
            self.cache.popitem(last=False)
        self.cache[key] = value

    def __contains__(self, key: str) -> bool:
        """Membership test; does not affect recency or hit/miss counters."""
        return key in self.cache

    def __len__(self) -> int:
        return len(self.cache)

    def clear(self) -> None:
        """Clear the cache and reset the hit/miss counters."""
        self.cache.clear()
        self.hits = 0
        self.misses = 0

    @property
    def hit_rate(self) -> float:
        """Return cache hit rate (0.0 when there have been no lookups)."""
        total = self.hits + self.misses
        return self.hits / total if total > 0 else 0.0

    @property
    def stats(self) -> dict:
        """Return cache statistics."""
        return {
            'size': len(self.cache),
            'maxsize': self.maxsize,
            'hits': self.hits,
            'misses': self.misses,
            'hit_rate': self.hit_rate
        }
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
def lru_cached(cache: "LRUCache", key_func: Optional[Callable[..., str]] = None):
    """
    Decorator to cache function results using LRUCache.

    Args:
        cache: Cache object providing ``get``, ``put`` and ``in`` support.
        key_func: Optional callable that receives the wrapped function's
            arguments and returns the cache key. When omitted, the key is
            built from the function name and the repr of args and kwargs.

    Results that are ``None`` are cached too: a stored ``None`` is told
    apart from a miss by checking key membership.

    Example:
        cache = LRUCache(1000)

        @lru_cached(cache, key_func=lambda q, **kw: f"search:{q}")
        def search(query, limit=100):
            return expensive_search(query, limit)
    """
    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def wrapper(*args, **kwargs):
            if key_func:
                key = key_func(*args, **kwargs)
            else:
                key = f"{func.__name__}:{args}:{kwargs}"

            # get() keeps the cache's hit/miss accounting; the membership
            # check separates "cached None" from "not cached".
            result = cache.get(key)
            if result is not None or key in cache:
                return result

            result = func(*args, **kwargs)
            cache.put(key, result)
            return result
        return wrapper
    return decorator
|
| 327 |
+
|
| 328 |
+
|
| 329 |
+
# ============================================
|
| 330 |
+
# GRAPH ALGORITHMS FOR REPLY THREADS
|
| 331 |
+
# ============================================
|
| 332 |
+
|
| 333 |
+
class ReplyGraph:
    """
    Graph structure for message reply relationships.

    Supports:
    - DFS: Depth-first traversal for finding all descendants
    - BFS: Breadth-first traversal for level-order exploration
    - Thread reconstruction: root, ancestors and root-to-message path
    - Connected components: Find isolated conversation threads

    All traversals are iterative and track visited nodes, so very deep
    reply chains cannot exhaust the recursion limit and malformed cyclic
    data (for example a message that replies to itself) cannot cause an
    endless loop.

    Time complexity: O(V + E) for traversals
    Space complexity: O(V) for visited set
    """

    def __init__(self):
        # Adjacency lists
        self.children: dict[int, list[int]] = defaultdict(list)  # parent -> [children]
        self.parents: dict[int, int] = {}  # child -> parent
        self.nodes: set[int] = set()

    def add_edge(self, parent_id: int, child_id: int) -> None:
        """
        Add a reply relationship (``child_id`` replies to ``parent_id``).

        Adding the same edge twice is a no-op. If the child was previously
        attached to a different parent it is moved, so ``children`` and
        ``parents`` always describe the same edges.
        """
        already_linked = (
            child_id in self.parents and self.parents[child_id] == parent_id
        )
        if not already_linked:
            if child_id in self.parents:
                # Re-parenting: drop the stale entry under the old parent.
                stale = self.children.get(self.parents[child_id])
                if stale and child_id in stale:
                    stale.remove(child_id)
            self.children[parent_id].append(child_id)
            self.parents[child_id] = parent_id
        self.nodes.add(parent_id)
        self.nodes.add(child_id)

    def add_message(self, message_id: int, reply_to: Optional[int] = None) -> None:
        """Add a message, optionally with its reply relationship."""
        self.nodes.add(message_id)
        if reply_to is not None:
            self.add_edge(reply_to, message_id)

    def get_children(self, message_id: int) -> list[int]:
        """Get direct replies to a message. O(1)."""
        return self.children.get(message_id, [])

    def get_parent(self, message_id: int) -> Optional[int]:
        """Get the message this is a reply to (None for thread roots). O(1)."""
        return self.parents.get(message_id)

    # ==================
    # DFS - Depth First Search
    # ==================

    def dfs_descendants(self, start_id: int) -> list[int]:
        """
        DFS: Get all descendants of a message (entire sub-thread).

        Time: O(V + E)
        Space: O(V)

        Returns messages in DFS pre-order (deep before wide), starting with
        ``start_id`` itself. Implemented on top of the iterative traversal
        so long reply chains do not raise ``RecursionError``.
        """
        return list(self.dfs_iterative(start_id))

    def dfs_iterative(self, start_id: int) -> Iterator[int]:
        """
        Iterative DFS using explicit stack (avoids recursion limit).

        Yields message IDs in DFS order.
        """
        stack = [start_id]
        visited = set()

        while stack:
            node_id = stack.pop()
            if node_id in visited:
                continue
            visited.add(node_id)
            yield node_id

            # Push children reversed so the first child is popped first.
            for child_id in reversed(self.children.get(node_id, [])):
                if child_id not in visited:
                    stack.append(child_id)

    # ==================
    # BFS - Breadth First Search
    # ==================

    def bfs_descendants(self, start_id: int) -> list[int]:
        """
        BFS: Get all descendants level by level.

        Time: O(V + E)
        Space: O(V)

        Returns messages in BFS order (level by level), starting with
        ``start_id`` itself.
        """
        return [node_id for node_id, _ in self.bfs_with_depth(start_id)]

    def bfs_with_depth(self, start_id: int) -> list[tuple[int, int]]:
        """
        BFS with depth information.

        Returns list of (message_id, depth) tuples; ``start_id`` has depth 0.
        """
        result = []
        visited = set()
        queue = deque([(start_id, 0)])

        while queue:
            node_id, depth = queue.popleft()
            if node_id in visited:
                continue
            visited.add(node_id)
            result.append((node_id, depth))

            for child_id in self.children.get(node_id, []):
                if child_id not in visited:
                    queue.append((child_id, depth + 1))

        return result

    # ==================
    # THREAD RECONSTRUCTION
    # ==================

    def _walk_up(self, message_id: int) -> list[int]:
        """Return the parent chain above ``message_id`` (nearest first), stopping at a root or at the first repeated node."""
        chain = []
        seen = {message_id}
        current = message_id
        while current in self.parents:
            parent = self.parents[current]
            if parent in seen:
                # Cyclic parent links (corrupt data): stop instead of spinning.
                break
            seen.add(parent)
            chain.append(parent)
            current = parent
        return chain

    def get_thread_root(self, message_id: int) -> int:
        """
        Find the root message of a thread. O(d) where d is depth.

        If the parent links form a cycle, the last node reached before the
        cycle repeats is returned.
        """
        chain = self._walk_up(message_id)
        return chain[-1] if chain else message_id

    def get_full_thread(self, message_id: int) -> list[int]:
        """
        Get the complete thread containing a message.

        1. Find root via parent traversal
        2. BFS from root to get all descendants
        """
        root = self.get_thread_root(message_id)
        return self.bfs_descendants(root)

    def get_ancestors(self, message_id: int) -> list[int]:
        """
        Get all ancestors (path to root). O(d).

        Returns in order from message to root (the message itself excluded).
        """
        return self._walk_up(message_id)

    def get_thread_path(self, message_id: int) -> list[int]:
        """
        Get path from root to message (both included). O(d).
        """
        path = [message_id] + self._walk_up(message_id)
        path.reverse()
        return path

    # ==================
    # CONNECTED COMPONENTS
    # ==================

    def find_connected_components(self) -> list[set[int]]:
        """
        Find all isolated conversation threads.

        Time: O(V + E)

        Returns list of sets, each set is a connected thread. Edges are
        followed in both directions (to the parent and to the children).
        """
        visited = set()
        components = []

        for node in self.nodes:
            if node in visited:
                continue

            component = set()
            queue = deque([node])
            while queue:
                current = queue.popleft()
                if current in visited:
                    continue
                visited.add(current)
                component.add(current)

                # Add parent
                if current in self.parents:
                    parent = self.parents[current]
                    if parent not in visited:
                        queue.append(parent)

                # Add children
                for child in self.children.get(current, []):
                    if child not in visited:
                        queue.append(child)

            components.append(component)

        return components

    def get_thread_roots(self) -> list[int]:
        """Get all thread root messages (messages with no parent)."""
        return [node for node in self.nodes if node not in self.parents]

    # ==================
    # STATISTICS
    # ==================

    def get_thread_depth(self, root_id: int) -> int:
        """Get maximum depth of a thread from root (0 for a message without replies)."""
        return max((depth for _, depth in self.bfs_with_depth(root_id)), default=0)

    def get_subtree_size(self, message_id: int) -> int:
        """Get number of messages in subtree including root."""
        return len(self.dfs_descendants(message_id))

    @property
    def stats(self) -> dict:
        """Get graph statistics."""
        return {
            'total_nodes': len(self.nodes),
            'total_edges': sum(len(children) for children in self.children.values()),
            'root_messages': len(self.get_thread_roots()),
            'connected_components': len(self.find_connected_components())
        }
|
| 594 |
+
|
| 595 |
+
|
| 596 |
+
# ============================================
|
| 597 |
+
# TRIGRAM SIMILARITY
|
| 598 |
+
# ============================================
|
| 599 |
+
|
| 600 |
+
def generate_trigrams(text: str) -> set[str]:
    """
    Build the set of 3-character windows of ``text`` for fuzzy matching.

    The text is lower-cased and stripped first. Text shorter than three
    characters yields a single-element set holding the text itself, and
    empty text yields an empty set.

    Example: "hello" -> {"hel", "ell", "llo"}
    """
    normalized = text.lower().strip()
    if not normalized:
        return set()
    if len(normalized) < 3:
        return {normalized}
    window_count = len(normalized) - 2
    return {normalized[start:start + 3] for start in range(window_count)}
|
| 610 |
+
|
| 611 |
+
|
| 612 |
+
def trigram_similarity(text1: str, text2: str) -> float:
    """
    Jaccard similarity of the two texts' trigram sets.

    Returns a value between 0 (nothing shared) and 1 (same trigram set).
    If either text produces no trigrams the result is 0.0.
    """
    first = generate_trigrams(text1)
    second = generate_trigrams(text2)
    if not (first and second):
        return 0.0

    shared = first & second
    combined = first | second
    if len(combined) > 0:
        return len(shared) / len(combined)
    return 0.0
|
| 628 |
+
|
| 629 |
+
|
| 630 |
+
class TrigramIndex:
    """
    Inverted index of trigrams for fuzzy search.

    Time complexity:
    - Insert: O(k) where k is text length
    - Search: O(t * m) where t is trigrams in query, m is avg matches

    Example:
        index = TrigramIndex()
        index.add(1, "שלום עולם")
        index.add(2, "שלום לכולם")

        results = index.search("שלום", threshold=0.3)
    """

    def __init__(self):
        self.index: dict[str, set[int]] = defaultdict(set)  # trigram -> doc ids
        self.texts: dict[int, str] = {}  # doc id -> indexed text

    def add(self, doc_id: int, text: str) -> None:
        """
        Add a document to the index, replacing any earlier text for ``doc_id``.

        Postings of the previous text are removed first; otherwise the old
        trigrams would keep matching the document and inflate its score.
        """
        if doc_id in self.texts:
            for trigram in generate_trigrams(self.texts[doc_id]):
                postings = self.index.get(trigram)
                if postings is None:
                    continue
                postings.discard(doc_id)
                if not postings:
                    del self.index[trigram]

        self.texts[doc_id] = text
        for trigram in generate_trigrams(text):
            self.index[trigram].add(doc_id)

    def search(self, query: str, threshold: float = 0.3, limit: int = 100) -> list[tuple[int, float]]:
        """
        Search for documents similar to query.

        Args:
            query: Text to match.
            threshold: Minimum Jaccard similarity for a document to be returned.
            limit: Maximum number of results.

        Returns list of (doc_id, similarity) tuples, sorted by similarity
        descending; ties are ordered by doc_id so results are reproducible.
        """
        query_trigrams = generate_trigrams(query)
        if not query_trigrams:
            return []

        # Count, per candidate document, how many query trigrams it shares.
        candidates: dict[int, int] = defaultdict(int)
        for trigram in query_trigrams:
            for doc_id in self.index.get(trigram, ()):
                candidates[doc_id] += 1

        results = []
        query_len = len(query_trigrams)

        for doc_id, match_count in candidates.items():
            doc_len = len(generate_trigrams(self.texts[doc_id]))

            # Jaccard: |A & B| / |A | B|, with |A | B| = |A| + |B| - |A & B|
            similarity = match_count / (query_len + doc_len - match_count)

            if similarity >= threshold:
                results.append((doc_id, similarity))

        results.sort(key=lambda pair: (-pair[1], pair[0]))
        return results[:limit]

    def __len__(self) -> int:
        """Return the number of indexed documents."""
        return len(self.texts)
|
| 692 |
+
|
| 693 |
+
|
| 694 |
+
# ============================================
|
| 695 |
+
# INVERTED INDEX
|
| 696 |
+
# ============================================
|
| 697 |
+
|
| 698 |
+
class InvertedIndex:
    """
    Simple inverted index for fast word-to-document lookup.

    Time complexity:
    - Insert: O(w) where w is word count
    - Search: O(1) for single word
    - AND/OR queries: proportional to the size of the posting sets involved
    """

    def __init__(self):
        self.index: dict[str, set[int]] = defaultdict(set)  # word -> doc ids
        self.doc_count = 0  # number of add() calls

    def add(self, doc_id: int, text: str) -> None:
        """Add document to index (every token of ``text`` points to ``doc_id``)."""
        for word in self._tokenize(text):
            self.index[word].add(doc_id)
        self.doc_count += 1

    def _tokenize(self, text: str) -> list[str]:
        """Simple tokenization: lower-case runs of Hebrew or ASCII letters."""
        import re
        return re.findall(r'[\u0590-\u05FFa-zA-Z]+', text.lower())

    def search(self, word: str) -> set[int]:
        """
        Find all documents containing word (case-insensitive).

        For a known word the returned set is the index's own posting set,
        so callers must not modify it in place.
        """
        return self.index.get(word.lower(), set())

    def search_and(self, words: list[str]) -> set[int]:
        """
        Find documents containing ALL words.

        Returns a new set; the index itself is never modified (intersecting
        in place on the posting set of the first word would silently drop
        documents from the index).
        """
        if not words:
            return set()
        result = set(self.search(words[0]))  # copy before narrowing
        for word in words[1:]:
            if not result:
                break  # nothing left to intersect
            result &= self.search(word)
        return result

    def search_or(self, words: list[str]) -> set[int]:
        """Find documents containing ANY word. Returns a new set."""
        result = set()
        for word in words:
            result |= self.search(word)
        return result
|
| 743 |
+
|
| 744 |
+
|
| 745 |
+
if __name__ == '__main__':
    # Small smoke demo of the structures defined in this module.
    print("=== Bloom Filter Demo ===")
    bf = BloomFilter(expected_items=1000, fp_rate=0.01)
    for sample_key in ("message_1", "message_2"):
        bf.add(sample_key)
    print(f"message_1 in filter: {'message_1' in bf}")
    print(f"message_999 in filter: {'message_999' in bf}")
    print(f"Memory usage: {bf.memory_usage} bytes")

    print("\n=== Trie Demo ===")
    trie = Trie()
    for handle, payload in (("@username1", 1), ("@username2", 2), ("@user_test", 3)):
        trie.insert(handle, data=payload)
    print(f"Autocomplete '@user': {trie.autocomplete('@user')}")

    print("\n=== Reply Graph Demo ===")
    graph = ReplyGraph()
    graph.add_message(1)
    # (message id, id of the message it replies to)
    for message_id, parent_id in ((2, 1), (3, 1), (4, 2), (5, 2)):
        graph.add_message(message_id, reply_to=parent_id)

    print(f"DFS from 1: {graph.dfs_descendants(1)}")
    print(f"BFS from 1: {graph.bfs_descendants(1)}")
    print(f"Thread path for 4: {graph.get_thread_path(4)}")
    print(f"Stats: {graph.stats}")
|
indexer.py
ADDED
|
@@ -0,0 +1,817 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Telegram JSON Chat Indexer (Optimized)
|
| 4 |
+
|
| 5 |
+
Features:
|
| 6 |
+
- Batch processing for faster indexing
|
| 7 |
+
- Graph building for reply threads
|
| 8 |
+
- Trigram index for fuzzy search
|
| 9 |
+
- Progress tracking
|
| 10 |
+
- Memory-efficient streaming
|
| 11 |
+
|
| 12 |
+
Usage:
|
| 13 |
+
python indexer.py <json_file> [--db <database_file>]
|
| 14 |
+
python indexer.py result.json --db telegram.db
|
| 15 |
+
python indexer.py result.json --batch-size 5000 --build-trigrams
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
import json
|
| 19 |
+
import sqlite3
|
| 20 |
+
import argparse
|
| 21 |
+
|
| 22 |
+
try:
|
| 23 |
+
import ijson
|
| 24 |
+
HAS_IJSON = True
|
| 25 |
+
except ImportError:
|
| 26 |
+
HAS_IJSON = False
|
| 27 |
+
import os
|
| 28 |
+
import time
|
| 29 |
+
from pathlib import Path
|
| 30 |
+
from typing import Any, Generator
|
| 31 |
+
from collections import defaultdict
|
| 32 |
+
|
| 33 |
+
from data_structures import BloomFilter, ReplyGraph, generate_trigrams
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def flatten_text(text_field: Any) -> str:
    """
    Collapse a message ``text`` field into one plain string.

    The field is either a string (returned unchanged) or a list mixing
    strings and dicts; list items that are strings or dicts with a
    ``'text'`` key are concatenated in order, everything else is ignored.
    Any other input type yields an empty string.
    """
    if isinstance(text_field, str):
        return text_field
    if not isinstance(text_field, list):
        return ''

    pieces = []
    for chunk in text_field:
        if isinstance(chunk, str):
            pieces.append(chunk)
            continue
        if isinstance(chunk, dict) and 'text' in chunk:
            pieces.append(chunk['text'])
    return ''.join(pieces)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def extract_entities(text_entities: list) -> list[dict]:
    """
    Collect the non-plain entities (links, mentions, etc.) of a message.

    Each dict item whose ``'type'`` is present and not ``'plain'`` becomes
    ``{'type': ..., 'value': <its 'text' or ''>}``. Non-dict items and a
    falsy ``text_entities`` are tolerated.
    """
    found = []
    for candidate in (text_entities or []):
        if not isinstance(candidate, dict):
            continue
        kind = candidate.get('type', 'plain')
        if kind == 'plain':
            continue
        found.append({'type': kind, 'value': candidate.get('text', '')})
    return found
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def parse_message(msg: dict) -> dict | None:
|
| 70 |
+
"""Parse a single message from Telegram JSON format."""
|
| 71 |
+
if msg.get('type') != 'message':
|
| 72 |
+
return None
|
| 73 |
+
|
| 74 |
+
text_plain = flatten_text(msg.get('text', ''))
|
| 75 |
+
entities = extract_entities(msg.get('text_entities', []))
|
| 76 |
+
|
| 77 |
+
has_links = any(e['type'] == 'link' for e in entities)
|
| 78 |
+
has_mentions = any(e['type'] == 'mention' for e in entities)
|
| 79 |
+
|
| 80 |
+
return {
|
| 81 |
+
'id': msg.get('id'),
|
| 82 |
+
'type': msg.get('type', 'message'),
|
| 83 |
+
'date': msg.get('date'),
|
| 84 |
+
'date_unixtime': int(msg.get('date_unixtime', 0)) if msg.get('date_unixtime') else 0,
|
| 85 |
+
'from_name': msg.get('from', ''),
|
| 86 |
+
'from_id': msg.get('from_id', ''),
|
| 87 |
+
'reply_to_message_id': msg.get('reply_to_message_id'),
|
| 88 |
+
'forwarded_from': msg.get('forwarded_from'),
|
| 89 |
+
'forwarded_from_id': msg.get('forwarded_from_id'),
|
| 90 |
+
'text_plain': text_plain,
|
| 91 |
+
'text_length': len(text_plain),
|
| 92 |
+
'has_media': 1 if msg.get('photo') or msg.get('file') or msg.get('media_type') else 0,
|
| 93 |
+
'has_photo': 1 if msg.get('photo') else 0,
|
| 94 |
+
'has_links': 1 if has_links else 0,
|
| 95 |
+
'has_mentions': 1 if has_mentions else 0,
|
| 96 |
+
'is_edited': 1 if msg.get('edited') else 0,
|
| 97 |
+
'edited_unixtime': int(msg.get('edited_unixtime', 0)) if msg.get('edited_unixtime') else None,
|
| 98 |
+
'photo_file_size': msg.get('photo_file_size'),
|
| 99 |
+
'photo_width': msg.get('width'),
|
| 100 |
+
'photo_height': msg.get('height'),
|
| 101 |
+
'raw_json': json.dumps(msg, ensure_ascii=False),
|
| 102 |
+
'entities': entities
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def _detect_json_structure(json_path: str) -> str:
|
| 107 |
+
"""Peek at JSON to determine if root is a list or object with 'messages' key."""
|
| 108 |
+
with open(json_path, 'r', encoding='utf-8') as f:
|
| 109 |
+
for char in iter(lambda: f.read(1), ''):
|
| 110 |
+
if char in ' \t\n\r':
|
| 111 |
+
continue
|
| 112 |
+
if char == '[':
|
| 113 |
+
return 'list'
|
| 114 |
+
return 'object'
|
| 115 |
+
return 'object'
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def load_json_messages(json_path: str) -> Generator[dict, None, None]:
    """
    Yield parsed messages from a Telegram export JSON file.

    With ijson installed the file is streamed item by item; without it the
    whole document is read with json.load(). In both cases the root may be
    a list of messages or an object with a 'messages' list, and entries for
    which parse_message() returns a falsy value are skipped.
    """
    if not HAS_IJSON:
        with open(json_path, 'r', encoding='utf-8') as handle:
            payload = json.load(handle)
            if isinstance(payload, list):
                raw_messages = payload
            else:
                raw_messages = payload.get('messages', [])
            yield from filter(None, map(parse_message, raw_messages))
        return

    root_kind = _detect_json_structure(json_path)
    item_prefix = 'messages.item' if root_kind != 'list' else 'item'
    with open(json_path, 'rb') as handle:
        yield from filter(None, map(parse_message, ijson.items(handle, item_prefix)))
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def count_messages(json_path: str) -> int:
    """
    Count the entries whose 'type' is 'message' in a JSON export.

    Streams the file with ijson when it is available, otherwise loads the
    whole document with json.load().
    """
    if not HAS_IJSON:
        with open(json_path, 'r', encoding='utf-8') as handle:
            payload = json.load(handle)
        entries = payload if isinstance(payload, list) else payload.get('messages', [])
        total = 0
        for entry in entries:
            if entry.get('type') == 'message':
                total += 1
        return total

    root_kind = _detect_json_structure(json_path)
    item_prefix = 'messages.item' if root_kind != 'list' else 'item'
    with open(json_path, 'rb') as handle:
        return sum(
            1 for entry in ijson.items(handle, item_prefix)
            if entry.get('type') == 'message'
        )
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def init_database(db_path: str) -> sqlite3.Connection:
    """
    Open (or create) the SQLite database and apply the schema.

    The schema is read from ``schema.sql`` located next to this module.
    The schema file is checked and read *before* connecting, so a missing
    schema neither creates an empty database file nor leaves an open
    connection behind.

    Args:
        db_path: Path of the SQLite database file.

    Returns:
        An open connection whose rows are ``sqlite3.Row`` objects.

    Raises:
        FileNotFoundError: If ``schema.sql`` does not exist.
    """
    schema_path = Path(__file__).parent / 'schema.sql'
    if not schema_path.exists():
        raise FileNotFoundError(f"Schema file not found: {schema_path}")
    # Explicit encoding so the schema reads the same on every platform.
    schema_sql = schema_path.read_text(encoding='utf-8')

    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row
        conn.executescript(schema_sql)
    except Exception:
        # Do not leak the connection when the schema fails to apply.
        conn.close()
        raise

    return conn
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
class OptimizedIndexer:
|
| 178 |
+
"""
|
| 179 |
+
High-performance indexer with batch processing and graph building.
|
| 180 |
+
|
| 181 |
+
Features:
|
| 182 |
+
- Batch inserts (100x faster than individual inserts)
|
| 183 |
+
- Bloom filter for duplicate detection
|
| 184 |
+
- Reply graph construction
|
| 185 |
+
- Trigram index building
|
| 186 |
+
- Progress tracking
|
| 187 |
+
"""
|
| 188 |
+
|
| 189 |
+
def __init__(
    self,
    db_path: str,
    batch_size: int = 1000,
    build_trigrams: bool = False,
    build_graph: bool = True
):
    """
    Open the database and prepare empty buffers and counters.

    Args:
        db_path: SQLite database file, passed to init_database().
        batch_size: Stored as ``self.batch_size``.
        build_trigrams: Stored flag enabling trigram rows per message.
        build_graph: When true a ReplyGraph is created; otherwise
            ``self.graph`` is None.
    """
    # Configuration
    self.db_path, self.batch_size = db_path, batch_size
    self.build_trigrams, self.build_graph = build_trigrams, build_graph

    # Collaborators
    self.conn = init_database(db_path)
    self.bloom = BloomFilter(expected_items=1000000, fp_rate=0.01)
    if build_graph:
        self.graph = ReplyGraph()
    else:
        self.graph = None

    # Pending rows, one buffer per kind
    self.message_batch: list[tuple] = []
    self.entity_batch: list[tuple] = []
    self.trigram_batch: list[tuple] = []

    # Running counters; 'users' maps a user id to its per-user info
    self.stats = {
        'messages': 0,
        'entities': 0,
        'trigrams': 0,
        'users': {},
        'skipped': 0,
        'duplicates': 0
    }
|
| 219 |
+
|
| 220 |
+
def index_file(self, json_path: str, show_progress: bool = True) -> dict:
    """
    Index a JSON file into the database.

    All messages are indexed inside one explicit transaction which is
    rolled back if anything fails; afterwards the FTS index is optimized.

    Args:
        json_path: Path of the Telegram export JSON file.
        show_progress: When true, the messages are counted up front and
            progress lines are printed; when false nothing is printed.

    Returns:
        The statistics dict (``self.stats``) extended with
        ``elapsed_seconds`` and ``messages_per_second``.

    Raises:
        Exception: Any error raised while indexing is re-raised after the
            rollback.
    """
    start_time = time.time()

    # Count total for progress (skipped entirely in quiet mode)
    total = 0
    if show_progress:
        print(f"Counting messages in {json_path}...")
        total = count_messages(json_path)
        print(f"Found {total:,} messages to index")

    # One explicit transaction for the whole file
    self.conn.execute('BEGIN TRANSACTION')

    try:
        for i, msg in enumerate(load_json_messages(json_path)):
            self._index_message(msg)

            # Progress update
            done = i + 1
            if show_progress and done % 10000 == 0:
                elapsed = time.time() - start_time
                # Guard both divisions: a coarse clock can report 0 elapsed.
                rate = done / elapsed if elapsed > 0 else 0
                eta = (total - done) / rate if rate > 0 else 0
                percent = 100 * done / total if total > 0 else 0
                print(f"  Indexed {done:,}/{total:,} ({percent:.1f}%) "
                      f"- {rate:.0f} msg/s - ETA: {eta:.0f}s")

        # Flush remaining batches
        self._flush_batches()

        # Build reply graph in database
        if self.build_graph:
            self._build_graph_tables()

        # Update users table
        self._update_users()

        # Commit transaction
        self.conn.commit()

        # Optimize FTS index
        if show_progress:
            print("Optimizing FTS index...")
        self.conn.execute("INSERT INTO messages_fts(messages_fts) VALUES('optimize')")
        self.conn.commit()

    except Exception:
        self.conn.rollback()
        raise  # bare raise keeps the original traceback

    elapsed = time.time() - start_time
    self.stats['elapsed_seconds'] = elapsed
    self.stats['messages_per_second'] = self.stats['messages'] / elapsed if elapsed > 0 else 0

    return self.stats
|
| 278 |
+
|
| 279 |
+
def _index_message(self, msg: dict) -> None:
|
| 280 |
+
"""Index a single message into batch buffers."""
|
| 281 |
+
msg_id = msg['id']
|
| 282 |
+
|
| 283 |
+
# Duplicate check with Bloom filter
|
| 284 |
+
msg_key = f"msg_{msg_id}"
|
| 285 |
+
if msg_key in self.bloom:
|
| 286 |
+
self.stats['duplicates'] += 1
|
| 287 |
+
return
|
| 288 |
+
self.bloom.add(msg_key)
|
| 289 |
+
|
| 290 |
+
# Add to message batch
|
| 291 |
+
self.message_batch.append((
|
| 292 |
+
msg['id'], msg['type'], msg['date'], msg['date_unixtime'],
|
| 293 |
+
msg['from_name'], msg['from_id'], msg['reply_to_message_id'],
|
| 294 |
+
msg['forwarded_from'], msg['forwarded_from_id'], msg['text_plain'],
|
| 295 |
+
msg['text_length'], msg['has_media'], msg['has_photo'],
|
| 296 |
+
msg['has_links'], msg['has_mentions'], msg['is_edited'],
|
| 297 |
+
msg['edited_unixtime'], msg['photo_file_size'],
|
| 298 |
+
msg['photo_width'], msg['photo_height'], msg['raw_json']
|
| 299 |
+
))
|
| 300 |
+
|
| 301 |
+
# Add entities to batch
|
| 302 |
+
for entity in msg['entities']:
|
| 303 |
+
self.entity_batch.append((msg_id, entity['type'], entity['value']))
|
| 304 |
+
|
| 305 |
+
# Add trigrams if enabled
|
| 306 |
+
if self.build_trigrams and msg['text_plain']:
|
| 307 |
+
for i, trigram in enumerate(generate_trigrams(msg['text_plain'])):
|
| 308 |
+
self.trigram_batch.append((trigram, msg_id, i))
|
| 309 |
+
|
| 310 |
+
# Build graph
|
| 311 |
+
if self.graph:
|
| 312 |
+
self.graph.add_message(msg_id, msg['reply_to_message_id'])
|
| 313 |
+
|
| 314 |
+
# Track users
|
| 315 |
+
user_id = msg['from_id']
|
| 316 |
+
if user_id:
|
| 317 |
+
if user_id not in self.stats['users']:
|
| 318 |
+
self.stats['users'][user_id] = {
|
| 319 |
+
'display_name': msg['from_name'],
|
| 320 |
+
'first_seen': msg['date_unixtime'],
|
| 321 |
+
'last_seen': msg['date_unixtime'],
|
| 322 |
+
'count': 0
|
| 323 |
+
}
|
| 324 |
+
self.stats['users'][user_id]['count'] += 1
|
| 325 |
+
ts = msg['date_unixtime']
|
| 326 |
+
if ts and ts < self.stats['users'][user_id]['first_seen']:
|
| 327 |
+
self.stats['users'][user_id]['first_seen'] = ts
|
| 328 |
+
if ts and ts > self.stats['users'][user_id]['last_seen']:
|
| 329 |
+
self.stats['users'][user_id]['last_seen'] = ts
|
| 330 |
+
|
| 331 |
+
self.stats['messages'] += 1
|
| 332 |
+
|
| 333 |
+
# Flush if batch is full
|
| 334 |
+
if len(self.message_batch) >= self.batch_size:
|
| 335 |
+
self._flush_batches()
|
| 336 |
+
|
| 337 |
+
def _flush_batches(self) -> None:
|
| 338 |
+
"""Flush all batch buffers to database."""
|
| 339 |
+
cursor = self.conn.cursor()
|
| 340 |
+
|
| 341 |
+
# Insert messages
|
| 342 |
+
if self.message_batch:
|
| 343 |
+
cursor.executemany('''
|
| 344 |
+
INSERT OR REPLACE INTO messages (
|
| 345 |
+
id, type, date, date_unixtime, from_name, from_id,
|
| 346 |
+
reply_to_message_id, forwarded_from, forwarded_from_id,
|
| 347 |
+
text_plain, text_length, has_media, has_photo, has_links,
|
| 348 |
+
has_mentions, is_edited, edited_unixtime, photo_file_size,
|
| 349 |
+
photo_width, photo_height, raw_json
|
| 350 |
+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
| 351 |
+
''', self.message_batch)
|
| 352 |
+
self.message_batch = []
|
| 353 |
+
|
| 354 |
+
# Insert entities
|
| 355 |
+
if self.entity_batch:
|
| 356 |
+
cursor.executemany('''
|
| 357 |
+
INSERT INTO entities (message_id, type, value)
|
| 358 |
+
VALUES (?, ?, ?)
|
| 359 |
+
''', self.entity_batch)
|
| 360 |
+
self.stats['entities'] += len(self.entity_batch)
|
| 361 |
+
self.entity_batch = []
|
| 362 |
+
|
| 363 |
+
# Insert trigrams
|
| 364 |
+
if self.trigram_batch:
|
| 365 |
+
cursor.executemany('''
|
| 366 |
+
INSERT OR IGNORE INTO trigrams (trigram, message_id, position)
|
| 367 |
+
VALUES (?, ?, ?)
|
| 368 |
+
''', self.trigram_batch)
|
| 369 |
+
self.stats['trigrams'] += len(self.trigram_batch)
|
| 370 |
+
self.trigram_batch = []
|
| 371 |
+
|
| 372 |
+
def _build_graph_tables(self) -> None:
|
| 373 |
+
"""Build reply graph tables from in-memory graph."""
|
| 374 |
+
if not self.graph:
|
| 375 |
+
return
|
| 376 |
+
|
| 377 |
+
print("Building reply graph tables...")
|
| 378 |
+
cursor = self.conn.cursor()
|
| 379 |
+
|
| 380 |
+
# Insert edges into reply_graph
|
| 381 |
+
edges = []
|
| 382 |
+
for parent_id, children in self.graph.children.items():
|
| 383 |
+
for child_id in children:
|
| 384 |
+
edges.append((parent_id, child_id, 1))
|
| 385 |
+
|
| 386 |
+
if edges:
|
| 387 |
+
cursor.executemany('''
|
| 388 |
+
INSERT OR IGNORE INTO reply_graph (parent_id, child_id, depth)
|
| 389 |
+
VALUES (?, ?, ?)
|
| 390 |
+
''', edges)
|
| 391 |
+
|
| 392 |
+
# Find connected components (threads)
|
| 393 |
+
print("Finding conversation threads...")
|
| 394 |
+
components = self.graph.find_connected_components()
|
| 395 |
+
|
| 396 |
+
thread_data = []
|
| 397 |
+
message_thread_data = []
|
| 398 |
+
|
| 399 |
+
for thread_id, component in enumerate(components):
|
| 400 |
+
if not component:
|
| 401 |
+
continue
|
| 402 |
+
|
| 403 |
+
# Find root (message with no parent in this component)
|
| 404 |
+
root_id = None
|
| 405 |
+
for msg_id in component:
|
| 406 |
+
if msg_id not in self.graph.parents:
|
| 407 |
+
root_id = msg_id
|
| 408 |
+
break
|
| 409 |
+
if root_id is None:
|
| 410 |
+
root_id = min(component)
|
| 411 |
+
|
| 412 |
+
# Get thread stats
|
| 413 |
+
cursor.execute('''
|
| 414 |
+
SELECT MIN(date_unixtime), MAX(date_unixtime), COUNT(DISTINCT from_id)
|
| 415 |
+
FROM messages WHERE id IN ({})
|
| 416 |
+
'''.format(','.join('?' * len(component))), list(component))
|
| 417 |
+
row = cursor.fetchone()
|
| 418 |
+
|
| 419 |
+
thread_data.append((
|
| 420 |
+
root_id,
|
| 421 |
+
len(component),
|
| 422 |
+
row[0], # first_message_time
|
| 423 |
+
row[1], # last_message_time
|
| 424 |
+
row[2] # participant_count
|
| 425 |
+
))
|
| 426 |
+
|
| 427 |
+
# Map messages to threads with depth
|
| 428 |
+
for msg_id in component:
|
| 429 |
+
depth = len(self.graph.get_ancestors(msg_id))
|
| 430 |
+
message_thread_data.append((msg_id, len(thread_data), depth))
|
| 431 |
+
|
| 432 |
+
# Insert thread data
|
| 433 |
+
cursor.executemany('''
|
| 434 |
+
INSERT INTO threads (root_message_id, message_count, first_message_time,
|
| 435 |
+
last_message_time, participant_count)
|
| 436 |
+
VALUES (?, ?, ?, ?, ?)
|
| 437 |
+
''', thread_data)
|
| 438 |
+
|
| 439 |
+
cursor.executemany('''
|
| 440 |
+
INSERT OR REPLACE INTO message_threads (message_id, thread_id, depth)
|
| 441 |
+
VALUES (?, ?, ?)
|
| 442 |
+
''', message_thread_data)
|
| 443 |
+
|
| 444 |
+
print(f" Created {len(thread_data)} conversation threads")
|
| 445 |
+
|
| 446 |
+
def _update_users(self) -> None:
|
| 447 |
+
"""Update users table from tracked data."""
|
| 448 |
+
cursor = self.conn.cursor()
|
| 449 |
+
user_data = [
|
| 450 |
+
(user_id, data['display_name'], data['first_seen'],
|
| 451 |
+
data['last_seen'], data['count'])
|
| 452 |
+
for user_id, data in self.stats['users'].items()
|
| 453 |
+
]
|
| 454 |
+
|
| 455 |
+
cursor.executemany('''
|
| 456 |
+
INSERT OR REPLACE INTO users (user_id, display_name, first_seen, last_seen, message_count)
|
| 457 |
+
VALUES (?, ?, ?, ?, ?)
|
| 458 |
+
''', user_data)
|
| 459 |
+
|
| 460 |
+
def close(self) -> None:
    """Release the SQLite connection held by this indexer."""
    connection = self.conn
    connection.close()
|
| 463 |
+
|
| 464 |
+
|
| 465 |
+
class IncrementalIndexer:
|
| 466 |
+
"""
|
| 467 |
+
Incremental indexer for adding new JSON data to existing database.
|
| 468 |
+
|
| 469 |
+
Features:
|
| 470 |
+
- Loads existing message IDs into Bloom filter
|
| 471 |
+
- Only processes new messages
|
| 472 |
+
- Updates FTS index automatically
|
| 473 |
+
- Fast duplicate detection O(1)
|
| 474 |
+
"""
|
| 475 |
+
|
| 476 |
+
def __init__(self, db_path: str, batch_size: int = 1000):
|
| 477 |
+
self.db_path = db_path
|
| 478 |
+
self.batch_size = batch_size
|
| 479 |
+
|
| 480 |
+
if not os.path.exists(db_path):
|
| 481 |
+
raise FileNotFoundError(f"Database not found: {db_path}. Use OptimizedIndexer for initial import.")
|
| 482 |
+
|
| 483 |
+
self.conn = sqlite3.connect(db_path)
|
| 484 |
+
self.conn.row_factory = sqlite3.Row
|
| 485 |
+
|
| 486 |
+
# Batch buffers
|
| 487 |
+
self.message_batch: list[tuple] = []
|
| 488 |
+
self.entity_batch: list[tuple] = []
|
| 489 |
+
|
| 490 |
+
# Stats (must be initialized before _load_existing_ids)
|
| 491 |
+
self.stats = {
|
| 492 |
+
'total_in_file': 0,
|
| 493 |
+
'new_messages': 0,
|
| 494 |
+
'duplicates': 0,
|
| 495 |
+
'entities': 0,
|
| 496 |
+
'users_updated': 0
|
| 497 |
+
}
|
| 498 |
+
|
| 499 |
+
# Load existing message IDs into Bloom filter
|
| 500 |
+
self.bloom = BloomFilter(expected_items=2000000, fp_rate=0.001)
|
| 501 |
+
self._load_existing_ids()
|
| 502 |
+
|
| 503 |
+
def _load_existing_ids(self) -> None:
|
| 504 |
+
"""Load existing message IDs into Bloom filter for O(1) duplicate detection."""
|
| 505 |
+
cursor = self.conn.cursor()
|
| 506 |
+
cursor.execute("SELECT id FROM messages")
|
| 507 |
+
|
| 508 |
+
count = 0
|
| 509 |
+
for row in cursor:
|
| 510 |
+
self.bloom.add(f"msg_{row[0]}")
|
| 511 |
+
count += 1
|
| 512 |
+
|
| 513 |
+
print(f"Loaded {count:,} existing message IDs into Bloom filter")
|
| 514 |
+
self.stats['existing_count'] = count
|
| 515 |
+
|
| 516 |
+
def update_from_json(self, json_path: str, show_progress: bool = True) -> dict:
|
| 517 |
+
"""
|
| 518 |
+
Add new messages from JSON file to existing database.
|
| 519 |
+
|
| 520 |
+
Only messages that don't exist in the database will be added.
|
| 521 |
+
FTS5 index is updated automatically.
|
| 522 |
+
Uses streaming JSON parser (ijson) when available for constant memory usage.
|
| 523 |
+
"""
|
| 524 |
+
start_time = time.time()
|
| 525 |
+
|
| 526 |
+
# Count total for progress (streaming-aware)
|
| 527 |
+
total_hint = 0
|
| 528 |
+
if show_progress:
|
| 529 |
+
total_hint = count_messages(json_path)
|
| 530 |
+
print(f"Processing ~{total_hint:,} messages from {json_path}")
|
| 531 |
+
|
| 532 |
+
self.stats['total_in_file'] = total_hint
|
| 533 |
+
|
| 534 |
+
# Start transaction
|
| 535 |
+
self.conn.execute('BEGIN TRANSACTION')
|
| 536 |
+
|
| 537 |
+
try:
|
| 538 |
+
if HAS_IJSON:
|
| 539 |
+
structure = _detect_json_structure(json_path)
|
| 540 |
+
prefix = 'item' if structure == 'list' else 'messages.item'
|
| 541 |
+
with open(json_path, 'rb') as f:
|
| 542 |
+
for i, msg in enumerate(ijson.items(f, prefix)):
|
| 543 |
+
if msg.get('type') != 'message':
|
| 544 |
+
continue
|
| 545 |
+
parsed = parse_message(msg)
|
| 546 |
+
if parsed:
|
| 547 |
+
self._process_message(parsed)
|
| 548 |
+
if show_progress and (i + 1) % 10000 == 0:
|
| 549 |
+
print(f" Processed {i+1:,} - "
|
| 550 |
+
f"New: {self.stats['new_messages']:,}, "
|
| 551 |
+
f"Duplicates: {self.stats['duplicates']:,}")
|
| 552 |
+
else:
|
| 553 |
+
with open(json_path, 'r', encoding='utf-8') as f:
|
| 554 |
+
data = json.load(f)
|
| 555 |
+
messages = data if isinstance(data, list) else data.get('messages', [])
|
| 556 |
+
self.stats['total_in_file'] = len(messages)
|
| 557 |
+
for i, msg in enumerate(messages):
|
| 558 |
+
if msg.get('type') != 'message':
|
| 559 |
+
continue
|
| 560 |
+
parsed = parse_message(msg)
|
| 561 |
+
if parsed:
|
| 562 |
+
self._process_message(parsed)
|
| 563 |
+
if show_progress and (i + 1) % 10000 == 0:
|
| 564 |
+
print(f" Processed {i+1:,}/{len(messages):,} - "
|
| 565 |
+
f"New: {self.stats['new_messages']:,}, "
|
| 566 |
+
f"Duplicates: {self.stats['duplicates']:,}")
|
| 567 |
+
|
| 568 |
+
# Flush remaining
|
| 569 |
+
self._flush_batches()
|
| 570 |
+
|
| 571 |
+
# Update user stats
|
| 572 |
+
self._update_user_stats()
|
| 573 |
+
|
| 574 |
+
# Commit
|
| 575 |
+
self.conn.commit()
|
| 576 |
+
|
| 577 |
+
# Optimize FTS if we added new data
|
| 578 |
+
if self.stats['new_messages'] > 0:
|
| 579 |
+
print("Optimizing FTS index...")
|
| 580 |
+
self.conn.execute("INSERT INTO messages_fts(messages_fts) VALUES('optimize')")
|
| 581 |
+
self.conn.commit()
|
| 582 |
+
|
| 583 |
+
except Exception as e:
|
| 584 |
+
self.conn.rollback()
|
| 585 |
+
raise e
|
| 586 |
+
|
| 587 |
+
elapsed = time.time() - start_time
|
| 588 |
+
self.stats['elapsed_seconds'] = elapsed
|
| 589 |
+
|
| 590 |
+
return self.stats
|
| 591 |
+
|
| 592 |
+
def update_from_json_data(self, json_data: dict | list, show_progress: bool = False) -> dict:
|
| 593 |
+
"""
|
| 594 |
+
Add new messages from JSON data (already parsed, not from file).
|
| 595 |
+
|
| 596 |
+
Useful for API uploads.
|
| 597 |
+
"""
|
| 598 |
+
start_time = time.time()
|
| 599 |
+
|
| 600 |
+
messages = json_data if isinstance(json_data, list) else json_data.get('messages', [])
|
| 601 |
+
self.stats['total_in_file'] = len(messages)
|
| 602 |
+
|
| 603 |
+
# Start transaction
|
| 604 |
+
self.conn.execute('BEGIN TRANSACTION')
|
| 605 |
+
|
| 606 |
+
try:
|
| 607 |
+
for msg in messages:
|
| 608 |
+
if msg.get('type') != 'message':
|
| 609 |
+
continue
|
| 610 |
+
|
| 611 |
+
parsed = parse_message(msg)
|
| 612 |
+
if parsed:
|
| 613 |
+
self._process_message(parsed)
|
| 614 |
+
|
| 615 |
+
# Flush remaining
|
| 616 |
+
self._flush_batches()
|
| 617 |
+
|
| 618 |
+
# Update user stats
|
| 619 |
+
self._update_user_stats()
|
| 620 |
+
|
| 621 |
+
# Commit
|
| 622 |
+
self.conn.commit()
|
| 623 |
+
|
| 624 |
+
# Optimize FTS if we added new data
|
| 625 |
+
if self.stats['new_messages'] > 0:
|
| 626 |
+
self.conn.execute("INSERT INTO messages_fts(messages_fts) VALUES('optimize')")
|
| 627 |
+
self.conn.commit()
|
| 628 |
+
|
| 629 |
+
except Exception as e:
|
| 630 |
+
self.conn.rollback()
|
| 631 |
+
raise e
|
| 632 |
+
|
| 633 |
+
elapsed = time.time() - start_time
|
| 634 |
+
self.stats['elapsed_seconds'] = elapsed
|
| 635 |
+
|
| 636 |
+
return self.stats
|
| 637 |
+
|
| 638 |
+
def _process_message(self, msg: dict) -> None:
|
| 639 |
+
"""Process a single message, adding to batch if new."""
|
| 640 |
+
msg_id = msg['id']
|
| 641 |
+
msg_key = f"msg_{msg_id}"
|
| 642 |
+
|
| 643 |
+
# Check if already exists (Bloom filter first, then DB if needed)
|
| 644 |
+
if msg_key in self.bloom:
|
| 645 |
+
self.stats['duplicates'] += 1
|
| 646 |
+
return
|
| 647 |
+
|
| 648 |
+
# Add to Bloom filter
|
| 649 |
+
self.bloom.add(msg_key)
|
| 650 |
+
|
| 651 |
+
# Add to message batch
|
| 652 |
+
self.message_batch.append((
|
| 653 |
+
msg['id'], msg['type'], msg['date'], msg['date_unixtime'],
|
| 654 |
+
msg['from_name'], msg['from_id'], msg['reply_to_message_id'],
|
| 655 |
+
msg['forwarded_from'], msg['forwarded_from_id'], msg['text_plain'],
|
| 656 |
+
msg['text_length'], msg['has_media'], msg['has_photo'],
|
| 657 |
+
msg['has_links'], msg['has_mentions'], msg['is_edited'],
|
| 658 |
+
msg['edited_unixtime'], msg['photo_file_size'],
|
| 659 |
+
msg['photo_width'], msg['photo_height'], msg['raw_json']
|
| 660 |
+
))
|
| 661 |
+
|
| 662 |
+
# Add entities to batch
|
| 663 |
+
for entity in msg['entities']:
|
| 664 |
+
self.entity_batch.append((msg_id, entity['type'], entity['value']))
|
| 665 |
+
|
| 666 |
+
self.stats['new_messages'] += 1
|
| 667 |
+
|
| 668 |
+
# Flush if batch is full
|
| 669 |
+
if len(self.message_batch) >= self.batch_size:
|
| 670 |
+
self._flush_batches()
|
| 671 |
+
|
| 672 |
+
def _flush_batches(self) -> None:
|
| 673 |
+
"""Flush batch buffers to database."""
|
| 674 |
+
cursor = self.conn.cursor()
|
| 675 |
+
|
| 676 |
+
# Insert messages (FTS5 trigger will update automatically)
|
| 677 |
+
if self.message_batch:
|
| 678 |
+
cursor.executemany('''
|
| 679 |
+
INSERT OR IGNORE INTO messages (
|
| 680 |
+
id, type, date, date_unixtime, from_name, from_id,
|
| 681 |
+
reply_to_message_id, forwarded_from, forwarded_from_id,
|
| 682 |
+
text_plain, text_length, has_media, has_photo, has_links,
|
| 683 |
+
has_mentions, is_edited, edited_unixtime, photo_file_size,
|
| 684 |
+
photo_width, photo_height, raw_json
|
| 685 |
+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
| 686 |
+
''', self.message_batch)
|
| 687 |
+
self.message_batch = []
|
| 688 |
+
|
| 689 |
+
# Insert entities
|
| 690 |
+
if self.entity_batch:
|
| 691 |
+
cursor.executemany('''
|
| 692 |
+
INSERT OR IGNORE INTO entities (message_id, type, value)
|
| 693 |
+
VALUES (?, ?, ?)
|
| 694 |
+
''', self.entity_batch)
|
| 695 |
+
self.stats['entities'] += len(self.entity_batch)
|
| 696 |
+
self.entity_batch = []
|
| 697 |
+
|
| 698 |
+
def _update_user_stats(self) -> None:
|
| 699 |
+
"""Update users table with aggregated stats."""
|
| 700 |
+
cursor = self.conn.cursor()
|
| 701 |
+
|
| 702 |
+
# Upsert users from messages
|
| 703 |
+
cursor.execute('''
|
| 704 |
+
INSERT OR REPLACE INTO users (user_id, display_name, first_seen, last_seen, message_count)
|
| 705 |
+
SELECT
|
| 706 |
+
from_id,
|
| 707 |
+
from_name,
|
| 708 |
+
MIN(date_unixtime),
|
| 709 |
+
MAX(date_unixtime),
|
| 710 |
+
COUNT(*)
|
| 711 |
+
FROM messages
|
| 712 |
+
WHERE from_id IS NOT NULL AND from_id != ''
|
| 713 |
+
GROUP BY from_id
|
| 714 |
+
''')
|
| 715 |
+
self.stats['users_updated'] = cursor.rowcount
|
| 716 |
+
|
| 717 |
+
def close(self) -> None:
|
| 718 |
+
"""Close database connection."""
|
| 719 |
+
self.conn.close()
|
| 720 |
+
|
| 721 |
+
|
| 722 |
+
def update_database(db_path: str, json_path: str) -> dict:
    """
    One-shot helper: import the new messages of a JSON file into a database.

    Args:
        db_path: Path to existing SQLite database
        json_path: Path to new JSON file

    Returns:
        Statistics dict
    """
    incremental = IncrementalIndexer(db_path)
    try:
        return incremental.update_from_json(json_path)
    finally:
        # Runs on success and on error alike, so the connection never leaks.
        incremental.close()
|
| 739 |
+
|
| 740 |
+
|
| 741 |
+
def main():
    """Command-line entry point for indexing a Telegram JSON export.

    Without ``--update`` a full import is run with ``OptimizedIndexer``;
    with ``--update`` only new messages are added to an existing database
    with ``IncrementalIndexer``.

    Returns:
        Process exit status: 0 on success, 1 when the JSON file (or, in
        update mode, the database) does not exist.
    """
    parser = argparse.ArgumentParser(description='Index Telegram JSON export to SQLite (Optimized)')
    parser.add_argument('json_file', help='Path to Telegram export JSON file')
    parser.add_argument('--db', default='telegram.db', help='SQLite database path')
    parser.add_argument('--batch-size', type=int, default=1000, help='Batch size for inserts')
    parser.add_argument('--build-trigrams', action='store_true', help='Build trigram index for fuzzy search')
    parser.add_argument('--no-graph', action='store_true', help='Skip building reply graph')
    parser.add_argument('--quiet', action='store_true', help='Suppress progress output')
    parser.add_argument('--update', action='store_true',
                        help='Update existing database (add only new messages)')

    args = parser.parse_args()

    if not os.path.exists(args.json_file):
        print(f"Error: JSON file not found: {args.json_file}")
        return 1

    # Update mode: add new messages to existing database
    if args.update:
        if not os.path.exists(args.db):
            print(f"Error: Database not found: {args.db}")
            print("Use without --update flag for initial import")
            return 1

        print(f"{'='*50}")
        print("INCREMENTAL UPDATE MODE")
        print(f"{'='*50}")
        print(f"Database: {args.db}")
        print(f"New JSON: {args.json_file}")
        print()

        indexer = IncrementalIndexer(args.db, args.batch_size)
        # try/finally: release the connection even when the import fails.
        try:
            stats = indexer.update_from_json(args.json_file, show_progress=not args.quiet)

            print(f"\n{'='*50}")
            print("Update complete!")
            print(f"{'='*50}")
            print(f" Messages in file: {stats['total_in_file']:,}")
            print(f" Already existed: {stats['duplicates']:,}")
            print(f" New messages added: {stats['new_messages']:,}")
            print(f" New entities: {stats['entities']:,}")
            print(f" Time elapsed: {stats['elapsed_seconds']:.1f}s")
        finally:
            indexer.close()
        return 0

    # Initial import mode
    print(f"Initializing database: {args.db}")
    indexer = OptimizedIndexer(
        db_path=args.db,
        batch_size=args.batch_size,
        build_trigrams=args.build_trigrams,
        build_graph=not args.no_graph
    )

    try:
        print(f"Indexing: {args.json_file}")
        stats = indexer.index_file(args.json_file, show_progress=not args.quiet)

        print(f"\n{'='*50}")
        print("Indexing complete!")
        print(f"{'='*50}")
        print(f" Messages indexed: {stats['messages']:,}")
        print(f" Entities extracted: {stats['entities']:,}")
        print(f" Unique users: {len(stats['users']):,}")
        print(f" Duplicates skipped: {stats['duplicates']:,}")
        if stats.get('trigrams'):
            print(f" Trigrams indexed: {stats['trigrams']:,}")
        print(f" Time elapsed: {stats['elapsed_seconds']:.1f}s")
        print(f" Speed: {stats['messages_per_second']:.0f} msg/s")
        print(f"\nDatabase saved to: {args.db}")
    finally:
        indexer.close()
    return 0
|
| 814 |
+
|
| 815 |
+
|
| 816 |
+
if __name__ == '__main__':
    # Use SystemExit directly: the bare exit() helper is injected by the
    # site module for interactive sessions and is absent under `python -S`.
    raise SystemExit(main())
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
flask>=3.0
|
| 2 |
+
gunicorn>=21.2
|
| 3 |
+
requests>=2.31
|
| 4 |
+
ijson>=3.2
|
schema.sql
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-- Telegram Chat Indexing Schema (Optimized)
-- SQLite with FTS5 for full-text search + performance optimizations
--
-- Every CREATE below uses IF NOT EXISTS, so the script can be executed
-- again on a database that already has these objects.

-- ============================================
-- PRAGMA OPTIMIZATIONS
-- ============================================
PRAGMA journal_mode = WAL;        -- Write-Ahead Logging for better concurrency
PRAGMA synchronous = NORMAL;      -- Balance between safety and speed
PRAGMA cache_size = -64000;       -- 64MB cache (negative value = size in KiB)
PRAGMA temp_store = MEMORY;       -- Store temp tables in memory
PRAGMA mmap_size = 268435456;     -- 256MB memory-mapped I/O
-- NOTE(review): PRAGMA foreign_keys is not set here. SQLite only enforces
-- the FOREIGN KEY / ON DELETE CASCADE clauses below on connections that
-- enable it - confirm the application does so if cascades are relied upon.

-- ============================================
-- MAIN TABLES
-- ============================================

-- Main messages table: one row per message, keyed by the message id.
-- The has_* / is_edited columns are 0/1 flags.
CREATE TABLE IF NOT EXISTS messages (
    id INTEGER PRIMARY KEY,
    type TEXT DEFAULT 'message',
    date TEXT,
    date_unixtime INTEGER NOT NULL,
    from_name TEXT,
    from_id TEXT NOT NULL,
    reply_to_message_id INTEGER,
    forwarded_from TEXT,
    forwarded_from_id TEXT,
    text_plain TEXT,
    text_length INTEGER DEFAULT 0,
    has_media INTEGER DEFAULT 0,
    has_photo INTEGER DEFAULT 0,
    has_links INTEGER DEFAULT 0,
    has_mentions INTEGER DEFAULT 0,
    is_edited INTEGER DEFAULT 0,
    edited_unixtime INTEGER,
    photo_file_size INTEGER,
    photo_width INTEGER,
    photo_height INTEGER,
    raw_json TEXT
);

-- Users table (extracted from messages): per-sender aggregates.
CREATE TABLE IF NOT EXISTS users (
    user_id TEXT PRIMARY KEY,
    display_name TEXT,
    first_seen INTEGER,
    last_seen INTEGER,
    message_count INTEGER DEFAULT 0
);

-- Entities table (links, mentions, etc.): zero or more rows per message.
CREATE TABLE IF NOT EXISTS entities (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    message_id INTEGER NOT NULL,
    type TEXT NOT NULL,
    value TEXT NOT NULL,
    FOREIGN KEY (message_id) REFERENCES messages(id) ON DELETE CASCADE
);

-- ============================================
-- GRAPH STRUCTURE FOR REPLY THREADS
-- ============================================

-- Pre-computed reply graph edges for fast traversal
CREATE TABLE IF NOT EXISTS reply_graph (
    parent_id INTEGER NOT NULL,
    child_id INTEGER NOT NULL,
    depth INTEGER DEFAULT 1,
    PRIMARY KEY (parent_id, child_id)
);

-- Conversation threads (connected components).
-- root_message_id is UNIQUE: at most one thread row per root message.
CREATE TABLE IF NOT EXISTS threads (
    thread_id INTEGER PRIMARY KEY AUTOINCREMENT,
    root_message_id INTEGER UNIQUE,
    message_count INTEGER DEFAULT 0,
    first_message_time INTEGER,
    last_message_time INTEGER,
    participant_count INTEGER DEFAULT 0
);

-- Message to thread mapping: each message belongs to at most one thread.
CREATE TABLE IF NOT EXISTS message_threads (
    message_id INTEGER PRIMARY KEY,
    thread_id INTEGER NOT NULL,
    depth INTEGER DEFAULT 0,
    FOREIGN KEY (thread_id) REFERENCES threads(thread_id)
);

-- ============================================
-- TRIGRAM INDEX FOR FUZZY SEARCH
-- ============================================

-- Trigrams for fuzzy/approximate string matching.
-- The composite primary key rejects repeated (trigram, message, position) rows.
CREATE TABLE IF NOT EXISTS trigrams (
    trigram TEXT NOT NULL,
    message_id INTEGER NOT NULL,
    position INTEGER NOT NULL,
    PRIMARY KEY (trigram, message_id, position)
);

-- ============================================
-- FTS5 FULL-TEXT SEARCH (OPTIMIZED)
-- ============================================

-- Full-text search with prefix index for autocomplete.
-- content='messages' makes this an external-content table: the text lives
-- in messages and the FTS rowid is messages.id, so the triggers below must
-- mirror every change to messages into the index.
CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
    text_plain,
    from_name,
    content='messages',
    content_rowid='id',
    tokenize='unicode61 remove_diacritics 2',
    prefix='2 3 4' -- Enable prefix queries for autocomplete
);

-- Triggers to keep FTS in sync

-- New message: add its text to the index.
CREATE TRIGGER IF NOT EXISTS messages_ai AFTER INSERT ON messages BEGIN
    INSERT INTO messages_fts(rowid, text_plain, from_name)
    VALUES (new.id, new.text_plain, new.from_name);
END;

-- Deleted message: issue the FTS5 'delete' command with the old values.
CREATE TRIGGER IF NOT EXISTS messages_ad AFTER DELETE ON messages BEGIN
    INSERT INTO messages_fts(messages_fts, rowid, text_plain, from_name)
    VALUES ('delete', old.id, old.text_plain, old.from_name);
END;

-- Updated message: remove the old index entry, then add the new one.
CREATE TRIGGER IF NOT EXISTS messages_au AFTER UPDATE ON messages BEGIN
    INSERT INTO messages_fts(messages_fts, rowid, text_plain, from_name)
    VALUES ('delete', old.id, old.text_plain, old.from_name);
    INSERT INTO messages_fts(rowid, text_plain, from_name)
    VALUES (new.id, new.text_plain, new.from_name);
END;

-- ============================================
-- OPTIMIZED INDEXES
-- ============================================

-- Composite indexes for common query patterns.
-- The WHERE clauses make partial indexes that only hold the matching rows.
-- NOTE(review): idx_messages_from is a leading-column prefix of
-- idx_messages_from_date, so it may be redundant - confirm before relying on it.
CREATE INDEX IF NOT EXISTS idx_messages_date ON messages(date_unixtime);
CREATE INDEX IF NOT EXISTS idx_messages_from ON messages(from_id);
CREATE INDEX IF NOT EXISTS idx_messages_from_date ON messages(from_id, date_unixtime);
CREATE INDEX IF NOT EXISTS idx_messages_reply ON messages(reply_to_message_id) WHERE reply_to_message_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_messages_forwarded ON messages(forwarded_from_id) WHERE forwarded_from_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_messages_has_links ON messages(has_links) WHERE has_links = 1;
CREATE INDEX IF NOT EXISTS idx_messages_has_media ON messages(has_media) WHERE has_media = 1;

-- Entity indexes
CREATE INDEX IF NOT EXISTS idx_entities_message ON entities(message_id);
CREATE INDEX IF NOT EXISTS idx_entities_type_value ON entities(type, value);
CREATE INDEX IF NOT EXISTS idx_entities_value ON entities(value);

-- Graph indexes
CREATE INDEX IF NOT EXISTS idx_reply_graph_child ON reply_graph(child_id);
CREATE INDEX IF NOT EXISTS idx_message_threads_thread ON message_threads(thread_id);

-- Trigram index
-- NOTE(review): the trigrams primary key already starts with the trigram
-- column, so this index may duplicate it - confirm it is needed.
CREATE INDEX IF NOT EXISTS idx_trigrams_trigram ON trigrams(trigram);

-- ============================================
-- PARTICIPANTS TABLE (from Telethon API)
-- ============================================

-- Chat member profile data; the is_* columns are 0/1 flags.
CREATE TABLE IF NOT EXISTS participants (
    user_id TEXT PRIMARY KEY,
    first_name TEXT,
    last_name TEXT,
    username TEXT,
    phone TEXT,
    is_bot INTEGER DEFAULT 0,
    is_admin INTEGER DEFAULT 0,
    is_creator INTEGER DEFAULT 0,
    is_premium INTEGER DEFAULT 0,
    join_date INTEGER,
    last_status TEXT DEFAULT 'unknown',
    last_online INTEGER,
    about TEXT,
    updated_at INTEGER
);

-- ============================================
-- STATISTICS TABLE FOR FAST AGGREGATIONS
-- ============================================

-- Key/value cache with a timestamp column for staleness checks.
CREATE TABLE IF NOT EXISTS stats_cache (
    key TEXT PRIMARY KEY,
    value TEXT,
    updated_at INTEGER
);

-- ============================================
-- VECTOR EMBEDDINGS TABLE (OPTIONAL)
-- ============================================

-- For semantic search with FAISS: one embedding per message.
CREATE TABLE IF NOT EXISTS embeddings (
    message_id INTEGER PRIMARY KEY,
    embedding BLOB, -- Serialized numpy array
    model_name TEXT DEFAULT 'default',
    FOREIGN KEY (message_id) REFERENCES messages(id) ON DELETE CASCADE
);
|
search.py
ADDED
|
@@ -0,0 +1,564 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Telegram Chat Search Utilities (Optimized)
|
| 4 |
+
|
| 5 |
+
Features:
|
| 6 |
+
- Full-text search with BM25 ranking
|
| 7 |
+
- LRU caching for repeated queries
|
| 8 |
+
- Fuzzy search with trigram similarity
|
| 9 |
+
- Thread traversal with DFS/BFS
|
| 10 |
+
- Autocomplete suggestions
|
| 11 |
+
|
| 12 |
+
Usage:
|
| 13 |
+
python search.py <query> [options]
|
| 14 |
+
python search.py "שלום" --db telegram.db
|
| 15 |
+
python search.py "link" --user user123 --fuzzy
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
import sqlite3
|
| 19 |
+
import argparse
|
| 20 |
+
from datetime import datetime
|
| 21 |
+
from typing import Optional
|
| 22 |
+
from functools import lru_cache
|
| 23 |
+
|
| 24 |
+
from data_structures import LRUCache, Trie, TrigramIndex, ReplyGraph, lru_cached
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class TelegramSearch:
    """
    High-performance search interface for indexed Telegram messages.

    Features:
    - Full-text search with FTS5 and BM25 ranking
    - Query result caching (LRU)
    - Fuzzy/approximate search with trigrams
    - Thread reconstruction with graph traversal
    - Autocomplete for usernames and common terms

    The trigram index, reply graph and user trie are built lazily, on the
    first call that needs them, and then kept on the instance.

    Instances are context managers; leaving the ``with`` block closes the
    underlying SQLite connection.
    """

    # Maximum number of ids bound into a single "IN (...)" clause. SQLite
    # caps the number of bound parameters per statement (999 by default in
    # builds older than 3.32.0), so larger id lists are fetched in chunks.
    _MAX_SQL_VARIABLES = 500

    def __init__(self, db_path: str = 'telegram.db', cache_size: int = 1000):
        """
        Open the database and set up empty caches.

        Args:
            db_path: Path to the SQLite database file.
            cache_size: Maximum number of entries in the query-result cache.
        """
        self.db_path = db_path
        self.conn = sqlite3.connect(db_path)
        # Rows behave like mappings so they can be turned into dicts.
        self.conn.row_factory = sqlite3.Row

        # Initialize caches
        self.query_cache = LRUCache(maxsize=cache_size)
        self.user_trie: Optional[Trie] = None
        self.trigram_index: Optional[TrigramIndex] = None
        self.reply_graph: Optional[ReplyGraph] = None

    def close(self):
        """Close the database connection."""
        self.conn.close()

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    # ==========================================
    # FULL-TEXT SEARCH
    # ==========================================

    def search(
        self,
        query: str,
        user_id: Optional[str] = None,
        from_date: Optional[int] = None,
        to_date: Optional[int] = None,
        has_links: Optional[bool] = None,
        has_mentions: Optional[bool] = None,
        has_media: Optional[bool] = None,
        limit: int = 100,
        offset: int = 0,
        use_cache: bool = True
    ) -> list[dict]:
        """
        Full-text search with BM25 ranking and optional filters.

        Args:
            query: FTS5 query (supports AND, OR, NOT, "phrase", prefix*)
            user_id: Filter by user ID
            from_date: Unix timestamp lower bound (inclusive)
            to_date: Unix timestamp upper bound (inclusive)
            has_links/has_mentions/has_media: Boolean filters; None means
                "do not filter on this flag"
            limit: Max results
            offset: Pagination offset
            use_cache: Whether to use LRU cache

        Returns:
            List of message dicts, each with a ``relevance`` score, ordered
            by ascending ``relevance`` (the raw bm25() value). On a cache
            hit the cached list object itself is returned.
        """
        # Every argument that influences the result is part of the key.
        cache_key = f"search:{query}:{user_id}:{from_date}:{to_date}:{has_links}:{has_mentions}:{has_media}:{limit}:{offset}"

        if use_cache:
            cached = self.query_cache.get(cache_key)
            if cached is not None:
                return cached

        # Build query conditions; values are always bound, never interpolated.
        conditions = []
        params = []

        if user_id:
            conditions.append("m.from_id = ?")
            params.append(user_id)

        if from_date:
            conditions.append("m.date_unixtime >= ?")
            params.append(from_date)

        if to_date:
            conditions.append("m.date_unixtime <= ?")
            params.append(to_date)

        if has_links is not None:
            conditions.append("m.has_links = ?")
            params.append(1 if has_links else 0)

        if has_mentions is not None:
            conditions.append("m.has_mentions = ?")
            params.append(1 if has_mentions else 0)

        if has_media is not None:
            conditions.append("m.has_media = ?")
            params.append(1 if has_media else 0)

        # "1=1" keeps the generated SQL valid when no filter is active.
        where_clause = " AND ".join(conditions) if conditions else "1=1"

        sql = f'''
            SELECT
                m.id,
                m.date,
                m.date_unixtime,
                m.from_name,
                m.from_id,
                m.text_plain,
                m.reply_to_message_id,
                m.forwarded_from,
                m.has_links,
                m.has_mentions,
                m.has_media,
                bm25(messages_fts, 1.0, 0.5) as relevance
            FROM messages_fts
            JOIN messages m ON messages_fts.rowid = m.id
            WHERE messages_fts MATCH ?
            AND {where_clause}
            ORDER BY relevance
            LIMIT ? OFFSET ?
        '''

        params = [query] + params + [limit, offset]

        cursor = self.conn.execute(sql, params)
        results = [dict(row) for row in cursor.fetchall()]

        if use_cache:
            self.query_cache.put(cache_key, results)

        return results

    def search_prefix(self, prefix: str, limit: int = 100) -> list[dict]:
        """
        Search using prefix matching (autocomplete-style).

        Uses FTS5 prefix index for fast prefix queries. The prefix is passed
        to FTS5 as-is with a trailing ``*``, so FTS5 operators inside it are
        still interpreted.

        Args:
            prefix: Text the matched term must start with.
            limit: Max results.

        Returns:
            List of message dicts as returned by search(); an empty list
            when the prefix is empty or only whitespace.
        """
        # A bare "*" is not a valid FTS5 query; nothing to complete anyway.
        if not prefix or not prefix.strip():
            return []

        # FTS5 prefix search syntax
        query = f'{prefix}*'
        return self.search(query, limit=limit, use_cache=True)

    # ==========================================
    # FUZZY SEARCH
    # ==========================================

    def fuzzy_search(
        self,
        query: str,
        threshold: float = 0.3,
        limit: int = 50
    ) -> list[dict]:
        """
        Fuzzy search using trigram similarity.

        Finds messages even with typos or slight variations.

        Args:
            query: Search query
            threshold: Minimum similarity (0-1)
            limit: Max results

        Returns:
            List of message dicts, each carrying an extra ``similarity``
            key, in the order produced by the trigram index. Matches whose
            message no longer exists in the database are dropped.
        """
        # Build trigram index if not exists
        if self.trigram_index is None:
            self._build_trigram_index()

        # Search trigram index
        matches = list(self.trigram_index.search(query, threshold=threshold, limit=limit))

        # Fetch all matched messages in bulk instead of one query per match.
        found = {
            msg['id']: msg
            for msg in self._fetch_messages_ordered([msg_id for msg_id, _ in matches])
        }

        results = []
        for msg_id, similarity in matches:
            if msg_id in found:
                # Copy so that each result owns its own 'similarity' value.
                msg = dict(found[msg_id])
                msg['similarity'] = similarity
                results.append(msg)

        return results

    def _build_trigram_index(self) -> None:
        """Build in-memory trigram index from database."""
        print("Building trigram index (first time only)...")
        self.trigram_index = TrigramIndex()

        cursor = self.conn.execute(
            'SELECT id, text_plain FROM messages WHERE text_plain IS NOT NULL'
        )
        # Iterate the cursor directly so rows are streamed, not materialised.
        for row in cursor:
            self.trigram_index.add(row[0], row[1])

        print(f"Trigram index built: {len(self.trigram_index)} documents")

    # ==========================================
    # THREAD TRAVERSAL
    # ==========================================

    def _ensure_reply_graph(self):
        """Return the reply graph, building it on first use."""
        if self.reply_graph is None:
            self._build_reply_graph()
        return self.reply_graph

    def get_thread_dfs(self, message_id: int) -> list[dict]:
        """
        Get full conversation thread using DFS traversal.

        Returns messages in depth-first order (follows reply chains deep).
        """
        graph = self._ensure_reply_graph()

        # Find thread root
        root_id = graph.get_thread_root(message_id)

        # DFS traversal
        msg_ids = graph.dfs_descendants(root_id)

        # Fetch messages in order
        return self._fetch_messages_ordered(msg_ids)

    def get_thread_bfs(self, message_id: int) -> list[dict]:
        """
        Get conversation thread using BFS traversal.

        Returns messages level by level.
        """
        graph = self._ensure_reply_graph()

        root_id = graph.get_thread_root(message_id)
        msg_ids = graph.bfs_descendants(root_id)

        return self._fetch_messages_ordered(msg_ids)

    def get_thread_with_depth(self, message_id: int) -> list[tuple[dict, int]]:
        """
        Get thread with depth information for each message.

        Returns list of (message, depth) tuples in the order produced by the
        graph's breadth-first traversal. Ids missing from the messages table
        are skipped.
        """
        graph = self._ensure_reply_graph()

        root_id = graph.get_thread_root(message_id)
        items = list(graph.bfs_with_depth(root_id))

        # Fetch all thread messages in bulk instead of one query per message.
        found = {
            msg['id']: msg
            for msg in self._fetch_messages_ordered([msg_id for msg_id, _ in items])
        }

        return [(found[msg_id], depth) for msg_id, depth in items if msg_id in found]

    def get_replies(self, message_id: int) -> list[dict]:
        """Get all direct replies to a message."""
        graph = self._ensure_reply_graph()

        child_ids = graph.get_children(message_id)
        return self._fetch_messages_ordered(child_ids)

    def get_conversation_path(self, message_id: int) -> list[dict]:
        """Get the path from thread root to this message."""
        graph = self._ensure_reply_graph()

        path_ids = graph.get_thread_path(message_id)
        return self._fetch_messages_ordered(path_ids)

    def _build_reply_graph(self) -> None:
        """Build in-memory reply graph from database."""
        print("Building reply graph (first time only)...")
        self.reply_graph = ReplyGraph()

        cursor = self.conn.execute(
            'SELECT id, reply_to_message_id FROM messages'
        )
        # Iterate the cursor directly so rows are streamed, not materialised.
        for row in cursor:
            self.reply_graph.add_message(row[0], row[1])

        print(f"Reply graph built: {self.reply_graph.stats}")

    def _fetch_messages_ordered(self, msg_ids: list[int]) -> list[dict]:
        """
        Fetch messages preserving the order of IDs.

        Args:
            msg_ids: Message ids in the desired output order.

        Returns:
            One dict per id that exists in ``messages``, in the order given;
            unknown ids are silently skipped.
        """
        if not msg_ids:
            return []

        ids = list(msg_ids)
        msg_map = {}

        # Query in chunks so the number of bound parameters stays below
        # SQLite's per-statement limit, however long the thread is.
        step = self._MAX_SQL_VARIABLES
        for start in range(0, len(ids), step):
            chunk = ids[start:start + step]
            placeholders = ','.join('?' * len(chunk))
            cursor = self.conn.execute(
                f'SELECT * FROM messages WHERE id IN ({placeholders})',
                chunk
            )
            for row in cursor:
                msg_map[row['id']] = dict(row)

        # Return in original order
        return [msg_map[mid] for mid in ids if mid in msg_map]

    # ==========================================
    # AUTOCOMPLETE
    # ==========================================

    def autocomplete_user(self, prefix: str, limit: int = 10) -> list[str]:
        """
        Autocomplete username suggestions.

        Uses Trie for O(p + k) lookup where p=prefix length, k=results.
        """
        if self.user_trie is None:
            self._build_user_trie()

        return self.user_trie.autocomplete(prefix, limit=limit)

    def _build_user_trie(self) -> None:
        """Build Trie index for usernames (display names and user ids)."""
        self.user_trie = Trie()

        cursor = self.conn.execute('SELECT user_id, display_name FROM users')
        for row in cursor:
            # Both the display name and the raw id complete to the user id.
            if row['display_name']:
                self.user_trie.insert(row['display_name'], data=row['user_id'])
            if row['user_id']:
                self.user_trie.insert(row['user_id'], data=row['user_id'])

    # ==========================================
    # CONVENIENCE METHODS
    # ==========================================

    def search_by_user(self, user_id: str, limit: int = 100) -> list[dict]:
        """Get messages from a specific user, newest first, up to ``limit``."""
        sql = '''
            SELECT * FROM messages
            WHERE from_id = ?
            ORDER BY date_unixtime DESC
            LIMIT ?
        '''
        cursor = self.conn.execute(sql, (user_id, limit))
        return [dict(row) for row in cursor.fetchall()]

    def search_by_date_range(
        self,
        from_date: int,
        to_date: int,
        limit: int = 1000
    ) -> list[dict]:
        """Get messages within a date range (inclusive bounds), oldest first."""
        sql = '''
            SELECT * FROM messages
            WHERE date_unixtime BETWEEN ? AND ?
            ORDER BY date_unixtime ASC
            LIMIT ?
        '''
        cursor = self.conn.execute(sql, (from_date, to_date, limit))
        return [dict(row) for row in cursor.fetchall()]

    def get_links(self, limit: int = 100) -> list[dict]:
        """Get extracted links, newest first, up to ``limit``."""
        sql = '''
            SELECT e.value as url, e.message_id, m.from_name, m.date
            FROM entities e
            JOIN messages m ON e.message_id = m.id
            WHERE e.type = 'link'
            ORDER BY m.date_unixtime DESC
            LIMIT ?
        '''
        cursor = self.conn.execute(sql, (limit,))
        return [dict(row) for row in cursor.fetchall()]

    def get_mentions(self, username: Optional[str] = None, limit: int = 100) -> list[dict]:
        """
        Get mentions, optionally filtered by username.

        Args:
            username: If given, only mentions whose value contains this text
                are returned. The text is matched literally: ``%`` and ``_``
                are not treated as wildcards.
            limit: Max results.

        Returns:
            List of dicts with mention, message_id, from_name, text_plain
            and date, newest first.
        """
        conditions = ["e.type = 'mention'"]
        params = []

        if username:
            # Escape LIKE metacharacters; "_" is common in usernames and
            # would otherwise match any single character.
            escaped = (
                username
                .replace('\\', '\\\\')
                .replace('%', '\\%')
                .replace('_', '\\_')
            )
            conditions.append("e.value LIKE ? ESCAPE '\\'")
            params.append(f'%{escaped}%')

        sql = f'''
            SELECT e.value as mention, e.message_id, m.from_name, m.text_plain, m.date
            FROM entities e
            JOIN messages m ON e.message_id = m.id
            WHERE {' AND '.join(conditions)}
            ORDER BY m.date_unixtime DESC
            LIMIT ?
        '''
        params.append(limit)

        cursor = self.conn.execute(sql, params)
        return [dict(row) for row in cursor.fetchall()]

    @property
    def cache_stats(self) -> dict:
        """Get cache statistics."""
        return self.query_cache.stats
|
| 435 |
+
|
| 436 |
+
|
| 437 |
+
def format_result(msg: dict, show_depth: bool = False, depth: int = 0) -> str:
    """
    Format a message for display.

    Args:
        msg: Message dict. Reads the keys date, from_name, text_plain,
            has_links, has_mentions, has_media, similarity and relevance;
            all are optional and may be None.
        show_depth: When True, prefix the line with one space per depth level.
        depth: Nesting level of the message inside its thread.

    Returns:
        A single line of the form "[date] sender: text flags". The text is
        cut to 200 characters, with "..." appended when it was longer.
    """
    # Keys can be present but None (NULL columns), so fall back with "or"
    # rather than relying on the dict.get() default.
    date_str = msg.get('date') or 'Unknown date'
    from_name = msg.get('from_name') or 'Unknown'
    full_text = msg.get('text_plain') or ''

    text = full_text[:200]
    if len(full_text) > 200:
        text += '...'

    flags = []
    if msg.get('has_links'):
        flags.append('[link]')
    if msg.get('has_mentions'):
        flags.append('[mention]')
    if msg.get('has_media'):
        flags.append('[media]')
    # Compare against None so that a score of exactly 0.0 is still shown.
    if msg.get('similarity') is not None:
        flags.append(f'[sim:{msg["similarity"]:.2f}]')
    if msg.get('relevance') is not None:
        # abs() because search() orders by ascending raw bm25() score;
        # only the magnitude is displayed.
        flags.append(f'[rel:{abs(msg["relevance"]):.2f}]')

    flags_str = ' '.join(flags)
    indent = ' ' * depth if show_depth else ''
    return f"{indent}[{date_str}] {from_name}: {text} {flags_str}"
|
| 460 |
+
|
| 461 |
+
|
| 462 |
+
def main():
    """
    Command-line entry point.

    Parses the arguments, opens the database and runs exactly one mode, in
    this order of precedence: --thread, --autocomplete, --list-links,
    --list-mentions, --cache-stats, then a (fuzzy or full-text) search for
    the positional query. Without a mode or query the help text is printed.
    """
    parser = argparse.ArgumentParser(description='Search indexed Telegram messages')
    parser.add_argument('query', nargs='?', help='Search query')
    parser.add_argument('--db', default='telegram.db', help='Database path')
    parser.add_argument('--user', help='Filter by user ID')
    parser.add_argument('--from-date', help='From date (YYYY-MM-DD)')
    parser.add_argument('--to-date', help='To date (YYYY-MM-DD)')
    parser.add_argument('--links', action='store_true', help='Show only messages with links')
    parser.add_argument('--mentions', action='store_true', help='Show only messages with mentions')
    parser.add_argument('--media', action='store_true', help='Show only messages with media')
    parser.add_argument('--limit', type=int, default=50, help='Max results')
    parser.add_argument('--fuzzy', action='store_true', help='Use fuzzy search')
    parser.add_argument('--threshold', type=float, default=0.3, help='Fuzzy match threshold')
    parser.add_argument('--thread', type=int, help='Show thread for message ID')
    parser.add_argument('--list-links', action='store_true', help='List all extracted links')
    parser.add_argument('--list-mentions', action='store_true', help='List all mentions')
    parser.add_argument('--autocomplete', help='Autocomplete username')
    parser.add_argument('--cache-stats', action='store_true', help='Show cache statistics')

    args = parser.parse_args()

    with TelegramSearch(args.db) as search:
        # Show thread ("is not None" so that message id 0 is accepted too)
        if args.thread is not None:
            print(f"Thread containing message {args.thread}:\n")
            thread = search.get_thread_with_depth(args.thread)
            for msg, depth in thread:
                print(format_result(msg, show_depth=True, depth=depth))
            return

        # Autocomplete
        if args.autocomplete:
            suggestions = search.autocomplete_user(args.autocomplete)
            print(f"Suggestions for '{args.autocomplete}':")
            for s in suggestions:
                print(f" {s}")
            return

        # List links
        if args.list_links:
            links = search.get_links(args.limit)
            print(f"Found {len(links)} links:\n")
            for link in links:
                print(f" {link['url']}")
                print(f" From: {link['from_name']} at {link['date']}")
            return

        # List mentions
        if args.list_mentions:
            mentions = search.get_mentions(limit=args.limit)
            print(f"Found {len(mentions)} mentions:\n")
            for m in mentions:
                print(f" {m['mention']} by {m['from_name']}")
            return

        # Cache stats
        if args.cache_stats:
            print(f"Cache stats: {search.cache_stats}")
            return

        if not args.query:
            parser.print_help()
            return

        # Parse dates (interpreted in the local timezone)
        from_ts = None
        to_ts = None
        try:
            if args.from_date:
                from_ts = int(datetime.strptime(args.from_date, '%Y-%m-%d').timestamp())
            if args.to_date:
                # Use the last second of the day so that messages sent on
                # the --to-date day itself are included in the range.
                end_of_day = datetime.strptime(args.to_date, '%Y-%m-%d').replace(
                    hour=23, minute=59, second=59
                )
                to_ts = int(end_of_day.timestamp())
        except ValueError as exc:
            # Report a malformed date as a usage error instead of a traceback.
            parser.error(f'invalid date, expected YYYY-MM-DD: {exc}')

        # Fuzzy or regular search
        if args.fuzzy:
            # Note: the user/date/flag filters are not applied in fuzzy mode.
            results = search.fuzzy_search(
                query=args.query,
                threshold=args.threshold,
                limit=args.limit
            )
            print(f"Found {len(results)} fuzzy matches for '{args.query}':\n")
        else:
            results = search.search(
                query=args.query,
                user_id=args.user,
                from_date=from_ts,
                to_date=to_ts,
                has_links=True if args.links else None,
                has_mentions=True if args.mentions else None,
                has_media=True if args.media else None,
                limit=args.limit
            )
            print(f"Found {len(results)} results for '{args.query}':\n")

        for msg in results:
            print(format_result(msg))
            print()

        # Show cache stats
        print(f"\nCache: {search.cache_stats}")
|
| 561 |
+
|
| 562 |
+
|
| 563 |
+
if __name__ == '__main__':
|
| 564 |
+
main()
|
semantic_search.py
ADDED
|
@@ -0,0 +1,411 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Semantic Search using pre-computed embeddings from Colab.
|
| 3 |
+
Lightweight - only needs sentence-transformers for query encoding.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import sqlite3
|
| 7 |
+
import numpy as np
|
| 8 |
+
from typing import List, Dict, Any, Optional
|
| 9 |
+
|
| 10 |
+
# Try importing sentence-transformers
|
| 11 |
+
try:
|
| 12 |
+
from sentence_transformers import SentenceTransformer
|
| 13 |
+
HAS_TRANSFORMERS = True
|
| 14 |
+
except ImportError:
|
| 15 |
+
HAS_TRANSFORMERS = False
|
| 16 |
+
SentenceTransformer = None
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class SemanticSearch:
|
| 20 |
+
"""
|
| 21 |
+
Semantic search using pre-computed embeddings.
|
| 22 |
+
|
| 23 |
+
The embeddings.db file is created by running the Colab notebook.
|
| 24 |
+
This class just loads and searches them.
|
| 25 |
+
"""
|
| 26 |
+
|
| 27 |
+
def __init__(self, embeddings_db: str = 'embeddings.db', messages_db: str = 'telegram.db'):
|
| 28 |
+
self.embeddings_db = embeddings_db
|
| 29 |
+
self.messages_db = messages_db
|
| 30 |
+
self.model = None
|
| 31 |
+
self.embeddings_loaded = False
|
| 32 |
+
self.embeddings = []
|
| 33 |
+
self.message_ids = []
|
| 34 |
+
self.from_names = []
|
| 35 |
+
self.text_previews = []
|
| 36 |
+
|
| 37 |
+
def _load_model(self):
|
| 38 |
+
"""Load the embedding model (same one used in Colab)."""
|
| 39 |
+
if not HAS_TRANSFORMERS:
|
| 40 |
+
raise RuntimeError(
|
| 41 |
+
"sentence-transformers not installed.\n"
|
| 42 |
+
"Install with: pip install sentence-transformers"
|
| 43 |
+
)
|
| 44 |
+
if self.model is None:
|
| 45 |
+
print("Loading embedding model...")
|
| 46 |
+
self.model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
|
| 47 |
+
print("Model loaded!")
|
| 48 |
+
|
| 49 |
+
def reload_embeddings(self):
|
| 50 |
+
"""Force reload embeddings from DB (e.g., after daily sync adds new ones)."""
|
| 51 |
+
self.embeddings_loaded = False
|
| 52 |
+
self.embeddings = np.array([]).reshape(0, 0)
|
| 53 |
+
self.message_ids = []
|
| 54 |
+
self.from_names = []
|
| 55 |
+
self.text_previews = []
|
| 56 |
+
self._load_embeddings()
|
| 57 |
+
|
| 58 |
+
def _load_embeddings(self):
|
| 59 |
+
"""Load all embeddings into memory for fast search."""
|
| 60 |
+
if self.embeddings_loaded:
|
| 61 |
+
return
|
| 62 |
+
|
| 63 |
+
import os
|
| 64 |
+
if not os.path.exists(self.embeddings_db):
|
| 65 |
+
print(f"Embeddings DB not found: {self.embeddings_db}")
|
| 66 |
+
self.embeddings_loaded = True
|
| 67 |
+
self.embeddings = np.array([]).reshape(0, 0)
|
| 68 |
+
return
|
| 69 |
+
|
| 70 |
+
print(f"Loading embeddings from {self.embeddings_db}...")
|
| 71 |
+
conn = sqlite3.connect(self.embeddings_db)
|
| 72 |
+
cursor = conn.execute(
|
| 73 |
+
"SELECT message_id, from_name, text_preview, embedding FROM embeddings"
|
| 74 |
+
)
|
| 75 |
+
|
| 76 |
+
emb_list = []
|
| 77 |
+
for row in cursor:
|
| 78 |
+
msg_id, name, text, emb_blob = row
|
| 79 |
+
emb = np.frombuffer(emb_blob, dtype=np.float32)
|
| 80 |
+
self.message_ids.append(msg_id)
|
| 81 |
+
self.from_names.append(name or '')
|
| 82 |
+
self.text_previews.append(text or '')
|
| 83 |
+
emb_list.append(emb)
|
| 84 |
+
|
| 85 |
+
conn.close()
|
| 86 |
+
|
| 87 |
+
if len(emb_list) == 0:
|
| 88 |
+
print("No embeddings found in database")
|
| 89 |
+
self.embeddings = np.array([]).reshape(0, 0)
|
| 90 |
+
self.embeddings_loaded = True
|
| 91 |
+
return
|
| 92 |
+
|
| 93 |
+
# Stack into numpy array for fast computation
|
| 94 |
+
self.embeddings = np.vstack(emb_list)
|
| 95 |
+
# Normalize embeddings for cosine similarity
|
| 96 |
+
norms = np.linalg.norm(self.embeddings, axis=1, keepdims=True)
|
| 97 |
+
norms = np.where(norms == 0, 1, norms) # Avoid division by zero
|
| 98 |
+
self.embeddings = self.embeddings / norms
|
| 99 |
+
self.embeddings_loaded = True
|
| 100 |
+
print(f"Loaded {len(self.message_ids)} embeddings")
|
| 101 |
+
|
| 102 |
+
def search(self, query: str, limit: int = 50, min_score: float = 0.3) -> List[Dict[str, Any]]:
    """
    Search for semantically similar messages.

    Args:
        query: The search query
        limit: Max results to return; values <= 0 yield no results
        min_score: Minimum cosine similarity a message needs to be returned

    Returns:
        List of dicts with message_id, from_name, text, score, ordered from
        the highest score down. Empty when nothing is loaded, when no
        message reaches ``min_score``, or when the query encodes to an
        all-zero vector.
    """
    self._load_model()
    self._load_embeddings()

    if limit <= 0 or len(self.message_ids) == 0:
        return []

    # Encode query
    query_emb = self.model.encode([query], convert_to_numpy=True)[0]

    # The stored rows are unit-normalized by _load_embeddings, so the dot
    # product with the unit query vector is the cosine similarity.
    query_length = np.linalg.norm(query_emb)
    if query_length == 0:
        # A zero vector has no direction; dividing by its norm would turn
        # every score into NaN and NaN slips past the min_score filter.
        return []
    similarities = np.dot(self.embeddings, query_emb / query_length)

    # Best `limit` candidates, highest similarity first
    top_indices = np.argsort(similarities)[::-1][:limit]

    results = []
    for idx in top_indices:
        score = float(similarities[idx])
        if score < min_score:
            # Candidates are in descending order, so nothing after this
            # one can reach the threshold either.
            break
        results.append({
            'message_id': int(self.message_ids[idx]),
            'from_name': self.from_names[idx],
            'text': self.text_previews[idx],
            'score': score
        })

    return results
|
| 146 |
+
|
| 147 |
+
def search_with_full_text(self, query: str, limit: int = 20) -> List[Dict[str, Any]]:
    """
    Search and enrich each hit with data from the messages DB.

    Runs ``self.search`` and, for every hit whose id exists in the
    ``messages`` table of ``self.messages_db``, sets ``date``,
    ``from_name`` and ``reply_to_message_id`` from that row and replaces
    the preview text with the full ``text_plain``. Hits without a matching
    row are returned unchanged.

    Args:
        query: The search query
        limit: Max results to return

    Returns:
        The (mutated) list of result dicts from ``self.search``.
    """
    results = self.search(query, limit=limit)

    if not results:
        return []

    conn = sqlite3.connect(self.messages_db)
    try:
        conn.row_factory = sqlite3.Row

        for result in results:
            row = conn.execute(
                "SELECT date, from_name, text_plain, reply_to_message_id FROM messages WHERE id = ?",
                (result['message_id'],)
            ).fetchone()
            if row is None:
                continue
            result['date'] = row['date']
            result['from_name'] = row['from_name']
            # Keep the embedding preview when the stored full text is NULL,
            # so the hit does not end up with text=None.
            if row['text_plain'] is not None:
                result['text'] = row['text_plain']
            result['reply_to_message_id'] = row['reply_to_message_id']
    finally:
        # Release the connection even if a lookup fails.
        conn.close()

    return results
|
| 174 |
+
|
| 175 |
+
def _add_thread_context(self, results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Add FULL thread context to search results.

    For each message, find the entire conversation thread:
    1. Follow ``reply_to_message_id`` upwards to the thread's root message
    2. Fetch the roots, then walk downwards level by level collecting
       replies (at most 200 rows per level, at most 11 levels)

    Args:
        results: Search hits; each needs ``message_id`` and may carry
            ``reply_to_message_id`` and ``date``.

    Returns:
        The original hit dicts plus one dict per extra thread message
        (marked ``is_thread_context=True``), sorted by ``date``. An empty
        input is returned as is.
    """
    if not results:
        return results

    max_depth = 10
    level_limit = 200

    def as_context(row):
        # Shape a messages row like a search hit, flagged as context only.
        return {
            'message_id': row['id'],
            'date': row['date'],
            'from_name': row['from_name'],
            'text': row['text_plain'],
            'reply_to_message_id': row['reply_to_message_id'],
            'is_thread_context': True
        }

    all_messages = {r['message_id']: r for r in results}
    thread_roots = set()

    conn = sqlite3.connect(self.messages_db)
    try:
        conn.row_factory = sqlite3.Row

        # Step 1: Find root messages by following reply chains UP
        for result in results:
            current_id = result['message_id']
            parent_id = result.get('reply_to_message_id')
            visited = {current_id}  # guards against reply cycles

            while parent_id and parent_id not in visited:
                visited.add(parent_id)
                row = conn.execute(
                    "SELECT id, reply_to_message_id FROM messages WHERE id = ?",
                    (parent_id,)
                ).fetchone()
                if row is None:
                    # Parent is not in the DB; the last known message
                    # stands in as the root.
                    break
                current_id = row['id']
                parent_id = row['reply_to_message_id']

            thread_roots.add(current_id)

        # Step 2: Fetch the root messages themselves
        root_list = list(thread_roots)
        placeholders = ','.join('?' * len(root_list))
        cursor = conn.execute(f"""
            SELECT id, date, from_name, text_plain, reply_to_message_id
            FROM messages WHERE id IN ({placeholders})
        """, root_list)
        for row in cursor:
            if row['id'] not in all_messages:
                all_messages[row['id']] = as_context(row)

        # Step 3: Collect replies breadth-first. Only the messages found on
        # the previous level are queried: replies to older messages were
        # already fetched, and re-querying them would let known rows eat
        # into the per-level LIMIT and make the parameter list grow with
        # every level.
        frontier = thread_roots | set(all_messages)
        for _ in range(max_depth + 1):
            if not frontier:
                break
            frontier_list = list(frontier)
            placeholders = ','.join('?' * len(frontier_list))
            cursor = conn.execute(f"""
                SELECT id, date, from_name, text_plain, reply_to_message_id
                FROM messages WHERE reply_to_message_id IN ({placeholders})
                LIMIT {level_limit}
            """, frontier_list)

            frontier = set()
            for row in cursor:
                if row['id'] not in all_messages:
                    all_messages[row['id']] = as_context(row)
                    frontier.add(row['id'])
    finally:
        # Release the connection even if a query fails.
        conn.close()

    # Sort all messages by date (missing/NULL dates sort first)
    all_list = list(all_messages.values())
    all_list.sort(key=lambda x: x.get('date', '') or '')

    return all_list
|
| 285 |
+
|
| 286 |
+
def search_with_ai_answer(self, query: str, ai_engine, limit: int = 30) -> Dict[str, Any]:
    """
    Search semantically and send results to AI for reasoning.

    This combines the power of:
    1. Semantic search (finds relevant messages by meaning)
    2. Thread context (includes replies to/from found messages)
    3. AI reasoning (reads messages and answers the question)

    Args:
        query: The user's question
        ai_engine: Object with a ``provider`` attribute ('gemini', 'groq'
            or 'ollama') and the matching ``_call_<provider>(prompt)``
            method; any other provider yields a fixed "not available"
            answer
        limit: Max semantic hits to start from

    Returns:
        Dict with ``query``, ``answer``, ``mode``, ``results`` (the direct
        hits) and ``count``; when hits exist also ``total_with_threads``.
        Errors raised by the AI call are reported inside ``answer``.
    """
    results = self.search_with_full_text(query, limit=limit)

    if not results:
        return {
            'query': query,
            'answer': 'לא נמצאו הודעות רלוונטיות',
            'mode': 'semantic_ai',
            'results': [],
            'count': 0
        }

    # Get thread context for each result
    results_with_threads = self._add_thread_context(results)

    # Build context from semantic search results + threads. Messages
    # without text are left out; each message contributes at most 500
    # characters.
    context_lines = []
    for r in results_with_threads:
        text = r.get('text')
        if not text:
            continue
        # `or` instead of a .get() default: the keys usually exist but may
        # hold None (NULL in the DB), which would otherwise be rendered as
        # the literal word "None".
        date = r.get('date') or ''
        sender = r.get('from_name') or 'Unknown'
        context_lines.append(f"[{date}] {sender}: {text[:500]}")
    context_text = "\n".join(context_lines)

    # Send to AI for reasoning
    reason_prompt = f"""You are analyzing a Telegram chat history to answer a question.
The messages below were found using semantic search, along with their thread context (replies).
Read them carefully and provide a comprehensive answer.

Question: {query}

Relevant messages and their threads:
{context_text}

Based on these messages, answer the question in Hebrew.
If you can find the answer, provide it clearly.
Pay special attention to reply chains - the answer might be in a reply!
If you can infer information from context clues, do so.
Cite specific messages when relevant.

Answer:"""

    try:
        # Call the appropriate AI provider based on engine configuration
        provider = getattr(ai_engine, 'provider', None)
        if provider == 'gemini':
            answer = ai_engine._call_gemini(reason_prompt)
        elif provider == 'groq':
            answer = ai_engine._call_groq(reason_prompt)
        elif provider == 'ollama':
            answer = ai_engine._call_ollama(reason_prompt)
        else:
            answer = "AI engine not available for reasoning"
    except Exception as e:
        # Best effort: surface the failure to the user instead of raising.
        answer = f"שגיאה ב-AI: {str(e)}"

    return {
        'query': query,
        'answer': answer,
        'mode': 'semantic_ai',
        'results': results,  # Original results for display
        'count': len(results),
        'total_with_threads': len(results_with_threads)
    }
|
| 355 |
+
|
| 356 |
+
def is_available(self) -> bool:
    """Check if semantic search is available.

    Returns:
        True only when the module-level ``HAS_TRANSFORMERS`` flag is set,
        the file ``self.embeddings_db`` exists and its ``embeddings``
        table holds at least one row. Any error while probing the
        database counts as "not available".
    """
    import os
    if not HAS_TRANSFORMERS or not os.path.exists(self.embeddings_db):
        return False
    try:
        conn = sqlite3.connect(self.embeddings_db)
        try:
            count = conn.execute("SELECT COUNT(*) FROM embeddings").fetchone()[0]
        finally:
            # Close even when the query fails (e.g. the table is missing).
            conn.close()
        return count > 0
    except Exception:
        return False
|
| 368 |
+
|
| 369 |
+
def stats(self) -> Dict[str, Any]:
    """Get statistics about the embeddings.

    Returns:
        ``{'available': False, 'error': ...}`` when the database file is
        missing or cannot be queried; otherwise a dict with
        ``available=True``, the row ``count`` of the ``embeddings`` table,
        the file size ``size_mb`` (rounded to one decimal) and the
        ``model`` name.
    """
    import os

    if not os.path.exists(self.embeddings_db):
        return {'available': False, 'error': 'embeddings.db not found'}

    try:
        conn = sqlite3.connect(self.embeddings_db)
        try:
            count = conn.execute("SELECT COUNT(*) FROM embeddings").fetchone()[0]
        finally:
            # Close even when the query fails.
            conn.close()
    except sqlite3.Error as exc:
        # Report an unreadable DB the same way as a missing one instead of
        # raising out of a status call.
        return {'available': False, 'error': str(exc)}

    size_mb = os.path.getsize(self.embeddings_db) / (1024 * 1024)

    return {
        'available': True,
        'count': count,
        'size_mb': round(size_mb, 1),
        'model': 'paraphrase-multilingual-MiniLM-L12-v2'
    }
|
| 389 |
+
|
| 390 |
+
|
| 391 |
+
# Module-wide SemanticSearch instance, created on first request
_search_instance = None


def get_semantic_search() -> SemanticSearch:
    """Return the shared SemanticSearch instance, creating it on first use."""
    global _search_instance
    if _search_instance is not None:
        return _search_instance
    _search_instance = SemanticSearch()
    return _search_instance
|
| 400 |
+
|
| 401 |
+
|
| 402 |
+
if __name__ == '__main__':
    # Manual smoke test: print the embedding stats and, when search is
    # available, the top five hits for a sample Hebrew query.
    searcher = SemanticSearch()
    print("Stats:", searcher.stats())

    if searcher.is_available():
        hits = searcher.search("איפה אתה עובד?", limit=5)
        print("\nResults for 'איפה אתה עובד?':")
        for hit in hits:
            print(f" [{hit['score']:.3f}] {hit['from_name']}: {hit['text'][:60]}...")
|
static/css/style.css
ADDED
|
@@ -0,0 +1,859 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* ==========================================
|
| 2 |
+
TELEGRAM ANALYTICS DASHBOARD - CSS
|
| 3 |
+
========================================== */
|
| 4 |
+
|
| 5 |
+
:root {
    /* Brand and status palette */
    --primary: #0088cc;
    --primary-light: #33a3d9;
    --primary-dark: #006699;
    --secondary: #6c757d;
    --info: #17a2b8;
    --success: #28a745;
    --warning: #ffc107;
    --danger: #dc3545;

    /* Dark theme: surfaces */
    --bg-dark: #1a1a2e;
    --bg-sidebar: #0f0f23;
    --bg-card: #16213e;

    /* Dark theme: text and borders */
    --text-primary: #ffffff;
    --text-secondary: #a0aec0;
    --text-muted: #718096;
    --border-color: #2d3748;

    /* Layout dimensions */
    --sidebar-width: 250px;
    --header-height: 70px;

    /* Spacing scale */
    --spacing-xs: 0.25rem;
    --spacing-sm: 0.5rem;
    --spacing-md: 1rem;
    --spacing-lg: 1.5rem;
    --spacing-xl: 2rem;

    /* Corner radii */
    --radius-sm: 4px;
    --radius-md: 8px;
    --radius-lg: 12px;
}
|
| 39 |
+
|
| 40 |
+
/* Global reset: border-box sizing, no default margin or padding */
* {
    box-sizing: border-box;
    margin: 0;
    padding: 0;
}

/* Page shell: full-height flex container on the dark theme background */
body {
    display: flex;
    min-height: 100vh;
    background: var(--bg-dark);
    color: var(--text-primary);
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
}
|
| 54 |
+
|
| 55 |
+
/* ==========================================
|
| 56 |
+
SIDEBAR
|
| 57 |
+
========================================== */
|
| 58 |
+
|
| 59 |
+
/* Fixed, full-height navigation column pinned to the left edge */
.sidebar {
    position: fixed;
    top: 0;
    left: 0;
    z-index: 100;
    display: flex;
    flex-direction: column;
    width: var(--sidebar-width);
    height: 100vh;
    background: var(--bg-sidebar);
    border-right: 1px solid var(--border-color);
}

/* Logo row: icon and title side by side */
.logo {
    display: flex;
    align-items: center;
    gap: var(--spacing-md);
    padding: var(--spacing-lg);
    border-bottom: 1px solid var(--border-color);
}

.logo-icon {
    font-size: 2rem;
}

.logo-text {
    color: var(--primary);
    font-size: 1.25rem;
    font-weight: 700;
}

/* Menu takes the space left between logo and footer */
.nav-menu {
    flex: 1;
    padding: var(--spacing-md);
    list-style: none;
}

.nav-item {
    margin-bottom: var(--spacing-xs);
}

.nav-link {
    display: flex;
    align-items: center;
    gap: var(--spacing-md);
    padding: var(--spacing-md);
    border-radius: var(--radius-md);
    color: var(--text-secondary);
    text-decoration: none;
    transition: all 0.2s ease;
}

.nav-link:hover {
    color: var(--text-primary);
    background: var(--bg-card);
}

/* Current page entry */
.nav-item.active .nav-link {
    color: white;
    background: var(--primary);
}

.nav-link .icon {
    font-size: 1.25rem;
}

.sidebar-footer {
    padding: var(--spacing-md);
    border-top: 1px solid var(--border-color);
}

/* Export buttons stacked vertically */
.export-buttons {
    display: flex;
    flex-direction: column;
    gap: var(--spacing-sm);
}
|
| 135 |
+
|
| 136 |
+
/* ==========================================
|
| 137 |
+
MAIN CONTENT
|
| 138 |
+
========================================== */
|
| 139 |
+
|
| 140 |
+
/* Content area: offset by the fixed sidebar and limited to the remaining width */
.main-content {
    flex: 1;
    max-width: calc(100vw - var(--sidebar-width));
    margin-left: var(--sidebar-width);
    padding: var(--spacing-lg);
}
|
| 146 |
+
|
| 147 |
+
/* ==========================================
|
| 148 |
+
HEADER
|
| 149 |
+
========================================== */
|
| 150 |
+
|
| 151 |
+
/* Page header: title on one side, controls on the other */
.header {
    display: flex;
    align-items: center;
    justify-content: space-between;
    margin-bottom: var(--spacing-xl);
    padding-bottom: var(--spacing-lg);
    border-bottom: 1px solid var(--border-color);
}

.header h1 {
    font-weight: 600;
    font-size: 1.75rem;
}

/* Row of controls inside the header */
.header-controls {
    display: flex;
    align-items: center;
    gap: var(--spacing-md);
}
|
| 170 |
+
|
| 171 |
+
/* ==========================================
|
| 172 |
+
BUTTONS & INPUTS
|
| 173 |
+
========================================== */
|
| 174 |
+
|
| 175 |
+
/* Base button: bordered card-coloured pill with inline icon/text layout.
   The border is declared once; the rule previously also carried a
   `border: none` that a later `border` declaration in the same rule
   overrode. */
.btn {
    padding: var(--spacing-sm) var(--spacing-md);
    border: 1px solid var(--border-color);
    border-radius: var(--radius-md);
    cursor: pointer;
    font-size: 0.875rem;
    font-weight: 500;
    transition: all 0.2s ease;
    display: inline-flex;
    align-items: center;
    gap: var(--spacing-sm);
    background: var(--bg-card);
    color: var(--text-primary);
}

.btn:hover {
    background: var(--border-color);
}

/* Primary action: filled brand colour, no border */
.btn-primary {
    background: var(--primary);
    color: white;
    border: none;
}

.btn-primary:hover {
    background: var(--primary-dark);
}

/* Compact button variant */
.btn-sm {
    padding: var(--spacing-xs) var(--spacing-sm);
    font-size: 0.75rem;
}

/* Select boxes share the card look of the buttons */
.select, .select-sm {
    padding: var(--spacing-sm) var(--spacing-md);
    border: 1px solid var(--border-color);
    border-radius: var(--radius-md);
    background: var(--bg-card);
    color: var(--text-primary);
    font-size: 0.875rem;
    cursor: pointer;
}

/* Compact select: overrides padding and font size of the shared rule above */
.select-sm {
    padding: var(--spacing-xs) var(--spacing-sm);
    font-size: 0.75rem;
}

/* Focus is signalled by the border colour instead of the default outline */
.select:focus, .select-sm:focus {
    outline: none;
    border-color: var(--primary);
}

input[type="text"], input[type="search"] {
    padding: var(--spacing-sm) var(--spacing-md);
    border: 1px solid var(--border-color);
    border-radius: var(--radius-md);
    background: var(--bg-card);
    color: var(--text-primary);
    font-size: 0.875rem;
    width: 100%;
}

input:focus {
    outline: none;
    border-color: var(--primary);
}
|
| 244 |
+
|
| 245 |
+
/* ==========================================
|
| 246 |
+
STATS CARDS
|
| 247 |
+
========================================== */
|
| 248 |
+
|
| 249 |
+
/* Responsive grid: as many columns of at least 180px as fit */
.stats-grid {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
    gap: var(--spacing-md);
    margin-bottom: var(--spacing-xl);
}

/* Single statistic: icon next to value and label */
.stat-card {
    display: flex;
    align-items: center;
    gap: var(--spacing-md);
    padding: var(--spacing-lg);
    border: 1px solid var(--border-color);
    border-radius: var(--radius-lg);
    background: var(--bg-card);
    transition: transform 0.2s ease, box-shadow 0.2s ease;
}

/* Lift the card slightly on hover */
.stat-card:hover {
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
    transform: translateY(-2px);
}

.stat-icon {
    opacity: 0.9;
    font-size: 2.5rem;
}

.stat-value {
    color: var(--text-primary);
    font-weight: 700;
    font-size: 1.75rem;
}

.stat-label {
    margin-top: var(--spacing-xs);
    color: var(--text-muted);
    font-size: 0.875rem;
}
|
| 288 |
+
|
| 289 |
+
/* ==========================================
|
| 290 |
+
CHARTS
|
| 291 |
+
========================================== */
|
| 292 |
+
|
| 293 |
+
/* Two equal-width chart columns */
.charts-row {
    display: grid;
    grid-template-columns: repeat(2, 1fr);
    gap: var(--spacing-lg);
    margin-bottom: var(--spacing-xl);
}

.chart-card {
    padding: var(--spacing-lg);
    border: 1px solid var(--border-color);
    border-radius: var(--radius-lg);
    background: var(--bg-card);
}

/* Column span modifiers */
.chart-card.large {
    grid-column: span 1;
}

.chart-card.full-width {
    grid-column: span 2;
}

/* Chart title on one side, extras on the other */
.chart-header {
    display: flex;
    align-items: center;
    justify-content: space-between;
    margin-bottom: var(--spacing-md);
}

.chart-header h3 {
    color: var(--text-primary);
    font-weight: 600;
    font-size: 1rem;
}

.chart-subtitle {
    color: var(--text-muted);
    font-size: 0.75rem;
}

/* Fixed-height, positioned box that holds the chart */
.chart-container {
    height: 250px;
    position: relative;
}
|
| 337 |
+
|
| 338 |
+
/* ==========================================
|
| 339 |
+
HEATMAP
|
| 340 |
+
========================================== */
|
| 341 |
+
|
| 342 |
+
/* Let a wide heatmap scroll horizontally */
.heatmap-container {
    overflow-x: auto;
}

.heatmap-table {
    width: 100%;
    font-size: 0.75rem;
    border-collapse: collapse;
}

.heatmap-table th,
.heatmap-table td {
    min-width: 35px;
    padding: var(--spacing-xs);
    text-align: center;
}

.heatmap-table th {
    font-weight: 500;
    color: var(--text-muted);
}

/* One square per heatmap slot */
.heatmap-cell {
    display: inline-block;
    width: 30px;
    height: 30px;
    border-radius: var(--radius-sm);
    transition: transform 0.2s ease;
}

/* Enlarge the hovered cell */
.heatmap-cell:hover {
    transform: scale(1.2);
}

/* Row label; !important lets its right padding win over other padding rules */
.day-label {
    padding-right: var(--spacing-sm) !important;
    color: var(--text-secondary);
    text-align: right;
}
|
| 381 |
+
|
| 382 |
+
/* ==========================================
|
| 383 |
+
LISTS
|
| 384 |
+
========================================== */
|
| 385 |
+
|
| 386 |
+
/* Three equal-width list columns */
.lists-row {
    display: grid;
    grid-template-columns: repeat(3, 1fr);
    gap: var(--spacing-lg);
    margin-bottom: var(--spacing-xl);
}

/* overflow: hidden clips the content to the rounded corners */
.list-card {
    overflow: hidden;
    border: 1px solid var(--border-color);
    border-radius: var(--radius-lg);
    background: var(--bg-card);
}

.list-header {
    display: flex;
    align-items: center;
    justify-content: space-between;
    padding: var(--spacing-md) var(--spacing-lg);
    border-bottom: 1px solid var(--border-color);
}

.list-header h3 {
    font-weight: 600;
    font-size: 1rem;
}

.link {
    font-size: 0.875rem;
    color: var(--primary);
    text-decoration: none;
}

.link:hover {
    text-decoration: underline;
}

/* Scroll vertically once the list exceeds 350px */
.list-content {
    overflow-y: auto;
    max-height: 350px;
}

.list-item {
    display: flex;
    align-items: center;
    gap: var(--spacing-md);
    padding: var(--spacing-sm) var(--spacing-lg);
    border-bottom: 1px solid var(--border-color);
}

.list-item:last-child {
    border-bottom: none;
}

.list-item:hover {
    background: rgba(255, 255, 255, 0.02);
}

.list-rank {
    min-width: 30px;
    color: var(--text-muted);
    font-weight: 700;
}

/* Medal colours for the rank modifiers */
.list-rank.gold { color: #ffd700; }
.list-rank.silver { color: #c0c0c0; }
.list-rank.bronze { color: #cd7f32; }

/* min-width: 0 lets the flex child shrink so the name can be truncated */
.list-info {
    min-width: 0;
    flex: 1;
}

/* Single line, cut off with an ellipsis */
.list-name {
    overflow: hidden;
    white-space: nowrap;
    text-overflow: ellipsis;
    font-weight: 500;
}

.list-subtitle {
    color: var(--text-muted);
    font-size: 0.75rem;
}

.list-value {
    color: var(--primary);
    font-weight: 600;
}
|
| 475 |
+
|
| 476 |
+
/* ==========================================
|
| 477 |
+
USERS PAGE
|
| 478 |
+
========================================== */
|
| 479 |
+
|
| 480 |
+
.users-table {
    border-collapse: collapse;
    width: 100%;
}

.users-table th,
.users-table td {
    text-align: left;
    padding: var(--spacing-md);
    border-bottom: 1px solid var(--border-color);
}

/* Header cells stay pinned to the top while scrolling */
.users-table th {
    position: sticky;
    top: 0;
    background: var(--bg-sidebar);
    color: var(--text-secondary);
    font-size: 0.875rem;
    font-weight: 600;
}

.users-table tr:hover {
    background: rgba(255, 255, 255, 0.02);
}

/* Round badge with centred content */
.user-avatar {
    display: flex;
    align-items: center;
    justify-content: center;
    width: 36px;
    height: 36px;
    border-radius: 50%;
    background: var(--primary);
    font-size: 0.875rem;
    font-weight: 700;
}

.user-cell {
    display: flex;
    align-items: center;
    gap: var(--spacing-md);
}

/* Progress track; overflow: hidden clips the fill to the rounded ends */
.progress-bar {
    overflow: hidden;
    height: 6px;
    margin-top: var(--spacing-xs);
    border-radius: 3px;
    background: var(--border-color);
}

.progress-fill {
    height: 100%;
    border-radius: 3px;
    background: var(--primary);
}
|
| 536 |
+
|
| 537 |
+
/* ==========================================
|
| 538 |
+
SEARCH PAGE
|
| 539 |
+
========================================== */
|
| 540 |
+
|
| 541 |
+
/* Row holding the search input and its neighbouring controls. */
.search-box {
    display: flex;
    gap: var(--spacing-md);
    margin-bottom: var(--spacing-xl);
}

/* The input takes all remaining width in the row. */
.search-input {
    flex: 1;
}

.search-results {
    border: 1px solid var(--border-color);
    border-radius: var(--radius-lg);
    background: var(--bg-card);
}

/* Results are separated by a rule, except after the last one. */
.search-result-item {
    border-bottom: 1px solid var(--border-color);
    padding: var(--spacing-lg);
}

.search-result-item:last-child {
    border-bottom: none;
}

.search-result-header {
    display: flex;
    align-items: center;
    justify-content: space-between;
    margin-bottom: var(--spacing-sm);
}

.search-result-author {
    color: var(--primary);
    font-weight: 600;
}

.search-result-date {
    color: var(--text-muted);
    font-size: 0.75rem;
}

.search-result-text {
    color: var(--text-secondary);
    line-height: 1.5;
}

/* Translucent marker behind highlighted text. */
.search-highlight {
    padding: 0 2px;
    border-radius: 2px;
    background: rgba(0, 136, 204, 0.3);
}
|
| 593 |
+
|
| 594 |
+
/* ==========================================
|
| 595 |
+
PAGINATION
|
| 596 |
+
========================================== */
|
| 597 |
+
|
| 598 |
+
/* Centred row of page buttons. */
.pagination {
    display: flex;
    justify-content: center;
    gap: var(--spacing-sm);
    margin-top: var(--spacing-xl);
}

.page-btn {
    padding: var(--spacing-sm) var(--spacing-md);
    background: var(--bg-card);
    border: 1px solid var(--border-color);
    border-radius: var(--radius-md);
    color: var(--text-primary);
    cursor: pointer;
    transition: all 0.2s ease;
}

/* Hover feedback only for buttons that can actually be pressed.
   :not(:disabled) stops a disabled button from lighting up under the
   pointer. :not(.active) is required alongside it: the extra :not() raises
   this selector's specificity above .page-btn.active, so without the
   exclusion the hover colour would start overriding the active colour. */
.page-btn:hover:not(:disabled):not(.active) {
    background: var(--border-color);
}

/* Current page. */
.page-btn.active {
    background: var(--primary);
    border-color: var(--primary);
}

.page-btn:disabled {
    opacity: 0.5;
    cursor: not-allowed;
}
|
| 628 |
+
|
| 629 |
+
/* ==========================================
|
| 630 |
+
USER MODAL
|
| 631 |
+
========================================== */
|
| 632 |
+
|
| 633 |
+
/* Full-viewport backdrop. Hidden by default through opacity + visibility
   so that the change can be transitioned; the .active class reveals it. */
.modal-overlay {
    position: fixed;
    top: 0;
    right: 0;
    bottom: 0;
    left: 0;
    z-index: 1000;
    display: flex;
    align-items: center;
    justify-content: center;
    background: rgba(0, 0, 0, 0.7);
    visibility: hidden;
    opacity: 0;
    transition: all 0.3s ease;
}

.modal-overlay.active {
    visibility: visible;
    opacity: 1;
}

/* Dialog panel. Starts shifted 20px up and slides into place when the
   overlay becomes active. Tall content scrolls inside the panel. */
.modal {
    width: 90%;
    max-width: 600px;
    max-height: 80vh;
    overflow-y: auto;
    border: 1px solid var(--border-color);
    border-radius: var(--radius-lg);
    background: var(--bg-card);
    transform: translateY(-20px);
    transition: transform 0.3s ease;
}

.modal-overlay.active .modal {
    transform: translateY(0);
}

.modal-header {
    display: flex;
    align-items: center;
    justify-content: space-between;
    border-bottom: 1px solid var(--border-color);
    padding: var(--spacing-lg);
}

.modal-header h2 {
    font-size: 1.25rem;
}

/* Chrome-less close button. */
.modal-close {
    border: none;
    background: none;
    color: var(--text-secondary);
    font-size: 1.5rem;
    cursor: pointer;
}

.modal-body {
    padding: var(--spacing-lg);
}

/* Avatar + name header inside the modal body. */
.user-profile {
    display: flex;
    align-items: center;
    gap: var(--spacing-lg);
    margin-bottom: var(--spacing-xl);
}

.user-profile-avatar {
    display: flex;
    align-items: center;
    justify-content: center;
    width: 80px;
    height: 80px;
    border-radius: 50%;
    background: var(--primary);
    font-size: 2rem;
    font-weight: 700;
}

.user-profile-info h3 {
    margin-bottom: var(--spacing-xs);
    font-size: 1.5rem;
}

.user-profile-info p {
    color: var(--text-muted);
}

/* Three equal columns of stat tiles. */
.user-stats-grid {
    display: grid;
    grid-template-columns: repeat(3, 1fr);
    gap: var(--spacing-md);
    margin-bottom: var(--spacing-xl);
}

.user-stat {
    padding: var(--spacing-md);
    border-radius: var(--radius-md);
    background: var(--bg-sidebar);
    text-align: center;
}

.user-stat-value {
    color: var(--primary);
    font-size: 1.5rem;
    font-weight: 700;
}

.user-stat-label {
    margin-top: var(--spacing-xs);
    color: var(--text-muted);
    font-size: 0.75rem;
}
|
| 747 |
+
|
| 748 |
+
/* ==========================================
|
| 749 |
+
LOADING & EMPTY STATES
|
| 750 |
+
========================================== */
|
| 751 |
+
|
| 752 |
+
/* Centred placeholder shown while content is being fetched. */
.loading {
    display: flex;
    align-items: center;
    justify-content: center;
    color: var(--text-muted);
    padding: var(--spacing-xl);
}

/* Ring with one accent-coloured edge, rotated continuously.
   border-top-color must stay after the border shorthand it overrides. */
.spinner {
    width: 40px;
    height: 40px;
    border-radius: 50%;
    border: 3px solid var(--border-color);
    border-top-color: var(--primary);
    animation: spin 1s linear infinite;
}

@keyframes spin {
    to {
        transform: rotate(360deg);
    }
}

/* Placeholder shown when there is nothing to display. */
.empty-state {
    color: var(--text-muted);
    padding: var(--spacing-xl);
    text-align: center;
}

.empty-state-icon {
    margin-bottom: var(--spacing-md);
    font-size: 3rem;
    opacity: 0.5;
}
|
| 784 |
+
|
| 785 |
+
/* ==========================================
|
| 786 |
+
RESPONSIVE
|
| 787 |
+
========================================== */
|
| 788 |
+
|
| 789 |
+
/* <= 1200px: lists drop to two columns. */
@media (max-width: 1200px) {
    .lists-row {
        grid-template-columns: repeat(2, 1fr);
    }
}

/* <= 992px: the sidebar collapses to a 70px rail with its text labels
   hidden, the main content is offset to match, and chart/list grids
   become a single column. */
@media (max-width: 992px) {
    .sidebar {
        width: 70px;
    }

    .logo-text,
    .nav-link span:not(.icon) {
        display: none;
    }

    .main-content {
        max-width: calc(100vw - 70px);
        margin-left: 70px;
    }

    .charts-row {
        grid-template-columns: 1fr;
    }

    /* Wide cards may no longer span more than the single column. */
    .chart-card.full-width,
    .chart-card.large {
        grid-column: span 1;
    }

    .lists-row {
        grid-template-columns: 1fr;
    }
}

/* <= 768px: two-column stat grids and a stacked page header. */
@media (max-width: 768px) {
    .stats-grid {
        grid-template-columns: repeat(2, 1fr);
    }

    .header {
        flex-direction: column;
        align-items: flex-start;
        gap: var(--spacing-md);
    }

    .user-stats-grid {
        grid-template-columns: repeat(2, 1fr);
    }
}
|
| 838 |
+
|
| 839 |
+
/* ==========================================
|
| 840 |
+
SCROLLBAR
|
| 841 |
+
========================================== */
|
| 842 |
+
|
| 843 |
+
/* WebKit/Blink scrollbar styling. */
::-webkit-scrollbar {
    width: 8px;
    height: 8px;
}

::-webkit-scrollbar-track {
    background: var(--bg-sidebar);
}

::-webkit-scrollbar-thumb {
    background: var(--border-color);
    border-radius: 4px;
}

::-webkit-scrollbar-thumb:hover {
    background: var(--text-muted);
}

/* Standard-property fallback for engines without the ::-webkit-scrollbar
   pseudo-elements (e.g. Firefox). It is wrapped in @supports on purpose:
   browsers that understand both mechanisms let the standard properties
   take precedence, which would discard the rules above. */
@supports not selector(::-webkit-scrollbar) {
    * {
        scrollbar-width: thin;
        scrollbar-color: var(--border-color) var(--bg-sidebar);
    }
}
|
static/js/dashboard.js
ADDED
|
@@ -0,0 +1,622 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Telegram Analytics Dashboard - JavaScript
|
| 3 |
+
*
|
| 4 |
+
* Handles all interactivity:
|
| 5 |
+
* - Data fetching from API
|
| 6 |
+
* - Chart rendering with Chart.js
|
| 7 |
+
* - Real-time updates
|
| 8 |
+
* - User interactions
|
| 9 |
+
* - Export functionality
|
| 10 |
+
*/
|
| 11 |
+
|
| 12 |
+
// ==========================================
|
| 13 |
+
// GLOBAL STATE
|
| 14 |
+
// ==========================================
|
| 15 |
+
|
| 16 |
+
/**
 * Shared mutable state for the dashboard script.
 */
const state = {
    // Fallback timeframe, used by getTimeframe() when the page has no
    // #timeframe select.
    timeframe: 'month',

    // Chart instances keyed by name, kept so an old chart can be destroyed
    // before a new one is drawn on the same canvas.
    charts: {},

    // Interval id while auto-refresh is running, otherwise null.
    autoRefresh: null,

    // Presumably pagination settings for the users list; they are not read
    // in this part of the file.
    currentPage: 1,
    usersPerPage: 20
};
|
| 23 |
+
|
| 24 |
+
// Chart.js global defaults: text colour, border/grid colour and font stack
// applied to every chart created afterwards.
Object.assign(Chart.defaults, {
    color: '#a0aec0',
    borderColor: '#2d3748'
});
Chart.defaults.font.family = '-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif';
|
| 28 |
+
|
| 29 |
+
// ==========================================
|
| 30 |
+
// UTILITY FUNCTIONS
|
| 31 |
+
// ==========================================
|
| 32 |
+
|
| 33 |
+
/**
 * Format a count for display, abbreviating thousands ("1.5K") and
 * millions ("2.5M") to one decimal place.
 *
 * @param {number|string|null|undefined} num - Value to format.
 * @returns {string} The formatted value, or '-' when the input is missing
 *     or not a finite number (instead of throwing on null/undefined).
 */
function formatNumber(num) {
    const value = Number(num);
    if (num == null || !Number.isFinite(value)) return '-';

    // 999950 and above would round to "1000.0K" with one decimal, so the
    // millions threshold is placed where the K rendering rolls over.
    if (value >= 999950) return (value / 1000000).toFixed(1) + 'M';
    if (value >= 1000) return (value / 1000).toFixed(1) + 'K';
    return value.toLocaleString();
}
|
| 38 |
+
|
| 39 |
+
/**
 * Format a timestamp given in seconds as a short en-US date such as
 * "Nov 15, 2023".
 *
 * @param {number} timestamp - Seconds since the Unix epoch.
 * @returns {string} The formatted date, or '-' for a falsy timestamp.
 */
function formatDate(timestamp) {
    if (timestamp) {
        const dateOptions = { year: 'numeric', month: 'short', day: 'numeric' };
        const milliseconds = timestamp * 1000;
        return new Date(milliseconds).toLocaleDateString('en-US', dateOptions);
    }
    return '-';
}
|
| 47 |
+
|
| 48 |
+
/**
 * Read the currently selected timeframe.
 *
 * @returns {string} The value of the #timeframe select when the page has
 *     one, otherwise the timeframe stored in the shared state.
 */
function getTimeframe() {
    const picker = document.getElementById('timeframe');
    if (picker) {
        return picker.value;
    }
    return state.timeframe;
}
|
| 52 |
+
|
| 53 |
+
/**
 * Fetch JSON from an API endpoint with the active timeframe appended as a
 * `timeframe` query parameter.
 *
 * @param {string} endpoint - Path of the endpoint; may already carry a
 *     query string.
 * @returns {Promise<*>} The parsed JSON body, or null when the request
 *     fails, the server answers with a non-2xx status, or the body is not
 *     valid JSON. Failures are logged, never thrown.
 */
async function fetchAPI(endpoint) {
    try {
        const timeframe = encodeURIComponent(getTimeframe());
        const separator = endpoint.includes('?') ? '&' : '?';
        const response = await fetch(`${endpoint}${separator}timeframe=${timeframe}`);

        // fetch() only rejects on network errors; an HTTP error still
        // resolves, so the status has to be checked explicitly. Treating it
        // as "no data" keeps callers from rendering an error payload.
        if (!response.ok) {
            console.error('API Error:', `${endpoint} responded with HTTP ${response.status}`);
            return null;
        }

        return await response.json();
    } catch (error) {
        console.error('API Error:', error);
        return null;
    }
}
|
| 64 |
+
|
| 65 |
+
/**
 * Replace the content of an element with the loading spinner.
 * Does nothing when no element has the given id.
 *
 * @param {string} elementId - Id of the element to fill.
 */
function showLoading(elementId) {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }
    target.innerHTML = '<div class="loading"><div class="spinner"></div></div>';
}
|
| 71 |
+
|
| 72 |
+
/**
 * Replace the content of an element with the empty-state placeholder.
 * Does nothing when no element has the given id.
 *
 * @param {string} elementId - Id of the element to fill.
 * @param {string} [message='No data available'] - Text shown under the
 *     icon. It is inserted as HTML without escaping.
 */
function showEmpty(elementId, message = 'No data available') {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }
    target.innerHTML = `
            <div class="empty-state">
                <div class="empty-state-icon">📭</div>
                <p>${message}</p>
            </div>
        `;
}
|
| 83 |
+
|
| 84 |
+
// ==========================================
|
| 85 |
+
// DATA LOADING
|
| 86 |
+
// ==========================================
|
| 87 |
+
|
| 88 |
+
/**
 * Refresh every dashboard section in parallel.
 *
 * Stores the current timeframe in the shared state, then starts all
 * section loaders. Each loader is isolated: a failure in one section is
 * logged and does not reject the whole refresh, so this function never
 * produces an unhandled rejection when used as a timer or event callback.
 *
 * @returns {Promise<void>} Resolves once every loader has settled.
 */
async function loadAllData() {
    state.timeframe = getTimeframe();

    const loaders = [
        loadOverviewStats,
        loadMessagesChart,
        loadUsersChart,
        loadHourlyChart,
        loadDailyChart,
        loadHeatmap,
        loadTopUsers,
        loadTopWords,
        loadTopDomains
    ];

    // All loaders are started immediately (in order) and run concurrently.
    await Promise.all(loaders.map(async (load) => {
        try {
            await load();
        } catch (error) {
            console.error('Failed to load dashboard section:', load.name, error);
        }
    }));
}
|
| 104 |
+
|
| 105 |
+
/**
 * Fetch the overview figures and write them into the stat cards.
 *
 * Cards that are not present on the current page are skipped, and a field
 * missing from the response is shown as '-' rather than aborting the
 * remaining cards.
 *
 * @returns {Promise<void>}
 */
async function loadOverviewStats() {
    const data = await fetchAPI('/api/overview');
    if (!data) return;

    // Element id -> value from the /api/overview response.
    const cards = {
        'total-messages': data.total_messages,
        'active-users': data.active_users,
        'messages-per-day': data.messages_per_day,
        'links-count': data.links_count,
        'media-count': data.media_count,
        'replies-count': data.replies_count
    };

    for (const [id, value] of Object.entries(cards)) {
        const element = document.getElementById(id);
        if (!element) continue;
        element.textContent = value == null ? '-' : formatNumber(value);
    }
}
|
| 117 |
+
|
| 118 |
+
// ==========================================
|
| 119 |
+
// CHARTS
|
| 120 |
+
// ==========================================
|
| 121 |
+
|
| 122 |
+
/**
 * Fetch the message time series and draw it as a filled line chart on the
 * #messages-chart canvas.
 *
 * The granularity comes from the #messages-granularity select ('day' when
 * the select is absent). A response that is not an array (a failed request
 * or an error payload) leaves the current chart untouched. An empty array
 * removes the current chart so that data from a previous timeframe is not
 * left on screen.
 *
 * @returns {Promise<void>}
 */
async function loadMessagesChart() {
    const granularitySelect = document.getElementById('messages-granularity');
    const granularity = granularitySelect ? granularitySelect.value : 'day';

    const data = await fetchAPI(`/api/chart/messages?granularity=${encodeURIComponent(granularity)}`);
    if (!Array.isArray(data)) return;

    // Destroy the previous chart before the canvas is reused.
    if (state.charts.messages) {
        state.charts.messages.destroy();
        state.charts.messages = null;
    }
    if (data.length === 0) return;

    const ctx = document.getElementById('messages-chart');
    if (!ctx) return;

    state.charts.messages = new Chart(ctx, {
        type: 'line',
        data: {
            labels: data.map(d => d.label),
            datasets: [{
                label: 'Messages',
                data: data.map(d => d.value),
                borderColor: '#0088cc',
                backgroundColor: 'rgba(0, 136, 204, 0.1)',
                fill: true,
                tension: 0.4,
                pointRadius: 2,
                pointHoverRadius: 5
            }]
        },
        options: {
            responsive: true,
            maintainAspectRatio: false,
            plugins: {
                legend: { display: false }
            },
            scales: {
                x: {
                    grid: { display: false },
                    ticks: { maxTicksLimit: 10 }
                },
                y: {
                    beginAtZero: true,
                    grid: { color: '#2d3748' }
                }
            },
            interaction: {
                intersect: false,
                mode: 'index'
            }
        }
    });
}
|
| 175 |
+
|
| 176 |
+
/**
 * Fetch the daily active-user series and draw it as a filled line chart on
 * the #users-chart canvas.
 *
 * A response that is not an array (a failed request or an error payload)
 * leaves the current chart untouched. An empty array removes the current
 * chart so that data from a previous timeframe is not left on screen.
 *
 * @returns {Promise<void>}
 */
async function loadUsersChart() {
    const data = await fetchAPI('/api/chart/users?granularity=day');
    if (!Array.isArray(data)) return;

    // Destroy the previous chart before the canvas is reused.
    if (state.charts.users) {
        state.charts.users.destroy();
        state.charts.users = null;
    }
    if (data.length === 0) return;

    const ctx = document.getElementById('users-chart');
    if (!ctx) return;

    state.charts.users = new Chart(ctx, {
        type: 'line',
        data: {
            labels: data.map(d => d.label),
            datasets: [{
                label: 'Active Users',
                data: data.map(d => d.value),
                borderColor: '#28a745',
                backgroundColor: 'rgba(40, 167, 69, 0.1)',
                fill: true,
                tension: 0.4,
                pointRadius: 2,
                pointHoverRadius: 5
            }]
        },
        options: {
            responsive: true,
            maintainAspectRatio: false,
            plugins: {
                legend: { display: false }
            },
            scales: {
                x: {
                    grid: { display: false },
                    ticks: { maxTicksLimit: 10 }
                },
                y: {
                    beginAtZero: true,
                    grid: { color: '#2d3748' }
                }
            }
        }
    });
}
|
| 221 |
+
|
| 222 |
+
/**
 * Fetch the per-hour message counts and draw them as a bar chart on the
 * #hourly-chart canvas.
 *
 * A response that is not an array (a failed request or an error payload)
 * leaves the current chart untouched. An empty array removes the current
 * chart so that data from a previous timeframe is not left on screen.
 *
 * @returns {Promise<void>}
 */
async function loadHourlyChart() {
    const data = await fetchAPI('/api/chart/hourly');
    if (!Array.isArray(data)) return;

    // Destroy the previous chart before the canvas is reused.
    if (state.charts.hourly) {
        state.charts.hourly.destroy();
        state.charts.hourly = null;
    }
    if (data.length === 0) return;

    const ctx = document.getElementById('hourly-chart');
    if (!ctx) return;

    state.charts.hourly = new Chart(ctx, {
        type: 'bar',
        data: {
            labels: data.map(d => d.label),
            datasets: [{
                label: 'Messages',
                data: data.map(d => d.value),
                backgroundColor: '#0088cc',
                borderRadius: 4
            }]
        },
        options: {
            responsive: true,
            maintainAspectRatio: false,
            plugins: {
                legend: { display: false }
            },
            scales: {
                x: {
                    grid: { display: false },
                    ticks: { maxTicksLimit: 12 }
                },
                y: {
                    beginAtZero: true,
                    grid: { color: '#2d3748' }
                }
            }
        }
    });
}
|
| 263 |
+
|
| 264 |
+
/**
 * Fetch the per-weekday message counts and draw them as a bar chart on the
 * #daily-chart canvas, one colour per bar and labels cut to three letters.
 *
 * A response that is not an array (a failed request or an error payload)
 * leaves the current chart untouched. An empty array removes the current
 * chart so that data from a previous timeframe is not left on screen.
 *
 * @returns {Promise<void>}
 */
async function loadDailyChart() {
    const data = await fetchAPI('/api/chart/daily');
    if (!Array.isArray(data)) return;

    // Destroy the previous chart before the canvas is reused.
    if (state.charts.daily) {
        state.charts.daily.destroy();
        state.charts.daily = null;
    }
    if (data.length === 0) return;

    const ctx = document.getElementById('daily-chart');
    if (!ctx) return;

    // Colours are applied by position. The day names below assume the API
    // returns the week starting on Sunday - TODO confirm against the backend.
    const colors = [
        '#dc3545', // Sunday - red
        '#ffc107', // Monday - yellow
        '#28a745', // Tuesday - green
        '#17a2b8', // Wednesday - cyan
        '#0088cc', // Thursday - blue
        '#6f42c1', // Friday - purple
        '#fd7e14'  // Saturday - orange
    ];

    state.charts.daily = new Chart(ctx, {
        type: 'bar',
        data: {
            labels: data.map(d => d.label.substring(0, 3)),
            datasets: [{
                label: 'Messages',
                data: data.map(d => d.value),
                backgroundColor: colors,
                borderRadius: 4
            }]
        },
        options: {
            responsive: true,
            maintainAspectRatio: false,
            plugins: {
                legend: { display: false }
            },
            scales: {
                x: {
                    grid: { display: false }
                },
                y: {
                    beginAtZero: true,
                    grid: { color: '#2d3748' }
                }
            }
        }
    });
}
|
| 314 |
+
|
| 315 |
+
/**
 * Fetch the day-by-hour activity matrix and render it as an HTML table
 * inside #heatmap: one row per entry of `days`, 24 hour columns, each cell
 * shaded by its share of the largest value.
 *
 * The response is expected to carry `days` (array of day names) and `data`
 * (array of per-day arrays of counts). Anything else is ignored. Missing
 * rows, short rows and non-numeric cells are treated as 0 instead of
 * throwing or rendering "undefined".
 *
 * @returns {Promise<void>}
 */
async function loadHeatmap() {
    const data = await fetchAPI('/api/chart/heatmap');
    if (!data || !Array.isArray(data.data) || !Array.isArray(data.days)) return;

    const container = document.getElementById('heatmap');
    if (!container) return;

    const HOURS = 24;
    const toCount = (raw) => Number(raw) || 0;

    // Largest value, used to scale colour intensity (0 when there is no data).
    const maxValue = Math.max(0, ...data.data.flat().map(toCount));

    // Opacity runs from 0.2 to 1.0 with the value; empty cells get 0.1.
    const getColor = (value) => {
        if (value === 0 || maxValue === 0) return 'rgba(0, 136, 204, 0.1)';
        const intensity = value / maxValue;
        return `rgba(0, 136, 204, ${0.2 + intensity * 0.8})`;
    };

    let html = '<table class="heatmap-table"><thead><tr><th></th>';

    // Hour headers
    for (let h = 0; h < HOURS; h++) {
        html += `<th>${h}</th>`;
    }
    html += '</tr></thead><tbody>';

    // Day rows
    data.days.forEach((day, dayIndex) => {
        const row = Array.isArray(data.data[dayIndex]) ? data.data[dayIndex] : [];
        html += `<tr><td class="day-label">${day.substring(0, 3)}</td>`;
        for (let h = 0; h < HOURS; h++) {
            const value = toCount(row[h]);
            const color = getColor(value);
            html += `<td><div class="heatmap-cell" style="background: ${color}" title="${day} ${h}:00 - ${value} messages"></div></td>`;
        }
        html += '</tr>';
    });

    html += '</tbody></table>';
    container.innerHTML = html;
}
|
| 354 |
+
|
| 355 |
+
// ==========================================
|
| 356 |
+
// TOP LISTS
|
| 357 |
+
// ==========================================
|
| 358 |
+
|
| 359 |
+
/**
 * Fetch the ten most active users and render them into #top-users-list.
 *
 * Shows the spinner while loading and the empty state when the response
 * has no usable `users` array. Each row links to the user's page, shows an
 * avatar with the first character of the name, and colours the first three
 * rows gold, silver and bronze.
 *
 * @returns {Promise<void>}
 */
async function loadTopUsers() {
    const listElement = document.getElementById('top-users-list');
    if (!listElement) return;

    showLoading('top-users-list');

    const data = await fetchAPI('/api/users?limit=10');
    if (!data || !Array.isArray(data.users) || data.users.length === 0) {
        showEmpty('top-users-list');
        return;
    }

    const rankClasses = ['gold', 'silver', 'bronze'];

    listElement.innerHTML = data.users.map((user, index) => {
        const rankClass = rankClasses[index] || '';

        // A user without a name must not abort the whole list.
        const name = user.name == null ? '' : String(user.name);
        // Array.from splits by code point, so a name starting with an emoji
        // does not yield half of a surrogate pair.
        const initial = name ? Array.from(name)[0].toUpperCase() : '?';
        const profileUrl = `/user/${encodeURIComponent(user.user_id)}`;

        // The initial is escaped like the name: it is user-supplied text too
        // and may be '<' or '&'.
        return `
            <div class="list-item" onclick="window.location.href='${profileUrl}'" style="cursor: pointer">
                <div class="list-rank ${rankClass}">#${user.rank}</div>
                <div class="user-avatar">${escapeHtml(initial)}</div>
                <div class="list-info">
                    <div class="list-name">${escapeHtml(name)}</div>
                    <div class="list-subtitle">${user.percentage}% of total</div>
                </div>
                <div class="list-value">${formatNumber(user.messages)}</div>
            </div>
        `;
    }).join('');
}
|
| 391 |
+
|
| 392 |
+
/**
 * Fetch the ten most frequent words and render them into #top-words-list,
 * each with a bar whose width is its count relative to the largest count.
 *
 * Shows the spinner while loading and the empty state when the response is
 * not a non-empty array (including error payloads).
 *
 * @returns {Promise<void>}
 */
async function loadTopWords() {
    const listElement = document.getElementById('top-words-list');
    if (!listElement) return;

    showLoading('top-words-list');

    const data = await fetchAPI('/api/top/words?limit=10');
    if (!Array.isArray(data) || data.length === 0) {
        showEmpty('top-words-list');
        return;
    }

    // Take the real maximum instead of trusting the first entry, so the
    // bars stay within 100% even if the response is not sorted.
    const maxCount = data.reduce((max, item) => Math.max(max, Number(item.count) || 0), 0);

    listElement.innerHTML = data.map((item, index) => {
        // Guard the division: an all-zero list would otherwise give NaN%.
        const percentage = maxCount > 0 ? (item.count / maxCount * 100).toFixed(0) : '0';
        return `
            <div class="list-item">
                <div class="list-rank">#${index + 1}</div>
                <div class="list-info">
                    <div class="list-name">${escapeHtml(item.word)}</div>
                    <div class="progress-bar">
                        <div class="progress-fill" style="width: ${percentage}%"></div>
                    </div>
                </div>
                <div class="list-value">${formatNumber(item.count)}</div>
            </div>
        `;
    }).join('');
}
|
| 425 |
+
|
| 426 |
+
/**
 * Fetch the ten most shared domains and render them into
 * #top-domains-list, each with a bar whose width is its count relative to
 * the largest count.
 *
 * Shows the spinner while loading and the empty state when the response is
 * not a non-empty array (including error payloads).
 *
 * @returns {Promise<void>}
 */
async function loadTopDomains() {
    const listElement = document.getElementById('top-domains-list');
    if (!listElement) return;

    showLoading('top-domains-list');

    const data = await fetchAPI('/api/top/domains?limit=10');
    if (!Array.isArray(data) || data.length === 0) {
        showEmpty('top-domains-list');
        return;
    }

    // Take the real maximum instead of trusting the first entry, so the
    // bars stay within 100% even if the response is not sorted.
    const maxCount = data.reduce((max, item) => Math.max(max, Number(item.count) || 0), 0);

    listElement.innerHTML = data.map((item, index) => {
        // Guard the division: an all-zero list would otherwise give NaN%.
        const percentage = maxCount > 0 ? (item.count / maxCount * 100).toFixed(0) : '0';
        return `
            <div class="list-item">
                <div class="list-rank">#${index + 1}</div>
                <div class="list-info">
                    <div class="list-name">${escapeHtml(item.domain)}</div>
                    <div class="progress-bar">
                        <div class="progress-fill" style="width: ${percentage}%"></div>
                    </div>
                </div>
                <div class="list-value">${formatNumber(item.count)}</div>
            </div>
        `;
    }).join('');
}
|
| 459 |
+
|
| 460 |
+
// ==========================================
|
| 461 |
+
// USER MODAL
|
| 462 |
+
// ==========================================
|
| 463 |
+
|
| 464 |
+
/**
 * Open the user-details modal for one user.
 *
 * Creates the modal on first use (clicking the backdrop closes it), shows
 * a spinner, fetches `/api/user/<id>` and then renders the profile header,
 * the stat tiles and a per-hour activity bar chart. A failed request or an
 * error payload shows "User not found".
 *
 * The hourly chart is kept in `state.charts.userHourly` and destroyed
 * before the modal content is replaced, so reopening the modal does not
 * accumulate Chart instances attached to discarded canvases.
 *
 * @param {number|string} userId - Id of the user to show.
 * @returns {Promise<void>}
 */
async function openUserModal(userId) {
    const loadingHtml = '<div class="loading"><div class="spinner"></div></div>';

    // Create modal if it doesn't exist
    let modal = document.getElementById('user-modal');
    if (!modal) {
        modal = document.createElement('div');
        modal.id = 'user-modal';
        modal.className = 'modal-overlay';
        modal.innerHTML = `
            <div class="modal">
                <div class="modal-header">
                    <h2>User Details</h2>
                    <button class="modal-close" onclick="closeUserModal()">×</button>
                </div>
                <div class="modal-body" id="user-modal-content">
                    ${loadingHtml}
                </div>
            </div>
        `;
        document.body.appendChild(modal);

        // Close on backdrop click
        modal.addEventListener('click', (e) => {
            if (e.target === modal) closeUserModal();
        });
    }

    modal.classList.add('active');

    // Release the chart from the previous user before its canvas is
    // thrown away by the innerHTML assignment below.
    if (state.charts.userHourly) {
        state.charts.userHourly.destroy();
        state.charts.userHourly = null;
    }

    const content = document.getElementById('user-modal-content');
    content.innerHTML = loadingHtml;

    const data = await fetchAPI(`/api/user/${encodeURIComponent(userId)}`);
    if (!data || data.error) {
        content.innerHTML = '<div class="empty-state"><p>User not found</p></div>';
        return;
    }

    // A user without a name must not break the modal; Array.from splits by
    // code point so an emoji initial is not cut in half.
    const name = data.name == null ? '' : String(data.name);
    const initial = name ? Array.from(name)[0].toUpperCase() : '?';

    content.innerHTML = `
        <div class="user-profile">
            <div class="user-profile-avatar">${escapeHtml(initial)}</div>
            <div class="user-profile-info">
                <h3>${escapeHtml(name)}</h3>
                <p>Rank #${data.rank} • Member since ${formatDate(data.first_seen)}</p>
            </div>
        </div>

        <div class="user-stats-grid">
            <div class="user-stat">
                <div class="user-stat-value">${formatNumber(data.messages)}</div>
                <div class="user-stat-label">Messages</div>
            </div>
            <div class="user-stat">
                <div class="user-stat-value">${formatNumber(data.characters)}</div>
                <div class="user-stat-label">Characters</div>
            </div>
            <div class="user-stat">
                <div class="user-stat-value">${data.daily_average}</div>
                <div class="user-stat-label">Daily Avg</div>
            </div>
            <div class="user-stat">
                <div class="user-stat-value">${formatNumber(data.links)}</div>
                <div class="user-stat-label">Links</div>
            </div>
            <div class="user-stat">
                <div class="user-stat-value">${formatNumber(data.media)}</div>
                <div class="user-stat-label">Media</div>
            </div>
            <div class="user-stat">
                <div class="user-stat-value">${data.active_days}</div>
                <div class="user-stat-label">Active Days</div>
            </div>
        </div>

        <h4 style="margin-bottom: 1rem;">Activity by Hour</h4>
        <canvas id="user-hourly-chart" height="150"></canvas>
    `;

    // Render user's hourly chart
    const ctx = document.getElementById('user-hourly-chart');
    state.charts.userHourly = new Chart(ctx, {
        type: 'bar',
        data: {
            labels: Array.from({ length: 24 }, (_, i) => `${i}:00`),
            datasets: [{
                data: data.hourly_activity,
                backgroundColor: '#0088cc',
                borderRadius: 2
            }]
        },
        options: {
            responsive: true,
            maintainAspectRatio: false,
            plugins: { legend: { display: false } },
            scales: {
                x: { grid: { display: false }, ticks: { maxTicksLimit: 12 } },
                y: { beginAtZero: true, grid: { color: '#2d3748' } }
            }
        }
    });
}
|
| 564 |
+
|
| 565 |
+
/**
 * Hide the user-details modal by removing its `active` class.
 * Does nothing when the modal element is not present in the document.
 */
function closeUserModal() {
    const userModal = document.getElementById('user-modal');
    if (!userModal) {
        return;
    }
    userModal.classList.remove('active');
}
|
| 569 |
+
|
| 570 |
+
// ==========================================
|
| 571 |
+
// EXPORT FUNCTIONS
|
| 572 |
+
// ==========================================
|
| 573 |
+
|
| 574 |
+
/**
 * Navigate the browser to the users export endpoint for the currently
 * selected timeframe.
 *
 * The timeframe is URL-encoded so characters such as `&`, `=` or spaces
 * cannot corrupt the query string.
 */
function exportUsers() {
    const timeframe = encodeURIComponent(getTimeframe());
    window.location.href = `/api/export/users?timeframe=${timeframe}`;
}
|
| 578 |
+
|
| 579 |
+
/**
 * Navigate the browser to the messages export endpoint for the currently
 * selected timeframe.
 *
 * The timeframe is URL-encoded so characters such as `&`, `=` or spaces
 * cannot corrupt the query string.
 */
function exportMessages() {
    const timeframe = encodeURIComponent(getTimeframe());
    window.location.href = `/api/export/messages?timeframe=${timeframe}`;
}
|
| 583 |
+
|
| 584 |
+
// ==========================================
|
| 585 |
+
// AUTO REFRESH
|
| 586 |
+
// ==========================================
|
| 587 |
+
|
| 588 |
+
/**
 * Switch periodic dashboard refreshing on or off.
 *
 * `state.autoRefresh` holds the active interval handle, or null when
 * refreshing is off. When enabled, `loadAllData` runs every 60 seconds.
 */
function toggleAutoRefresh() {
    const isRunning = Boolean(state.autoRefresh);

    if (!isRunning) {
        state.autoRefresh = setInterval(loadAllData, 60000); // once per minute
        console.log('Auto-refresh enabled (60s)');
        return;
    }

    clearInterval(state.autoRefresh);
    state.autoRefresh = null;
    console.log('Auto-refresh disabled');
}
|
| 598 |
+
|
| 599 |
+
// ==========================================
|
| 600 |
+
// UTILITY
|
| 601 |
+
// ==========================================
|
| 602 |
+
|
| 603 |
+
/**
 * Escape a value for safe interpolation into HTML markup.
 *
 * Escapes `&`, `<`, `>` and both quote characters, so the result is safe
 * in element content and inside quoted attribute values. Works without
 * touching the DOM.
 *
 * @param {*} text - Value to escape; null and undefined become ''.
 * @returns {string} The escaped text.
 */
function escapeHtml(text) {
    const replacements = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        // Named entity on purpose: a numeric entity would contain '#'.
        "'": '&apos;',
    };
    const source = text == null ? '' : String(text);
    return source.replace(/[&<>"']/g, (ch) => replacements[ch]);
}
|
| 608 |
+
|
| 609 |
+
// Keyboard shortcuts
|
| 610 |
+
document.addEventListener('keydown', (e) => {
    // Escape closes the user modal.
    if (e.key === 'Escape') {
        closeUserModal();
    }

    // Plain "r" (no Ctrl/Cmd) reloads all dashboard data, unless the user
    // is currently typing into an editable control.
    if (e.key === 'r' && !e.ctrlKey && !e.metaKey) {
        // activeElement may be null, so never dereference it blindly.
        const activeElement = document.activeElement;
        const tagName = activeElement ? activeElement.tagName : '';
        const isEditing =
            tagName === 'INPUT' ||
            tagName === 'TEXTAREA' ||
            tagName === 'SELECT' ||
            Boolean(activeElement && activeElement.isContentEditable);

        if (!isEditing) {
            loadAllData();
        }
    }
});
|
templates/chat.html
ADDED
|
@@ -0,0 +1,831 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="he" dir="rtl">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Chat View - Telegram Style</title>
|
| 7 |
+
<style>
|
| 8 |
+
/* ===== Telegram-like Chat Viewer ===== */
|
| 9 |
+
:root {
|
| 10 |
+
--bg-primary: #0e1621;
|
| 11 |
+
--bg-secondary: #17212b;
|
| 12 |
+
--bg-message: #182533;
|
| 13 |
+
--bg-hover: #1e2c3a;
|
| 14 |
+
--bg-reply: rgba(77, 184, 255, 0.08);
|
| 15 |
+
--bg-forward: rgba(100, 191, 71, 0.08);
|
| 16 |
+
--text-primary: #f5f5f5;
|
| 17 |
+
--text-secondary: #8b9fad;
|
| 18 |
+
--text-link: #6ab2f2;
|
| 19 |
+
--accent-blue: #6ab2f2;
|
| 20 |
+
--accent-green: #6dc264;
|
| 21 |
+
--border-reply: #6ab2f2;
|
| 22 |
+
--border-forward: #6dc264;
|
| 23 |
+
--date-badge: #1b2a38;
|
| 24 |
+
--nav-bg: #17212b;
|
| 25 |
+
--nav-border: #0e1621;
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 29 |
+
|
| 30 |
+
body {
|
| 31 |
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
|
| 32 |
+
font-size: 14px;
|
| 33 |
+
line-height: 1.5;
|
| 34 |
+
background-color: var(--bg-primary);
|
| 35 |
+
color: var(--text-primary);
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
/* ===== Navigation ===== */
|
| 39 |
+
.nav-bar {
|
| 40 |
+
position: fixed;
|
| 41 |
+
top: 0; left: 0; right: 0;
|
| 42 |
+
z-index: 100;
|
| 43 |
+
background-color: var(--nav-bg);
|
| 44 |
+
border-bottom: 1px solid var(--nav-border);
|
| 45 |
+
padding: 0 16px;
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
.nav-content {
|
| 49 |
+
max-width: 800px;
|
| 50 |
+
margin: 0 auto;
|
| 51 |
+
display: flex;
|
| 52 |
+
align-items: center;
|
| 53 |
+
justify-content: space-between;
|
| 54 |
+
height: 56px;
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
.nav-title {
|
| 58 |
+
font-size: 18px;
|
| 59 |
+
font-weight: 700;
|
| 60 |
+
color: var(--text-primary);
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
.nav-links { display: flex; gap: 4px; }
|
| 64 |
+
|
| 65 |
+
.nav-links a {
|
| 66 |
+
color: var(--accent-blue);
|
| 67 |
+
text-decoration: none;
|
| 68 |
+
padding: 8px 14px;
|
| 69 |
+
border-radius: 8px;
|
| 70 |
+
font-size: 13px;
|
| 71 |
+
transition: background 0.15s;
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
.nav-links a:hover { background-color: var(--bg-hover); }
|
| 75 |
+
|
| 76 |
+
.nav-links a.active {
|
| 77 |
+
background-color: var(--accent-blue);
|
| 78 |
+
color: var(--bg-primary);
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
/* ===== Chat Area ===== */
|
| 82 |
+
.chat-wrap {
|
| 83 |
+
padding-top: 56px;
|
| 84 |
+
min-height: 100vh;
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
.chat-body {
|
| 88 |
+
max-width: 680px;
|
| 89 |
+
margin: 0 auto;
|
| 90 |
+
padding: 0 12px 80px;
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
.history { padding: 8px 0; }
|
| 94 |
+
|
| 95 |
+
/* ===== Load More ===== */
|
| 96 |
+
.load-more {
|
| 97 |
+
text-align: center;
|
| 98 |
+
padding: 16px;
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
.load-more button {
|
| 102 |
+
padding: 10px 24px;
|
| 103 |
+
background-color: var(--bg-secondary);
|
| 104 |
+
color: var(--accent-blue);
|
| 105 |
+
border: 1px solid rgba(106, 178, 242, 0.3);
|
| 106 |
+
border-radius: 20px;
|
| 107 |
+
cursor: pointer;
|
| 108 |
+
font-size: 14px;
|
| 109 |
+
transition: all 0.15s;
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
.load-more button:hover {
|
| 113 |
+
background-color: var(--bg-hover);
|
| 114 |
+
border-color: var(--accent-blue);
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
.load-more button:disabled { opacity: 0.4; cursor: not-allowed; }
|
| 118 |
+
|
| 119 |
+
/* ===== Date Separator ===== */
|
| 120 |
+
.date-separator {
|
| 121 |
+
display: flex;
|
| 122 |
+
align-items: center;
|
| 123 |
+
justify-content: center;
|
| 124 |
+
padding: 12px 0;
|
| 125 |
+
position: sticky;
|
| 126 |
+
top: 60px;
|
| 127 |
+
z-index: 10;
|
| 128 |
+
}
|
| 129 |
+
|
| 130 |
+
.date-badge {
|
| 131 |
+
padding: 4px 12px;
|
| 132 |
+
background-color: var(--date-badge);
|
| 133 |
+
border-radius: 12px;
|
| 134 |
+
color: var(--text-secondary);
|
| 135 |
+
font-size: 13px;
|
| 136 |
+
font-weight: 500;
|
| 137 |
+
box-shadow: 0 1px 4px rgba(0,0,0,0.2);
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
/* ===== Message ===== */
|
| 141 |
+
.msg {
|
| 142 |
+
display: flex;
|
| 143 |
+
align-items: flex-start;
|
| 144 |
+
gap: 10px;
|
| 145 |
+
padding: 3px 8px;
|
| 146 |
+
border-radius: 8px;
|
| 147 |
+
transition: background 0.15s;
|
| 148 |
+
}
|
| 149 |
+
|
| 150 |
+
.msg:hover { background-color: var(--bg-hover); }
|
| 151 |
+
|
| 152 |
+
.msg.joined { padding-top: 1px; }
|
| 153 |
+
.msg.joined .avatar-wrap { visibility: hidden; height: 0; }
|
| 154 |
+
|
| 155 |
+
/* ===== Avatar ===== */
|
| 156 |
+
.avatar-wrap { flex-shrink: 0; padding-top: 2px; }
|
| 157 |
+
|
| 158 |
+
.avatar {
|
| 159 |
+
width: 40px;
|
| 160 |
+
height: 40px;
|
| 161 |
+
border-radius: 50%;
|
| 162 |
+
display: flex;
|
| 163 |
+
align-items: center;
|
| 164 |
+
justify-content: center;
|
| 165 |
+
font-weight: 600;
|
| 166 |
+
font-size: 15px;
|
| 167 |
+
color: #fff;
|
| 168 |
+
cursor: pointer;
|
| 169 |
+
}
|
| 170 |
+
|
| 171 |
+
.avatar:hover { filter: brightness(1.15); }
|
| 172 |
+
|
| 173 |
+
/* 8 Telegram avatar colors */
|
| 174 |
+
.c1 { background: #ff5555; }
|
| 175 |
+
.c2 { background: #64bf47; }
|
| 176 |
+
.c3 { background: #ffab00; }
|
| 177 |
+
.c4 { background: #4f9cd9; }
|
| 178 |
+
.c5 { background: #9884e8; }
|
| 179 |
+
.c6 { background: #e671a5; }
|
| 180 |
+
.c7 { background: #47bcd1; }
|
| 181 |
+
.c8 { background: #ff8c44; }
|
| 182 |
+
|
| 183 |
+
/* Name colors to match avatars */
|
| 184 |
+
.name-c1 { color: #ff5555; }
|
| 185 |
+
.name-c2 { color: #64bf47; }
|
| 186 |
+
.name-c3 { color: #ffab00; }
|
| 187 |
+
.name-c4 { color: #4f9cd9; }
|
| 188 |
+
.name-c5 { color: #9884e8; }
|
| 189 |
+
.name-c6 { color: #e671a5; }
|
| 190 |
+
.name-c7 { color: #47bcd1; }
|
| 191 |
+
.name-c8 { color: #ff8c44; }
|
| 192 |
+
|
| 193 |
+
/* ===== Message Body ===== */
|
| 194 |
+
.msg-body {
|
| 195 |
+
flex: 1;
|
| 196 |
+
min-width: 0;
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
/* Header: name + time */
|
| 200 |
+
.msg-header {
|
| 201 |
+
display: flex;
|
| 202 |
+
align-items: baseline;
|
| 203 |
+
gap: 8px;
|
| 204 |
+
margin-bottom: 2px;
|
| 205 |
+
}
|
| 206 |
+
|
| 207 |
+
.msg-name {
|
| 208 |
+
font-weight: 600;
|
| 209 |
+
font-size: 14px;
|
| 210 |
+
cursor: pointer;
|
| 211 |
+
}
|
| 212 |
+
|
| 213 |
+
.msg-name:hover { text-decoration: underline; }
|
| 214 |
+
|
| 215 |
+
.msg-time {
|
| 216 |
+
color: var(--text-secondary);
|
| 217 |
+
font-size: 12px;
|
| 218 |
+
white-space: nowrap;
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
.msg-edited {
|
| 222 |
+
color: var(--text-secondary);
|
| 223 |
+
font-size: 11px;
|
| 224 |
+
font-style: italic;
|
| 225 |
+
}
|
| 226 |
+
|
| 227 |
+
/* ===== Reply Block ===== */
|
| 228 |
+
.reply-block {
|
| 229 |
+
display: flex;
|
| 230 |
+
gap: 0;
|
| 231 |
+
margin: 4px 0 6px;
|
| 232 |
+
padding: 6px 10px;
|
| 233 |
+
border-radius: 6px;
|
| 234 |
+
border-right: 3px solid var(--border-reply);
|
| 235 |
+
background: var(--bg-reply);
|
| 236 |
+
cursor: pointer;
|
| 237 |
+
overflow: hidden;
|
| 238 |
+
transition: background 0.15s;
|
| 239 |
+
}
|
| 240 |
+
|
| 241 |
+
.reply-block:hover { background: rgba(106, 178, 242, 0.15); }
|
| 242 |
+
|
| 243 |
+
.reply-content { min-width: 0; }
|
| 244 |
+
|
| 245 |
+
.reply-name {
|
| 246 |
+
font-weight: 600;
|
| 247 |
+
font-size: 13px;
|
| 248 |
+
color: var(--accent-blue);
|
| 249 |
+
}
|
| 250 |
+
|
| 251 |
+
.reply-text {
|
| 252 |
+
font-size: 13px;
|
| 253 |
+
color: var(--text-secondary);
|
| 254 |
+
white-space: nowrap;
|
| 255 |
+
overflow: hidden;
|
| 256 |
+
text-overflow: ellipsis;
|
| 257 |
+
max-width: 400px;
|
| 258 |
+
}
|
| 259 |
+
|
| 260 |
+
/* ===== Forward Block ===== */
|
| 261 |
+
.forward-block {
|
| 262 |
+
margin: 4px 0 6px;
|
| 263 |
+
padding: 6px 10px;
|
| 264 |
+
border-radius: 6px;
|
| 265 |
+
border-right: 3px solid var(--border-forward);
|
| 266 |
+
background: var(--bg-forward);
|
| 267 |
+
}
|
| 268 |
+
|
| 269 |
+
.forward-label {
|
| 270 |
+
font-size: 12px;
|
| 271 |
+
color: var(--text-secondary);
|
| 272 |
+
}
|
| 273 |
+
|
| 274 |
+
.forward-name {
|
| 275 |
+
font-weight: 600;
|
| 276 |
+
font-size: 13px;
|
| 277 |
+
color: var(--accent-green);
|
| 278 |
+
}
|
| 279 |
+
|
| 280 |
+
/* ===== Message Text ===== */
|
| 281 |
+
.msg-text {
|
| 282 |
+
word-wrap: break-word;
|
| 283 |
+
overflow-wrap: break-word;
|
| 284 |
+
line-height: 1.55;
|
| 285 |
+
unicode-bidi: plaintext;
|
| 286 |
+
text-align: start;
|
| 287 |
+
white-space: pre-wrap;
|
| 288 |
+
}
|
| 289 |
+
|
| 290 |
+
.msg-text a {
|
| 291 |
+
color: var(--text-link);
|
| 292 |
+
text-decoration: none;
|
| 293 |
+
}
|
| 294 |
+
|
| 295 |
+
.msg-text a:hover { text-decoration: underline; }
|
| 296 |
+
|
| 297 |
+
/* Mention */
|
| 298 |
+
.mention {
|
| 299 |
+
color: var(--accent-blue);
|
| 300 |
+
font-weight: 500;
|
| 301 |
+
cursor: pointer;
|
| 302 |
+
}
|
| 303 |
+
|
| 304 |
+
.mention:hover { text-decoration: underline; }
|
| 305 |
+
|
| 306 |
+
/* Hashtag */
|
| 307 |
+
.hashtag {
|
| 308 |
+
color: var(--accent-blue);
|
| 309 |
+
cursor: pointer;
|
| 310 |
+
}
|
| 311 |
+
|
| 312 |
+
/* Code */
|
| 313 |
+
.msg-text code {
|
| 314 |
+
font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
|
| 315 |
+
background: rgba(255,255,255,0.06);
|
| 316 |
+
padding: 1px 5px;
|
| 317 |
+
border-radius: 4px;
|
| 318 |
+
font-size: 13px;
|
| 319 |
+
}
|
| 320 |
+
|
| 321 |
+
.msg-text pre {
|
| 322 |
+
background: rgba(0,0,0,0.3);
|
| 323 |
+
padding: 10px 12px;
|
| 324 |
+
border-radius: 8px;
|
| 325 |
+
margin: 6px 0;
|
| 326 |
+
overflow-x: auto;
|
| 327 |
+
font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
|
| 328 |
+
font-size: 13px;
|
| 329 |
+
line-height: 1.4;
|
| 330 |
+
}
|
| 331 |
+
|
| 332 |
+
/* ===== Entities (links, media) ===== */
|
| 333 |
+
.entity-links {
|
| 334 |
+
margin-top: 6px;
|
| 335 |
+
display: flex;
|
| 336 |
+
flex-wrap: wrap;
|
| 337 |
+
gap: 6px;
|
| 338 |
+
}
|
| 339 |
+
|
| 340 |
+
.entity-link {
|
| 341 |
+
display: inline-flex;
|
| 342 |
+
align-items: center;
|
| 343 |
+
gap: 5px;
|
| 344 |
+
padding: 4px 10px;
|
| 345 |
+
background: rgba(106, 178, 242, 0.1);
|
| 346 |
+
border-radius: 8px;
|
| 347 |
+
font-size: 13px;
|
| 348 |
+
color: var(--text-link);
|
| 349 |
+
text-decoration: none;
|
| 350 |
+
max-width: 350px;
|
| 351 |
+
overflow: hidden;
|
| 352 |
+
text-overflow: ellipsis;
|
| 353 |
+
white-space: nowrap;
|
| 354 |
+
transition: background 0.15s;
|
| 355 |
+
}
|
| 356 |
+
|
| 357 |
+
.entity-link:hover {
|
| 358 |
+
background: rgba(106, 178, 242, 0.2);
|
| 359 |
+
text-decoration: none;
|
| 360 |
+
}
|
| 361 |
+
|
| 362 |
+
.entity-link .link-icon { font-size: 11px; }
|
| 363 |
+
.entity-link .link-domain {
|
| 364 |
+
opacity: 0.7;
|
| 365 |
+
font-size: 12px;
|
| 366 |
+
}
|
| 367 |
+
|
| 368 |
+
/* ===== Media Badge ===== */
|
| 369 |
+
.media-badge {
|
| 370 |
+
display: inline-flex;
|
| 371 |
+
align-items: center;
|
| 372 |
+
gap: 6px;
|
| 373 |
+
padding: 5px 10px;
|
| 374 |
+
background: var(--bg-secondary);
|
| 375 |
+
border-radius: 8px;
|
| 376 |
+
margin-top: 6px;
|
| 377 |
+
font-size: 13px;
|
| 378 |
+
color: var(--text-secondary);
|
| 379 |
+
}
|
| 380 |
+
|
| 381 |
+
.media-badge .media-icon { font-size: 14px; }
|
| 382 |
+
|
| 383 |
+
/* ===== Time for joined messages ===== */
|
| 384 |
+
.msg-time-inline {
|
| 385 |
+
color: var(--text-secondary);
|
| 386 |
+
font-size: 12px;
|
| 387 |
+
margin-top: 2px;
|
| 388 |
+
opacity: 0;
|
| 389 |
+
transition: opacity 0.15s;
|
| 390 |
+
}
|
| 391 |
+
|
| 392 |
+
.msg:hover .msg-time-inline { opacity: 1; }
|
| 393 |
+
|
| 394 |
+
/* ===== Selected (highlight on go-to) ===== */
|
| 395 |
+
.msg.selected {
|
| 396 |
+
background-color: rgba(106, 178, 242, 0.15);
|
| 397 |
+
transition: background-color 2s ease;
|
| 398 |
+
}
|
| 399 |
+
|
| 400 |
+
/* ===== Scroll-to-bottom ===== */
|
| 401 |
+
.scroll-btn {
|
| 402 |
+
position: fixed;
|
| 403 |
+
bottom: 24px;
|
| 404 |
+
left: 50%;
|
| 405 |
+
transform: translateX(-50%);
|
| 406 |
+
width: 44px;
|
| 407 |
+
height: 44px;
|
| 408 |
+
background: var(--bg-secondary);
|
| 409 |
+
color: var(--accent-blue);
|
| 410 |
+
border: 1px solid rgba(106, 178, 242, 0.3);
|
| 411 |
+
border-radius: 50%;
|
| 412 |
+
cursor: pointer;
|
| 413 |
+
font-size: 20px;
|
| 414 |
+
display: none;
|
| 415 |
+
align-items: center;
|
| 416 |
+
justify-content: center;
|
| 417 |
+
box-shadow: 0 2px 12px rgba(0,0,0,0.4);
|
| 418 |
+
z-index: 80;
|
| 419 |
+
transition: all 0.15s;
|
| 420 |
+
}
|
| 421 |
+
|
| 422 |
+
.scroll-btn.visible { display: flex; }
|
| 423 |
+
.scroll-btn:hover {
|
| 424 |
+
background: var(--accent-blue);
|
| 425 |
+
color: var(--bg-primary);
|
| 426 |
+
}
|
| 427 |
+
|
| 428 |
+
/* ===== Loading ===== */
|
| 429 |
+
.loading {
|
| 430 |
+
text-align: center;
|
| 431 |
+
padding: 24px;
|
| 432 |
+
color: var(--text-secondary);
|
| 433 |
+
}
|
| 434 |
+
|
| 435 |
+
.spinner {
|
| 436 |
+
display: inline-block;
|
| 437 |
+
width: 24px; height: 24px;
|
| 438 |
+
border: 3px solid var(--bg-secondary);
|
| 439 |
+
border-top-color: var(--accent-blue);
|
| 440 |
+
border-radius: 50%;
|
| 441 |
+
animation: spin 1s linear infinite;
|
| 442 |
+
margin-bottom: 8px;
|
| 443 |
+
}
|
| 444 |
+
|
| 445 |
+
@keyframes spin { to { transform: rotate(360deg); } }
|
| 446 |
+
|
| 447 |
+
/* ===== Toast ===== */
|
| 448 |
+
.toast {
|
| 449 |
+
position: fixed;
|
| 450 |
+
bottom: 80px;
|
| 451 |
+
left: 50%;
|
| 452 |
+
transform: translateX(-50%);
|
| 453 |
+
background: rgba(0,0,0,0.85);
|
| 454 |
+
color: #fff;
|
| 455 |
+
padding: 10px 24px;
|
| 456 |
+
border-radius: 20px;
|
| 457 |
+
z-index: 200;
|
| 458 |
+
opacity: 0;
|
| 459 |
+
transition: opacity 0.3s;
|
| 460 |
+
font-size: 13px;
|
| 461 |
+
}
|
| 462 |
+
|
| 463 |
+
.toast.visible { opacity: 1; }
|
| 464 |
+
|
| 465 |
+
/* ===== Responsive ===== */
|
| 466 |
+
@media (max-width: 700px) {
|
| 467 |
+
.nav-links a { padding: 6px 8px; font-size: 12px; }
|
| 468 |
+
.chat-body { padding: 0 4px 80px; }
|
| 469 |
+
.reply-text { max-width: 200px; }
|
| 470 |
+
.entity-link { max-width: 250px; }
|
| 471 |
+
}
|
| 472 |
+
</style>
|
| 473 |
+
</head>
|
| 474 |
+
<body>
|
| 475 |
+
<nav class="nav-bar">
|
| 476 |
+
<div class="nav-content">
|
| 477 |
+
<div class="nav-title">Chat View</div>
|
| 478 |
+
<div class="nav-links">
|
| 479 |
+
<a href="/">Overview</a>
|
| 480 |
+
<a href="/users">Users</a>
|
| 481 |
+
<a href="/chat" class="active">Chat</a>
|
| 482 |
+
<a href="/search">Search</a>
|
| 483 |
+
<a href="/moderation">Moderation</a>
|
| 484 |
+
<a href="/settings">Settings</a>
|
| 485 |
+
</div>
|
| 486 |
+
</div>
|
| 487 |
+
</nav>
|
| 488 |
+
|
| 489 |
+
<div class="chat-wrap">
|
| 490 |
+
<div class="chat-body">
|
| 491 |
+
<div class="history" id="history">
|
| 492 |
+
<div class="load-more" id="load-more-top">
|
| 493 |
+
<button onclick="loadOlderMessages()" id="load-older-btn">↑ Load earlier messages</button>
|
| 494 |
+
</div>
|
| 495 |
+
<div id="messages-container"></div>
|
| 496 |
+
<div class="loading" id="loading">
|
| 497 |
+
<div class="spinner"></div>
|
| 498 |
+
<div>Loading messages...</div>
|
| 499 |
+
</div>
|
| 500 |
+
</div>
|
| 501 |
+
</div>
|
| 502 |
+
</div>
|
| 503 |
+
|
| 504 |
+
<button class="scroll-btn" id="scroll-bottom" onclick="scrollToBottom()">↓</button>
|
| 505 |
+
<div class="toast" id="toast"></div>
|
| 506 |
+
|
| 507 |
+
<script>
|
| 508 |
+
// ===== State =====
|
| 509 |
+
// Messages currently loaded, in the order they are rendered.
let allMessages = [];
// Offset (into the full history) of the oldest loaded message.
let oldestOffset = 0;
// Total message count reported by the API.
let totalMessages = 0;
// True while a fetch is in flight; guards against overlapping loads.
let loading = false;
// True until the first batch has been rendered and scrolled into view.
let initialLoad = true;
// Number of messages requested per batch.
const BATCH_SIZE = 100;
// Cache of user id -> avatar color index. Created without a prototype so
// ids such as "constructor" or "toString" never hit inherited members.
const userColors = Object.create(null);
|
| 516 |
+
|
| 517 |
+
// ===== Utilities =====
|
| 518 |
+
/**
 * Map a user id to a stable avatar color index in the range 1..8.
 *
 * The index comes from a string hash of the id and is cached in
 * `userColors`. Only own properties of the cache are consulted, so ids
 * that collide with inherited object members (e.g. "constructor") still
 * get a numeric color.
 *
 * @param {*} userId - User identifier; it is stringified before hashing.
 * @returns {number} Integer between 1 and 8 inclusive.
 */
function getUserColor(userId) {
    const key = String(userId);
    if (Object.prototype.hasOwnProperty.call(userColors, key)) {
        return userColors[key];
    }

    let hash = 0;
    for (let i = 0; i < key.length; i++) {
        hash = key.charCodeAt(i) + ((hash << 5) - hash);
    }

    const color = (Math.abs(hash) % 8) + 1;
    userColors[key] = color;
    // Return the computed value rather than re-reading the cache: a write
    // to a key like "__proto__" may not be stored on a plain object.
    return color;
}
|
| 529 |
+
|
| 530 |
+
/**
 * Build the avatar initials for a display name.
 *
 * Two or more words give the first character of the first two words.
 * A single word gives its first two characters. Empty or whitespace-only
 * names give '?'. Characters are taken per Unicode code point so emoji
 * and other astral symbols are not cut in half.
 *
 * @param {string} name - Display name.
 * @returns {string} Upper-cased initials, or '?' when no name is usable.
 */
function getInitials(name) {
    if (!name) return '?';

    const words = String(name).trim().split(/\s+/).filter(Boolean);
    if (words.length === 0) return '?';

    if (words.length >= 2) {
        const first = Array.from(words[0])[0];
        const second = Array.from(words[1])[0];
        return (first + second).toUpperCase();
    }

    return Array.from(words[0]).slice(0, 2).join('').toUpperCase();
}
|
| 536 |
+
|
| 537 |
+
/**
 * Format a date string as "Month D, YYYY" for the date separator.
 *
 * @param {string} dateStr - Any value accepted by the Date constructor.
 * @returns {string} The formatted date in local time, or '' when the
 *     input is empty or cannot be parsed.
 */
function formatDate(dateStr) {
    if (!dateStr) return '';

    const d = new Date(dateStr);
    // An unparsable input would otherwise render as "undefined NaN, NaN".
    if (Number.isNaN(d.getTime())) return '';

    const months = ['January','February','March','April','May','June',
        'July','August','September','October','November','December'];
    return `${months[d.getMonth()]} ${d.getDate()}, ${d.getFullYear()}`;
}
|
| 544 |
+
|
| 545 |
+
/**
 * Format a date string as a 24-hour "HH:MM" time in local time.
 *
 * The digits are built directly from the Date fields instead of a locale
 * formatter, which avoids quirks such as midnight rendering as "24:xx"
 * in some engines.
 *
 * @param {string} dateStr - Any value accepted by the Date constructor.
 * @returns {string} Zero-padded "HH:MM", or '' when the input is empty
 *     or cannot be parsed.
 */
function formatTime(dateStr) {
    if (!dateStr) return '';

    const d = new Date(dateStr);
    if (Number.isNaN(d.getTime())) return '';

    const hours = String(d.getHours()).padStart(2, '0');
    const minutes = String(d.getMinutes()).padStart(2, '0');
    return `${hours}:${minutes}`;
}
|
| 550 |
+
|
| 551 |
+
/**
 * Escape text for safe interpolation into HTML markup.
 *
 * Escapes `&`, `<`, `>` and both quote characters, so the result is safe
 * in element content and inside quoted attribute values such as
 * `href="..."`.
 *
 * @param {*} text - Value to escape; any falsy value yields ''.
 * @returns {string} The escaped text.
 */
function escapeHtml(text) {
    if (!text) return '';

    const replacements = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        // Named entity on purpose: a numeric entity would contain '#',
        // which the hashtag highlighter would mistake for a hashtag.
        "'": '&apos;',
    };
    return String(text).replace(/[&<>"']/g, (ch) => replacements[ch]);
}
|
| 557 |
+
|
| 558 |
+
/**
 * Extract a short display label (the host name) from a URL.
 *
 * @param {string} url - URL to summarize.
 * @returns {string} The host name without a leading "www.". When the
 *     value cannot be parsed as a URL, its first 30 characters are
 *     returned instead ('' for null/undefined).
 */
function getDomain(url) {
    try {
        // Strip "www." only as a leading label, never from the middle.
        return new URL(url).hostname.replace(/^www\./, '');
    } catch {
        const fallback = url == null ? '' : String(url);
        return fallback.substring(0, 30);
    }
}
|
| 565 |
+
|
| 566 |
+
// ===== Text Formatting =====
|
| 567 |
+
/**
 * Turn raw message text into display HTML.
 *
 * The text is HTML-escaped first. URLs, @mentions and #hashtags are then
 * marked up in one pass over the escaped text, and newlines become
 * `<br>`. Because of the single pass, a `#fragment` or `@handle` inside
 * a URL stays part of the link and is not wrapped a second time.
 *
 * @param {string} text - Raw message text.
 * @param {Array} entities - Unused; kept for call-site compatibility.
 * @returns {string} HTML string, or '' for empty text.
 */
function formatMessageText(text, entities) {
    if (!text) return '';

    const escaped = escapeHtml(text);

    // Group 1: URL, group 2: mention name, group 3: hashtag name.
    // The lookaheads stop "@https://..." / "#https://..." from being
    // taken as a mention or hashtag, so the URL is still linkified.
    const tokenPattern =
        /(https?:\/\/[^\s<]+)|@(?!https?:\/\/)(\w{3,})|#(?!https?:\/\/)(\w{2,})/g;

    const html = escaped.replace(tokenPattern, (match, url, mention, hashtag) => {
        if (url !== undefined) {
            // A raw double quote must never terminate the href attribute.
            const href = url.replace(/"/g, '&quot;');
            return `<a href="${href}" target="_blank" rel="noopener">${url}</a>`;
        }
        if (mention !== undefined) {
            return `<span class="mention">@${mention}</span>`;
        }
        return `<span class="hashtag">#${hashtag}</span>`;
    });

    // Convert newlines to <br>
    return html.replace(/\n/g, '<br>');
}
|
| 594 |
+
|
| 595 |
+
// ===== Render Message =====
|
| 596 |
+
/**
 * Decide whether an entity URL may be used as a link target.
 * Rejects script-capable schemes. Whitespace and control characters are
 * removed first, since browsers ignore them when parsing the scheme.
 */
function isSafeLinkHref(url) {
    const compact = url.replace(/[\u0000-\u0020]/g, '');
    return !/^(javascript|data|vbscript):/i.test(compact);
}

/**
 * Build the DOM for one chat message.
 *
 * @param {Object} msg - Message to render. Fields read: date, from_id,
 *     from_name, message_id/id, is_edited, forwarded_from,
 *     reply_to_message_id, reply_to_name, reply_to_text, text, entities,
 *     has_media, has_photo.
 * @param {Object|null} prevMsg - The message rendered just before this
 *     one, or null for the first message. Used for date separators and
 *     grouping.
 * @returns {DocumentFragment} Fragment holding an optional date
 *     separator followed by the message element.
 */
function renderMessage(msg, prevMsg) {
    const frag = document.createDocumentFragment();

    // Date separator: emitted whenever the calendar day changes.
    const msgDate = msg.date ? msg.date.split('T')[0] : '';
    const prevDate = prevMsg && prevMsg.date ? prevMsg.date.split('T')[0] : '';

    if (msgDate !== prevDate) {
        const sep = document.createElement('div');
        sep.className = 'date-separator';
        sep.innerHTML = `<div class="date-badge">${formatDate(msg.date)}</div>`;
        frag.appendChild(sep);
    }

    // Joined message? (same user, same day, within 5 minutes, no forwards)
    const isJoined = prevMsg &&
        prevMsg.from_id === msg.from_id &&
        msgDate === prevDate &&
        !msg.forwarded_from &&
        !prevMsg.forwarded_from &&
        timeDiffMinutes(prevMsg.date, msg.date) < 5;

    const colorNum = getUserColor(msg.from_id);
    const el = document.createElement('div');
    el.className = `msg${isJoined ? ' joined' : ''}`;
    el.id = `message${msg.message_id || msg.id}`;

    let html = '';

    // Avatar. Initials come from a user-controlled name, so escape them.
    html += `<div class="avatar-wrap">
        <div class="avatar c${colorNum}">${escapeHtml(getInitials(msg.from_name))}</div>
    </div>`;

    // Body
    html += '<div class="msg-body">';

    // Header (name + time) - only for the first message in a group
    if (!isJoined) {
        html += `<div class="msg-header">
            <span class="msg-name name-c${colorNum}">${escapeHtml(msg.from_name || 'Unknown')}</span>
            <span class="msg-time">${formatTime(msg.date)}</span>
            ${msg.is_edited ? '<span class="msg-edited">edited</span>' : ''}
        </div>`;
    }

    // Forward block
    if (msg.forwarded_from) {
        html += `<div class="forward-block">
            <div class="forward-label">Forwarded message</div>
            <div class="forward-name">${escapeHtml(msg.forwarded_from)}</div>
        </div>`;
    }

    // Reply block
    if (msg.reply_to_message_id && msg.reply_to_name) {
        // Only a purely numeric id is placed into the inline handler;
        // anything else would be evaluated as script.
        const replyId = String(msg.reply_to_message_id);
        const clickAttr = /^-?\d+$/.test(replyId)
            ? ` onclick="goToMessage(${replyId})"`
            : '';
        html += `<div class="reply-block"${clickAttr}>
            <div class="reply-content">
                <div class="reply-name">${escapeHtml(msg.reply_to_name)}</div>
                <div class="reply-text">${escapeHtml(msg.reply_to_text || '')}</div>
            </div>
        </div>`;
    }

    // Message text
    if (msg.text) {
        html += `<div class="msg-text">${formatMessageText(msg.text, msg.entities)}</div>`;
    }

    // Entity links (extracted from DB)
    const links = (msg.entities || []).filter(e => e.type === 'link' || e.type === 'text_link');
    let linksHtml = '';
    const seen = new Set();
    for (const link of links) {
        const url = link.value;
        if (typeof url !== 'string' || url === '') continue;
        if (seen.has(url)) continue;
        seen.add(url);
        // Skip if the link is already visible in the text
        if (msg.text && msg.text.includes(url)) continue;
        // Never render script-capable schemes as clickable links.
        if (!isSafeLinkHref(url)) continue;
        const domain = getDomain(url);
        linksHtml += `<a class="entity-link" href="${escapeHtml(url).replace(/"/g, '&quot;')}" target="_blank" rel="noopener">
            <span class="link-icon">🔗</span>
            <span class="link-domain">${escapeHtml(domain)}</span>
        </a>`;
    }
    // Emit the container only when it has content, so a message whose
    // links were all skipped gets no empty spacer.
    if (linksHtml) {
        html += `<div class="entity-links">${linksHtml}</div>`;
    }

    // Media badge
    if (msg.has_media) {
        const icon = msg.has_photo ? '📷' : '📎';
        const label = msg.has_photo ? 'Photo' : 'Media';
        html += `<div class="media-badge"><span class="media-icon">${icon}</span> ${label}</div>`;
    }

    // Time for joined messages (shown on hover)
    if (isJoined) {
        html += `<div class="msg-time-inline">${formatTime(msg.date)}${msg.is_edited ? ' · edited' : ''}</div>`;
    }

    html += '</div>'; // close msg-body
    el.innerHTML = html;
    frag.appendChild(el);

    return frag;
}
|
| 703 |
+
|
| 704 |
+
/**
 * Absolute difference between two date strings, in minutes.
 *
 * @param {string} dateStr1 - First date.
 * @param {string} dateStr2 - Second date.
 * @returns {number} Minutes between the two dates, or 999 when either
 *     date is missing or cannot be parsed. The sentinel is large, so the
 *     messages are never grouped together.
 */
function timeDiffMinutes(dateStr1, dateStr2) {
    if (!dateStr1 || !dateStr2) return 999;

    const diffMs = Math.abs(new Date(dateStr2) - new Date(dateStr1));
    // Unparsable input gives NaN; report it like missing input.
    if (Number.isNaN(diffMs)) return 999;

    return diffMs / 60000;
}
|
| 708 |
+
|
| 709 |
+
// ===== Render All =====
|
| 710 |
+
/**
 * Re-render the whole message list from `allMessages`.
 * Empties the messages container, then appends every message in order.
 * Each message is given its predecessor (null for the first).
 */
function renderAllMessages() {
    const target = document.getElementById('messages-container');
    target.innerHTML = '';

    allMessages.forEach((message, index) => {
        const previous = index === 0 ? null : allMessages[index - 1];
        target.appendChild(renderMessage(message, previous));
    });
}
|
| 717 |
+
|
| 718 |
+
// ===== Load Messages =====
|
| 719 |
+
/**
 * Load and render the newest batch of messages.
 *
 * A one-message request is sent first to read the total count. The last
 * BATCH_SIZE messages are then fetched, rendered, and the view is
 * scrolled to the bottom. Failures, including non-2xx HTTP responses,
 * are logged and reported through a toast. The loading flag and spinner
 * are always reset.
 *
 * @returns {Promise<void>}
 */
async function loadInitialMessages() {
    if (loading) return;
    loading = true;
    const loadingEl = document.getElementById('loading');
    loadingEl.style.display = 'block';

    try {
        const countRes = await fetch('/api/chat/messages?limit=1&offset=0');
        if (!countRes.ok) {
            throw new Error(`Message count request failed: HTTP ${countRes.status}`);
        }
        const countData = await countRes.json();
        totalMessages = countData.total || 0;

        if (totalMessages === 0) {
            document.getElementById('messages-container').innerHTML =
                '<div class="date-separator"><div class="date-badge">No messages found</div></div>';
            // Nothing older exists, so the "load earlier" control is useless.
            document.getElementById('load-more-top').style.display = 'none';
            return;
        }

        // Start at the tail of the history so the newest messages show first.
        const startOffset = Math.max(0, totalMessages - BATCH_SIZE);
        oldestOffset = startOffset;

        const res = await fetch(`/api/chat/messages?limit=${BATCH_SIZE}&offset=${startOffset}`);
        if (!res.ok) {
            throw new Error(`Message request failed: HTTP ${res.status}`);
        }
        const data = await res.json();

        if (data.messages && data.messages.length > 0) {
            allMessages = data.messages;
            renderAllMessages();
            // Short delay before scrolling, kept from the original code.
            setTimeout(() => { scrollToBottom(); initialLoad = false; }, 100);

            if (oldestOffset <= 0) {
                document.getElementById('load-more-top').style.display = 'none';
            }
        }
    } catch (e) {
        console.error('Error loading messages:', e);
        showToast('Error loading messages');
    } finally {
        // Runs on every exit path, including the early return above.
        loading = false;
        loadingEl.style.display = 'none';
    }
}
|
| 760 |
+
|
| 761 |
+
/**
 * Fetch the batch of messages immediately preceding the oldest one currently
 * loaded, prepend it to `allMessages`, re-render, and shift the window scroll
 * by the height that was added. Does nothing while another load is running
 * or when `oldestOffset` is already 0.
 */
async function loadOlderMessages() {
    if (loading || oldestOffset <= 0) return;
    loading = true;
    const loadOlderBtn = document.getElementById('load-older-btn');
    loadOlderBtn.disabled = true;

    try {
        // Clamp at 0 and shrink the limit so the final batch does not
        // overlap messages that are already loaded.
        const newOffset = Math.max(0, oldestOffset - BATCH_SIZE);
        const limit = oldestOffset - newOffset;

        const res = await fetch(`/api/chat/messages?limit=${limit}&offset=${newOffset}`);
        // Without this check an error payload (no `messages`) is a silent no-op.
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        const data = await res.json();

        if (data.messages && data.messages.length > 0) {
            const container = document.getElementById('messages-container');
            const scrollBefore = container.scrollHeight;

            allMessages = [...data.messages, ...allMessages];
            oldestOffset = newOffset;
            renderAllMessages();

            // Scroll by the height gained from the prepended messages.
            const scrollAfter = container.scrollHeight;
            window.scrollBy(0, scrollAfter - scrollBefore);

            if (oldestOffset <= 0) {
                document.getElementById('load-more-top').style.display = 'none';
            }
        }
    } catch (e) {
        console.error('Error loading older messages:', e);
        showToast('Error loading messages');
    } finally {
        loading = false;
        loadOlderBtn.disabled = false;
    }
}
|
| 796 |
+
|
| 797 |
+
// ===== Navigation =====
|
| 798 |
+
// Scroll to the element with id `message<messageId>` and give it the
// `selected` class for 2.5 seconds; show a toast if no such element exists.
function goToMessage(messageId) {
    const target = document.getElementById(`message${messageId}`);
    if (!target) {
        showToast('Message not in current view');
        return;
    }

    target.scrollIntoView({ behavior: 'smooth', block: 'center' });
    target.classList.add('selected');
    setTimeout(() => {
        target.classList.remove('selected');
    }, 2500);
}
|
| 808 |
+
|
| 809 |
+
// Smooth-scroll the window to the full height of the document body.
function scrollToBottom() {
    const pageBottom = document.body.scrollHeight;
    window.scrollTo({ top: pageBottom, behavior: 'smooth' });
}
|
| 812 |
+
|
| 813 |
+
// Handle of the pending hide timer, kept so a newer toast can cancel it.
let toastHideTimer = null;

/**
 * Show `message` in the #toast element for 3 seconds.
 *
 * If a toast is already showing, its pending hide timer is cancelled so the
 * new message stays visible for its full duration.
 *
 * @param {string} message - Text to display (assigned via textContent).
 */
function showToast(message) {
    const toast = document.getElementById('toast');
    toast.textContent = message;
    toast.classList.add('visible');

    if (toastHideTimer !== null) clearTimeout(toastHideTimer);
    toastHideTimer = setTimeout(() => {
        toastHideTimer = null;
        toast.classList.remove('visible');
    }, 3000);
}
|
| 819 |
+
|
| 820 |
+
// Scroll button visibility
|
| 821 |
+
window.addEventListener('scroll', () => {
    // Distance from the bottom edge of the viewport to the end of the page.
    const remaining = document.body.scrollHeight - window.scrollY - window.innerHeight;
    const scrollBtn = document.getElementById('scroll-bottom');
    const farFromBottom = remaining > 500;
    scrollBtn.classList.toggle('visible', farFromBottom);
});

// ===== Init =====
loadInitialMessages();
|
| 829 |
+
</script>
|
| 830 |
+
</body>
|
| 831 |
+
</html>
|
templates/index.html
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Telegram Analytics Dashboard</title>
|
| 7 |
+
<link rel="stylesheet" href="/static/css/style.css">
|
| 8 |
+
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
| 9 |
+
</head>
|
| 10 |
+
<body>
|
| 11 |
+
<!-- Sidebar -->
|
| 12 |
+
<nav class="sidebar">
|
| 13 |
+
<div class="logo">
|
| 14 |
+
<span class="logo-icon">📊</span>
|
| 15 |
+
<span class="logo-text">TG Analytics</span>
|
| 16 |
+
</div>
|
| 17 |
+
<ul class="nav-menu">
|
| 18 |
+
<li class="nav-item active">
|
| 19 |
+
<a href="/" class="nav-link">
|
| 20 |
+
<span class="icon">📈</span>
|
| 21 |
+
<span>Overview</span>
|
| 22 |
+
</a>
|
| 23 |
+
</li>
|
| 24 |
+
<li class="nav-item">
|
| 25 |
+
<a href="/users" class="nav-link">
|
| 26 |
+
<span class="icon">👥</span>
|
| 27 |
+
<span>Users</span>
|
| 28 |
+
</a>
|
| 29 |
+
</li>
|
| 30 |
+
<li class="nav-item">
|
| 31 |
+
<a href="/chat" class="nav-link">
|
| 32 |
+
<span class="icon">💬</span>
|
| 33 |
+
<span>Chat</span>
|
| 34 |
+
</a>
|
| 35 |
+
</li>
|
| 36 |
+
<li class="nav-item">
|
| 37 |
+
<a href="/search" class="nav-link">
|
| 38 |
+
<span class="icon">🔍</span>
|
| 39 |
+
<span>Search</span>
|
| 40 |
+
</a>
|
| 41 |
+
</li>
|
| 42 |
+
<li class="nav-item">
|
| 43 |
+
<a href="/moderation" class="nav-link">
|
| 44 |
+
<span class="icon">🛡️</span>
|
| 45 |
+
<span>Moderation</span>
|
| 46 |
+
</a>
|
| 47 |
+
</li>
|
| 48 |
+
<li class="nav-item">
|
| 49 |
+
<a href="/settings" class="nav-link">
|
| 50 |
+
<span class="icon">⚙️</span>
|
| 51 |
+
<span>Settings</span>
|
| 52 |
+
</a>
|
| 53 |
+
</li>
|
| 54 |
+
</ul>
|
| 55 |
+
<div class="sidebar-footer">
|
| 56 |
+
<div class="export-buttons">
|
| 57 |
+
<button onclick="exportUsers()" class="btn btn-sm">📥 Export Users</button>
|
| 58 |
+
<button onclick="exportMessages()" class="btn btn-sm">📥 Export Messages</button>
|
| 59 |
+
</div>
|
| 60 |
+
</div>
|
| 61 |
+
</nav>
|
| 62 |
+
|
| 63 |
+
<!-- Main Content -->
|
| 64 |
+
<main class="main-content">
|
| 65 |
+
<!-- Header -->
|
| 66 |
+
<header class="header">
|
| 67 |
+
<h1>Dashboard Overview</h1>
|
| 68 |
+
<div class="header-controls">
|
| 69 |
+
<select id="timeframe" class="select" onchange="loadAllData()">
|
| 70 |
+
<option value="today">Today</option>
|
| 71 |
+
<option value="yesterday">Yesterday</option>
|
| 72 |
+
<option value="week">This Week</option>
|
| 73 |
+
<option value="month" selected>This Month</option>
|
| 74 |
+
<option value="year">This Year</option>
|
| 75 |
+
<option value="all">All Time</option>
|
| 76 |
+
</select>
|
| 77 |
+
<button onclick="loadAllData()" class="btn btn-primary">🔄 Refresh</button>
|
| 78 |
+
</div>
|
| 79 |
+
</header>
|
| 80 |
+
|
| 81 |
+
<!-- Stats Cards -->
|
| 82 |
+
<section class="stats-grid">
|
| 83 |
+
<div class="stat-card">
|
| 84 |
+
<div class="stat-icon">💬</div>
|
| 85 |
+
<div class="stat-content">
|
| 86 |
+
<div class="stat-value" id="total-messages">-</div>
|
| 87 |
+
<div class="stat-label">Total Messages</div>
|
| 88 |
+
</div>
|
| 89 |
+
</div>
|
| 90 |
+
<div class="stat-card">
|
| 91 |
+
<div class="stat-icon">👤</div>
|
| 92 |
+
<div class="stat-content">
|
| 93 |
+
<div class="stat-value" id="active-users">-</div>
|
| 94 |
+
<div class="stat-label">Active Users</div>
|
| 95 |
+
</div>
|
| 96 |
+
</div>
|
| 97 |
+
<div class="stat-card">
|
| 98 |
+
<div class="stat-icon">📅</div>
|
| 99 |
+
<div class="stat-content">
|
| 100 |
+
<div class="stat-value" id="messages-per-day">-</div>
|
| 101 |
+
<div class="stat-label">Messages/Day</div>
|
| 102 |
+
</div>
|
| 103 |
+
</div>
|
| 104 |
+
<div class="stat-card">
|
| 105 |
+
<div class="stat-icon">🔗</div>
|
| 106 |
+
<div class="stat-content">
|
| 107 |
+
<div class="stat-value" id="links-count">-</div>
|
| 108 |
+
<div class="stat-label">Links Shared</div>
|
| 109 |
+
</div>
|
| 110 |
+
</div>
|
| 111 |
+
<div class="stat-card">
|
| 112 |
+
<div class="stat-icon">🖼️</div>
|
| 113 |
+
<div class="stat-content">
|
| 114 |
+
<div class="stat-value" id="media-count">-</div>
|
| 115 |
+
<div class="stat-label">Media Shared</div>
|
| 116 |
+
</div>
|
| 117 |
+
</div>
|
| 118 |
+
<div class="stat-card">
|
| 119 |
+
<div class="stat-icon">↩️</div>
|
| 120 |
+
<div class="stat-content">
|
| 121 |
+
<div class="stat-value" id="replies-count">-</div>
|
| 122 |
+
<div class="stat-label">Replies</div>
|
| 123 |
+
</div>
|
| 124 |
+
</div>
|
| 125 |
+
</section>
|
| 126 |
+
|
| 127 |
+
<!-- Charts Row 1 -->
|
| 128 |
+
<section class="charts-row">
|
| 129 |
+
<div class="chart-card large">
|
| 130 |
+
<div class="chart-header">
|
| 131 |
+
<h3>Message Volume</h3>
|
| 132 |
+
<select id="messages-granularity" class="select-sm" onchange="loadMessagesChart()">
|
| 133 |
+
<option value="hour">Hourly</option>
|
| 134 |
+
<option value="day" selected>Daily</option>
|
| 135 |
+
<option value="week">Weekly</option>
|
| 136 |
+
</select>
|
| 137 |
+
</div>
|
| 138 |
+
<div class="chart-container">
|
| 139 |
+
<canvas id="messages-chart"></canvas>
|
| 140 |
+
</div>
|
| 141 |
+
</div>
|
| 142 |
+
<div class="chart-card">
|
| 143 |
+
<div class="chart-header">
|
| 144 |
+
<h3>Active Users</h3>
|
| 145 |
+
</div>
|
| 146 |
+
<div class="chart-container">
|
| 147 |
+
<canvas id="users-chart"></canvas>
|
| 148 |
+
</div>
|
| 149 |
+
</div>
|
| 150 |
+
</section>
|
| 151 |
+
|
| 152 |
+
<!-- Charts Row 2 -->
|
| 153 |
+
<section class="charts-row">
|
| 154 |
+
<div class="chart-card">
|
| 155 |
+
<div class="chart-header">
|
| 156 |
+
<h3>Activity by Hour</h3>
|
| 157 |
+
</div>
|
| 158 |
+
<div class="chart-container">
|
| 159 |
+
<canvas id="hourly-chart"></canvas>
|
| 160 |
+
</div>
|
| 161 |
+
</div>
|
| 162 |
+
<div class="chart-card">
|
| 163 |
+
<div class="chart-header">
|
| 164 |
+
<h3>Activity by Day</h3>
|
| 165 |
+
</div>
|
| 166 |
+
<div class="chart-container">
|
| 167 |
+
<canvas id="daily-chart"></canvas>
|
| 168 |
+
</div>
|
| 169 |
+
</div>
|
| 170 |
+
</section>
|
| 171 |
+
|
| 172 |
+
<!-- Heatmap -->
|
| 173 |
+
<section class="charts-row">
|
| 174 |
+
<div class="chart-card full-width">
|
| 175 |
+
<div class="chart-header">
|
| 176 |
+
<h3>Activity Heatmap</h3>
|
| 177 |
+
<span class="chart-subtitle">Hour of Day vs Day of Week</span>
|
| 178 |
+
</div>
|
| 179 |
+
<div class="heatmap-container" id="heatmap">
|
| 180 |
+
<!-- Heatmap will be rendered here -->
|
| 181 |
+
</div>
|
| 182 |
+
</div>
|
| 183 |
+
</section>
|
| 184 |
+
|
| 185 |
+
<!-- Top Lists -->
|
| 186 |
+
<section class="lists-row">
|
| 187 |
+
<div class="list-card">
|
| 188 |
+
<div class="list-header">
|
| 189 |
+
<h3>🏆 Top Users</h3>
|
| 190 |
+
<a href="/users" class="link">View All →</a>
|
| 191 |
+
</div>
|
| 192 |
+
<div class="list-content" id="top-users-list">
|
| 193 |
+
<!-- List will be rendered here -->
|
| 194 |
+
</div>
|
| 195 |
+
</div>
|
| 196 |
+
<div class="list-card">
|
| 197 |
+
<div class="list-header">
|
| 198 |
+
<h3>🔤 Top Words</h3>
|
| 199 |
+
</div>
|
| 200 |
+
<div class="list-content" id="top-words-list">
|
| 201 |
+
<!-- List will be rendered here -->
|
| 202 |
+
</div>
|
| 203 |
+
</div>
|
| 204 |
+
<div class="list-card">
|
| 205 |
+
<div class="list-header">
|
| 206 |
+
<h3>🌐 Top Domains</h3>
|
| 207 |
+
</div>
|
| 208 |
+
<div class="list-content" id="top-domains-list">
|
| 209 |
+
<!-- List will be rendered here -->
|
| 210 |
+
</div>
|
| 211 |
+
</div>
|
| 212 |
+
</section>
|
| 213 |
+
</main>
|
| 214 |
+
|
| 215 |
+
<script src="/static/js/dashboard.js"></script>
|
| 216 |
+
<script>
|
| 217 |
+
// Initialize
|
| 218 |
+
// Start the first data load once the DOM has been parsed.
document.addEventListener('DOMContentLoaded', () => { loadAllData(); });
|
| 221 |
+
</script>
|
| 222 |
+
</body>
|
| 223 |
+
</html>
|
templates/moderation.html
ADDED
|
@@ -0,0 +1,459 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Moderation - Telegram Analytics</title>
|
| 7 |
+
<link rel="stylesheet" href="/static/css/style.css">
|
| 8 |
+
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
| 9 |
+
</head>
|
| 10 |
+
<body>
|
| 11 |
+
<!-- Sidebar -->
|
| 12 |
+
<nav class="sidebar">
|
| 13 |
+
<div class="logo">
|
| 14 |
+
<span class="logo-icon">📊</span>
|
| 15 |
+
<span class="logo-text">TG Analytics</span>
|
| 16 |
+
</div>
|
| 17 |
+
<ul class="nav-menu">
|
| 18 |
+
<li class="nav-item">
|
| 19 |
+
<a href="/" class="nav-link">
|
| 20 |
+
<span class="icon">📈</span>
|
| 21 |
+
<span>Overview</span>
|
| 22 |
+
</a>
|
| 23 |
+
</li>
|
| 24 |
+
<li class="nav-item">
|
| 25 |
+
<a href="/users" class="nav-link">
|
| 26 |
+
<span class="icon">👥</span>
|
| 27 |
+
<span>Users</span>
|
| 28 |
+
</a>
|
| 29 |
+
</li>
|
| 30 |
+
<li class="nav-item">
|
| 31 |
+
<a href="/chat" class="nav-link">
|
| 32 |
+
<span class="icon">💬</span>
|
| 33 |
+
<span>Chat</span>
|
| 34 |
+
</a>
|
| 35 |
+
</li>
|
| 36 |
+
<li class="nav-item">
|
| 37 |
+
<a href="/search" class="nav-link">
|
| 38 |
+
<span class="icon">🔍</span>
|
| 39 |
+
<span>Search</span>
|
| 40 |
+
</a>
|
| 41 |
+
</li>
|
| 42 |
+
<li class="nav-item active">
|
| 43 |
+
<a href="/moderation" class="nav-link">
|
| 44 |
+
<span class="icon">🛡️</span>
|
| 45 |
+
<span>Moderation</span>
|
| 46 |
+
</a>
|
| 47 |
+
</li>
|
| 48 |
+
<li class="nav-item">
|
| 49 |
+
<a href="/settings" class="nav-link">
|
| 50 |
+
<span class="icon">⚙️</span>
|
| 51 |
+
<span>Settings</span>
|
| 52 |
+
</a>
|
| 53 |
+
</li>
|
| 54 |
+
</ul>
|
| 55 |
+
</nav>
|
| 56 |
+
|
| 57 |
+
<!-- Main Content -->
|
| 58 |
+
<main class="main-content">
|
| 59 |
+
<!-- Header -->
|
| 60 |
+
<header class="header">
|
| 61 |
+
<h1>Moderation & Content Analytics</h1>
|
| 62 |
+
<div class="header-controls">
|
| 63 |
+
<select id="timeframe" class="select" onchange="loadAllData()">
|
| 64 |
+
<option value="today">Today</option>
|
| 65 |
+
<option value="yesterday">Yesterday</option>
|
| 66 |
+
<option value="week">This Week</option>
|
| 67 |
+
<option value="month" selected>This Month</option>
|
| 68 |
+
<option value="year">This Year</option>
|
| 69 |
+
<option value="all">All Time</option>
|
| 70 |
+
</select>
|
| 71 |
+
<button onclick="loadAllData()" class="btn btn-primary">🔄 Refresh</button>
|
| 72 |
+
</div>
|
| 73 |
+
</header>
|
| 74 |
+
|
| 75 |
+
<!-- Content Stats -->
|
| 76 |
+
<section class="stats-grid">
|
| 77 |
+
<div class="stat-card">
|
| 78 |
+
<div class="stat-icon">🔗</div>
|
| 79 |
+
<div class="stat-content">
|
| 80 |
+
<div class="stat-value" id="total-links">-</div>
|
| 81 |
+
<div class="stat-label">Links Shared</div>
|
| 82 |
+
</div>
|
| 83 |
+
</div>
|
| 84 |
+
<div class="stat-card">
|
| 85 |
+
<div class="stat-icon">🖼️</div>
|
| 86 |
+
<div class="stat-content">
|
| 87 |
+
<div class="stat-value" id="total-media">-</div>
|
| 88 |
+
<div class="stat-label">Media Shared</div>
|
| 89 |
+
</div>
|
| 90 |
+
</div>
|
| 91 |
+
<div class="stat-card">
|
| 92 |
+
<div class="stat-icon">@</div>
|
| 93 |
+
<div class="stat-content">
|
| 94 |
+
<div class="stat-value" id="total-mentions">-</div>
|
| 95 |
+
<div class="stat-label">Mentions</div>
|
| 96 |
+
</div>
|
| 97 |
+
</div>
|
| 98 |
+
<div class="stat-card">
|
| 99 |
+
<div class="stat-icon">↪️</div>
|
| 100 |
+
<div class="stat-content">
|
| 101 |
+
<div class="stat-value" id="total-forwards">-</div>
|
| 102 |
+
<div class="stat-label">Forwards</div>
|
| 103 |
+
</div>
|
| 104 |
+
</div>
|
| 105 |
+
</section>
|
| 106 |
+
|
| 107 |
+
<!-- Charts Row -->
|
| 108 |
+
<section class="charts-row">
|
| 109 |
+
<div class="chart-card">
|
| 110 |
+
<div class="chart-header">
|
| 111 |
+
<h3>Top Shared Domains</h3>
|
| 112 |
+
</div>
|
| 113 |
+
<div class="chart-container">
|
| 114 |
+
<canvas id="domains-chart"></canvas>
|
| 115 |
+
</div>
|
| 116 |
+
</div>
|
| 117 |
+
<div class="chart-card">
|
| 118 |
+
<div class="chart-header">
|
| 119 |
+
<h3>Content Type Distribution</h3>
|
| 120 |
+
</div>
|
| 121 |
+
<div class="chart-container">
|
| 122 |
+
<canvas id="content-chart"></canvas>
|
| 123 |
+
</div>
|
| 124 |
+
</div>
|
| 125 |
+
</section>
|
| 126 |
+
|
| 127 |
+
<!-- Lists Row -->
|
| 128 |
+
<section class="lists-row">
|
| 129 |
+
<!-- Top Domains List -->
|
| 130 |
+
<div class="list-card">
|
| 131 |
+
<div class="list-header">
|
| 132 |
+
<h3>🌐 Top Domains</h3>
|
| 133 |
+
</div>
|
| 134 |
+
<div class="list-content" id="domains-list">
|
| 135 |
+
<div class="loading"><div class="spinner"></div></div>
|
| 136 |
+
</div>
|
| 137 |
+
</div>
|
| 138 |
+
|
| 139 |
+
<!-- Top Mentions List -->
|
| 140 |
+
<div class="list-card">
|
| 141 |
+
<div class="list-header">
|
| 142 |
+
<h3>@ Top Mentions</h3>
|
| 143 |
+
</div>
|
| 144 |
+
<div class="list-content" id="mentions-list">
|
| 145 |
+
<div class="loading"><div class="spinner"></div></div>
|
| 146 |
+
</div>
|
| 147 |
+
</div>
|
| 148 |
+
|
| 149 |
+
<!-- Top Words List -->
|
| 150 |
+
<div class="list-card">
|
| 151 |
+
<div class="list-header">
|
| 152 |
+
<h3>🔤 Top Words</h3>
|
| 153 |
+
</div>
|
| 154 |
+
<div class="list-content" id="words-list">
|
| 155 |
+
<div class="loading"><div class="spinner"></div></div>
|
| 156 |
+
</div>
|
| 157 |
+
</div>
|
| 158 |
+
</section>
|
| 159 |
+
|
| 160 |
+
<!-- Link Sharers -->
|
| 161 |
+
<section class="chart-card full-width">
|
| 162 |
+
<div class="chart-header">
|
| 163 |
+
<h3>Top Link Sharers</h3>
|
| 164 |
+
</div>
|
| 165 |
+
<div style="overflow-x: auto;">
|
| 166 |
+
<table class="users-table">
|
| 167 |
+
<thead>
|
| 168 |
+
<tr>
|
| 169 |
+
<th style="width: 60px;">Rank</th>
|
| 170 |
+
<th>User</th>
|
| 171 |
+
<th style="width: 120px;">Links</th>
|
| 172 |
+
<th style="width: 120px;">Media</th>
|
| 173 |
+
<th style="width: 120px;">Messages</th>
|
| 174 |
+
<th style="width: 150px;">Link Rate</th>
|
| 175 |
+
</tr>
|
| 176 |
+
</thead>
|
| 177 |
+
<tbody id="link-sharers-body">
|
| 178 |
+
<tr>
|
| 179 |
+
<td colspan="6" class="loading">
|
| 180 |
+
<div class="spinner"></div>
|
| 181 |
+
</td>
|
| 182 |
+
</tr>
|
| 183 |
+
</tbody>
|
| 184 |
+
</table>
|
| 185 |
+
</div>
|
| 186 |
+
</section>
|
| 187 |
+
</main>
|
| 188 |
+
|
| 189 |
+
<script>
|
| 190 |
+
// Chart instances
|
| 191 |
+
let domainsChart = null; // chart currently drawn on #domains-chart, if any
let contentChart = null; // chart currently drawn on #content-chart, if any

// Populate every panel once the DOM has been parsed.
document.addEventListener('DOMContentLoaded', () => { loadAllData(); });
|
| 198 |
+
|
| 199 |
+
// Refresh every panel on the page. All loaders are started up front and
// awaited together.
async function loadAllData() {
    const loaders = [loadOverview, loadDomains, loadMentions, loadWords, loadLinkSharers];
    const pending = loaders.map((load) => load());
    await Promise.all(pending);
}
|
| 208 |
+
|
| 209 |
+
/**
 * Fetch the overview counters for the selected timeframe, write them into
 * the stat cards and redraw the content-distribution chart.
 * Failures are logged to the console; the cards keep their previous values.
 */
async function loadOverview() {
    const timeframe = document.getElementById('timeframe').value;

    try {
        const response = await fetch(`/api/overview?timeframe=${timeframe}`);
        // Fail with a clear HTTP error rather than a TypeError raised later
        // when formatting fields that an error payload does not contain.
        if (!response.ok) throw new Error(`HTTP ${response.status}`);
        const data = await response.json();

        document.getElementById('total-links').textContent = formatNumber(data.links_count);
        document.getElementById('total-media').textContent = formatNumber(data.media_count);
        document.getElementById('total-mentions').textContent = formatNumber(data.mentions_count);
        document.getElementById('total-forwards').textContent = formatNumber(data.forwards_count);

        // Update content distribution chart
        renderContentChart(data);
    } catch (error) {
        console.error('Error loading overview:', error);
    }
}
|
| 227 |
+
|
| 228 |
+
/**
 * Fetch the most-shared domains for the selected timeframe, render them into
 * #domains-list and redraw the domains bar chart from the first 8 entries.
 * On failure the list shows an error state and the error is logged.
 */
async function loadDomains() {
    const timeframe = document.getElementById('timeframe').value;
    const listDiv = document.getElementById('domains-list');

    try {
        const response = await fetch(`/api/top/domains?timeframe=${timeframe}&limit=15`);
        if (!response.ok) throw new Error(`HTTP ${response.status}`);
        const data = await response.json();

        if (data.length === 0) {
            listDiv.innerHTML = '<div class="empty-state">No domains found</div>';
            // Remove the chart drawn for the previously selected timeframe
            // so it does not contradict the empty list.
            if (domainsChart) {
                domainsChart.destroy();
                domainsChart = null;
            }
            return;
        }

        listDiv.innerHTML = data.map((item, i) => `
            <div class="list-item">
                <span class="list-rank ${i < 3 ? ['gold', 'silver', 'bronze'][i] : ''}">#${i + 1}</span>
                <div class="list-info">
                    <div class="list-name">${escapeHtml(item.domain)}</div>
                </div>
                <span class="list-value">${formatNumber(item.count)}</span>
            </div>
        `).join('');

        // Render domains chart
        renderDomainsChart(data.slice(0, 8));
    } catch (error) {
        // Log as loadOverview does, so the failure is not lost.
        console.error('Error loading domains:', error);
        listDiv.innerHTML = '<div class="empty-state">Error loading domains</div>';
    }
}
|
| 257 |
+
|
| 258 |
+
/**
 * Fetch the most-mentioned usernames for the selected timeframe and render
 * them into #mentions-list. On failure the list shows an error state and the
 * error is logged.
 */
async function loadMentions() {
    const timeframe = document.getElementById('timeframe').value;
    const listDiv = document.getElementById('mentions-list');

    try {
        const response = await fetch(`/api/top/mentions?timeframe=${timeframe}&limit=15`);
        if (!response.ok) throw new Error(`HTTP ${response.status}`);
        const data = await response.json();

        if (data.length === 0) {
            listDiv.innerHTML = '<div class="empty-state">No mentions found</div>';
            return;
        }

        listDiv.innerHTML = data.map((item, i) => `
            <div class="list-item">
                <span class="list-rank ${i < 3 ? ['gold', 'silver', 'bronze'][i] : ''}">#${i + 1}</span>
                <div class="list-info">
                    <div class="list-name">@${escapeHtml(item.mention)}</div>
                </div>
                <span class="list-value">${formatNumber(item.count)}</span>
            </div>
        `).join('');
    } catch (error) {
        // Log as loadOverview does, so the failure is not lost.
        console.error('Error loading mentions:', error);
        listDiv.innerHTML = '<div class="empty-state">Error loading mentions</div>';
    }
}
|
| 284 |
+
|
| 285 |
+
/**
 * Fetch the most-used words for the selected timeframe and render them into
 * #words-list. On failure the list shows an error state and the error is
 * logged.
 */
async function loadWords() {
    const timeframe = document.getElementById('timeframe').value;
    const listDiv = document.getElementById('words-list');

    try {
        const response = await fetch(`/api/top/words?timeframe=${timeframe}&limit=15`);
        if (!response.ok) throw new Error(`HTTP ${response.status}`);
        const data = await response.json();

        if (data.length === 0) {
            listDiv.innerHTML = '<div class="empty-state">No words found</div>';
            return;
        }

        listDiv.innerHTML = data.map((item, i) => `
            <div class="list-item">
                <span class="list-rank ${i < 3 ? ['gold', 'silver', 'bronze'][i] : ''}">#${i + 1}</span>
                <div class="list-info">
                    <div class="list-name">${escapeHtml(item.word)}</div>
                </div>
                <span class="list-value">${formatNumber(item.count)}</span>
            </div>
        `).join('');
    } catch (error) {
        // Log as loadOverview does, so the failure is not lost.
        console.error('Error loading words:', error);
        listDiv.innerHTML = '<div class="empty-state">Error loading words</div>';
    }
}
|
| 311 |
+
|
| 312 |
+
/**
 * Fetch users for the selected timeframe, order them by number of links
 * shared and render the result into the #link-sharers-body table.
 * On failure the table shows an error row and the error is logged.
 */
async function loadLinkSharers() {
    const timeframe = document.getElementById('timeframe').value;
    const tbody = document.getElementById('link-sharers-body');

    try {
        // NOTE(review): only 10 users are requested, so the ranking below is
        // by links *within* whichever 10 the API returns; confirm the
        // endpoint's default ordering or whether it accepts a sort parameter.
        const response = await fetch(`/api/users?timeframe=${timeframe}&limit=10`);
        if (!response.ok) throw new Error(`HTTP ${response.status}`);
        const data = await response.json();

        // Sort a copy so the response payload is not mutated; treat a
        // missing link count as 0 so the comparator never returns NaN.
        const users = [...data.users]
            .sort((a, b) => (b.links || 0) - (a.links || 0))
            .slice(0, 10);

        if (users.length === 0) {
            tbody.innerHTML = '<tr><td colspan="6" class="empty-state">No data found</td></tr>';
            return;
        }

        tbody.innerHTML = users.map((user, i) => {
            const links = user.links || 0;
            const media = user.media || 0;
            const messages = user.messages || 0;

            // A missing name previously threw and blanked the whole table.
            const displayName = user.name ? String(user.name) : 'Unknown';
            // Array.from splits by code point, so an emoji or other astral
            // first character is not cut in half as charAt(0) would do.
            const initial = Array.from(displayName)[0].toUpperCase();

            const rateLabel = messages > 0 ? ((links / messages) * 100).toFixed(1) : '0';
            // The bar is full at a 50% link rate.
            const barWidth = Math.min(Number(rateLabel) * 2, 100);
            const rankClass = ['gold', 'silver', 'bronze'][i] || '';

            return `
                <tr>
                    <td><span class="list-rank ${rankClass}">#${i + 1}</span></td>
                    <td>
                        <div class="user-cell">
                            <div class="user-avatar">${escapeHtml(initial)}</div>
                            <div>
                                <div class="list-name">${escapeHtml(displayName)}</div>
                            </div>
                        </div>
                    </td>
                    <td><strong>${formatNumber(links)}</strong></td>
                    <td>${formatNumber(media)}</td>
                    <td>${formatNumber(messages)}</td>
                    <td>
                        ${rateLabel}%
                        <div class="progress-bar">
                            <div class="progress-fill" style="width: ${barWidth}%"></div>
                        </div>
                    </td>
                </tr>
            `;
        }).join('');
    } catch (error) {
        // Log as loadOverview does, so the failure is not lost.
        console.error('Error loading link sharers:', error);
        tbody.innerHTML = '<tr><td colspan="6" class="empty-state">Error loading data</td></tr>';
    }
}
|
| 359 |
+
|
| 360 |
+
// Draw a horizontal bar chart of domain counts on #domains-chart, replacing
// any chart previously stored in `domainsChart`. Labels are the first 15
// characters of each domain.
function renderDomainsChart(data) {
    const canvasCtx = document.getElementById('domains-chart').getContext('2d');

    if (domainsChart) {
        domainsChart.destroy();
    }

    const barColors = [
        'rgba(0, 136, 204, 0.8)',
        'rgba(40, 167, 69, 0.8)',
        'rgba(255, 193, 7, 0.8)',
        'rgba(220, 53, 69, 0.8)',
        'rgba(23, 162, 184, 0.8)',
        'rgba(108, 117, 125, 0.8)',
        'rgba(111, 66, 193, 0.8)',
        'rgba(253, 126, 20, 0.8)'
    ];
    const labels = data.map((entry) => entry.domain.substring(0, 15));
    const counts = data.map((entry) => entry.count);

    const chartData = {
        labels: labels,
        datasets: [{
            data: counts,
            backgroundColor: barColors,
            borderWidth: 0
        }]
    };

    const chartOptions = {
        indexAxis: 'y',
        responsive: true,
        maintainAspectRatio: false,
        plugins: { legend: { display: false } },
        scales: {
            x: {
                grid: { color: 'rgba(255, 255, 255, 0.1)' },
                ticks: { color: '#a0aec0' }
            },
            y: {
                grid: { display: false },
                ticks: { color: '#a0aec0' }
            }
        }
    };

    domainsChart = new Chart(canvasCtx, {
        type: 'bar',
        data: chartData,
        options: chartOptions
    });
}
|
| 402 |
+
|
| 403 |
+
// Draw a doughnut chart of the overview counters on #content-chart,
// replacing any chart previously stored in `contentChart`. The "Text Only"
// slice is total_messages minus links and media, floored at 0.
function renderContentChart(data) {
    const canvasCtx = document.getElementById('content-chart').getContext('2d');

    if (contentChart) {
        contentChart.destroy();
    }

    const remainder = data.total_messages - data.links_count - data.media_count;
    const slices = [
        Math.max(0, remainder),
        data.links_count,
        data.media_count,
        data.replies_count,
        data.forwards_count
    ];
    const sliceColors = [
        'rgba(0, 136, 204, 0.8)',
        'rgba(40, 167, 69, 0.8)',
        'rgba(255, 193, 7, 0.8)',
        'rgba(23, 162, 184, 0.8)',
        'rgba(108, 117, 125, 0.8)'
    ];

    const chartData = {
        labels: ['Text Only', 'With Links', 'With Media', 'Replies', 'Forwards'],
        datasets: [{
            data: slices,
            backgroundColor: sliceColors,
            borderWidth: 0
        }]
    };

    const chartOptions = {
        responsive: true,
        maintainAspectRatio: false,
        plugins: {
            legend: {
                position: 'right',
                labels: { color: '#a0aec0' }
            }
        }
    };

    contentChart = new Chart(canvasCtx, {
        type: 'doughnut',
        data: chartData,
        options: chartOptions
    });
}
|
| 444 |
+
|
| 445 |
+
// Helper functions
|
| 446 |
+
// Abbreviate a count for display: 1500 -> "1.5K", 2500000 -> "2.5M".
//
// num: number or numeric string. null/undefined yield "0"; values that are not
//      finite numbers are returned as their plain string form.
// Returns a string with one decimal place for K/M values, otherwise the value
// unchanged as a string. Negative values are abbreviated by magnitude.
function formatNumber(num) {
    if (num == null) return '0';

    const value = Number(num);
    if (!Number.isFinite(value)) return String(num);

    const magnitude = Math.abs(value);
    // From 999950 upward the "K" form would round to "1000.0K", so switch to "M".
    if (magnitude >= 999950) return (value / 1000000).toFixed(1) + 'M';
    if (magnitude >= 1000) return (value / 1000).toFixed(1) + 'K';
    return String(num);
}
|
| 451 |
+
|
| 452 |
+
// Return `text` with HTML-significant characters escaped so it can be
// interpolated into markup. The value is assigned as plain text to a detached
// <div> and read back through innerHTML, i.e. the browser does the escaping.
function escapeHtml(text) {
    const scratch = document.createElement('div');
    scratch.textContent = text;
    const { innerHTML: escaped } = scratch;
    return escaped;
}
|
| 457 |
+
</script>
|
| 458 |
+
</body>
|
| 459 |
+
</html>
|
templates/search.html
ADDED
|
@@ -0,0 +1,359 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Search - Telegram Analytics</title>
|
| 7 |
+
<link rel="stylesheet" href="/static/css/style.css">
|
| 8 |
+
</head>
|
| 9 |
+
<body>
|
| 10 |
+
<!-- Sidebar -->
|
| 11 |
+
<nav class="sidebar">
|
| 12 |
+
<div class="logo">
|
| 13 |
+
<span class="logo-icon">📊</span>
|
| 14 |
+
<span class="logo-text">TG Analytics</span>
|
| 15 |
+
</div>
|
| 16 |
+
<ul class="nav-menu">
|
| 17 |
+
<li class="nav-item">
|
| 18 |
+
<a href="/" class="nav-link">
|
| 19 |
+
<span class="icon">📈</span>
|
| 20 |
+
<span>Overview</span>
|
| 21 |
+
</a>
|
| 22 |
+
</li>
|
| 23 |
+
<li class="nav-item">
|
| 24 |
+
<a href="/users" class="nav-link">
|
| 25 |
+
<span class="icon">👥</span>
|
| 26 |
+
<span>Users</span>
|
| 27 |
+
</a>
|
| 28 |
+
</li>
|
| 29 |
+
<li class="nav-item">
|
| 30 |
+
<a href="/chat" class="nav-link">
|
| 31 |
+
<span class="icon">💬</span>
|
| 32 |
+
<span>Chat</span>
|
| 33 |
+
</a>
|
| 34 |
+
</li>
|
| 35 |
+
<li class="nav-item active">
|
| 36 |
+
<a href="/search" class="nav-link">
|
| 37 |
+
<span class="icon">🔍</span>
|
| 38 |
+
<span>Search</span>
|
| 39 |
+
</a>
|
| 40 |
+
</li>
|
| 41 |
+
<li class="nav-item">
|
| 42 |
+
<a href="/moderation" class="nav-link">
|
| 43 |
+
<span class="icon">🛡️</span>
|
| 44 |
+
<span>Moderation</span>
|
| 45 |
+
</a>
|
| 46 |
+
</li>
|
| 47 |
+
<li class="nav-item">
|
| 48 |
+
<a href="/settings" class="nav-link">
|
| 49 |
+
<span class="icon">⚙️</span>
|
| 50 |
+
<span>Settings</span>
|
| 51 |
+
</a>
|
| 52 |
+
</li>
|
| 53 |
+
</ul>
|
| 54 |
+
<div class="sidebar-footer">
|
| 55 |
+
<div class="export-buttons">
|
| 56 |
+
<button onclick="exportMessages()" class="btn btn-sm">📥 Export Messages</button>
|
| 57 |
+
</div>
|
| 58 |
+
</div>
|
| 59 |
+
</nav>
|
| 60 |
+
|
| 61 |
+
<!-- Main Content -->
|
| 62 |
+
<main class="main-content">
|
| 63 |
+
<!-- Header -->
|
| 64 |
+
<header class="header">
|
| 65 |
+
<h1>Search Messages</h1>
|
| 66 |
+
<div class="header-controls">
|
| 67 |
+
<select id="timeframe" class="select">
|
| 68 |
+
<option value="today">Today</option>
|
| 69 |
+
<option value="yesterday">Yesterday</option>
|
| 70 |
+
<option value="week">This Week</option>
|
| 71 |
+
<option value="month">This Month</option>
|
| 72 |
+
<option value="year">This Year</option>
|
| 73 |
+
<option value="all" selected>All Time</option>
|
| 74 |
+
</select>
|
| 75 |
+
</div>
|
| 76 |
+
</header>
|
| 77 |
+
|
| 78 |
+
<!-- Search Box -->
|
| 79 |
+
<section class="search-box">
|
| 80 |
+
<input type="search" id="search-input" class="search-input"
|
| 81 |
+
placeholder="Search messages... (supports Hebrew and English)"
|
| 82 |
+
onkeypress="if(event.key === 'Enter') performSearch()">
|
| 83 |
+
<button onclick="performSearch()" class="btn btn-primary">🔍 Search</button>
|
| 84 |
+
</section>
|
| 85 |
+
|
| 86 |
+
<!-- Search Tips -->
|
| 87 |
+
<section class="chart-card" style="margin-bottom: var(--spacing-xl);">
|
| 88 |
+
<div class="chart-header">
|
| 89 |
+
<h3>Search Tips</h3>
|
| 90 |
+
</div>
|
| 91 |
+
<div style="padding: var(--spacing-md); color: var(--text-secondary); font-size: 0.875rem;">
|
| 92 |
+
<ul style="list-style: none; display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 1rem;">
|
| 93 |
+
<li><strong>word1 word2</strong> - Messages containing both words</li>
|
| 94 |
+
<li><strong>"exact phrase"</strong> - Messages with exact phrase</li>
|
| 95 |
+
<li><strong>word1 OR word2</strong> - Messages with either word</li>
|
| 96 |
+
<li><strong>word*</strong> - Prefix search (word, words, wording)</li>
|
| 97 |
+
<li><strong>NOT word</strong> - Exclude messages with word</li>
|
| 98 |
+
<li><strong>Hebrew supported</strong> - Full Hebrew text search</li>
|
| 99 |
+
</ul>
|
| 100 |
+
</div>
|
| 101 |
+
</section>
|
| 102 |
+
|
| 103 |
+
<!-- Search Stats -->
|
| 104 |
+
<section class="stats-grid" id="search-stats" style="display: none;">
|
| 105 |
+
<div class="stat-card">
|
| 106 |
+
<div class="stat-icon">🔍</div>
|
| 107 |
+
<div class="stat-content">
|
| 108 |
+
<div class="stat-value" id="result-count">0</div>
|
| 109 |
+
<div class="stat-label">Results Found</div>
|
| 110 |
+
</div>
|
| 111 |
+
</div>
|
| 112 |
+
<div class="stat-card">
|
| 113 |
+
<div class="stat-icon">⚡</div>
|
| 114 |
+
<div class="stat-content">
|
| 115 |
+
<div class="stat-value" id="search-time">0ms</div>
|
| 116 |
+
<div class="stat-label">Search Time</div>
|
| 117 |
+
</div>
|
| 118 |
+
</div>
|
| 119 |
+
</section>
|
| 120 |
+
|
| 121 |
+
<!-- Search Results -->
|
| 122 |
+
<section class="search-results" id="search-results">
|
| 123 |
+
<div class="empty-state">
|
| 124 |
+
<div class="empty-state-icon">🔍</div>
|
| 125 |
+
<p>Enter a search term to find messages</p>
|
| 126 |
+
</div>
|
| 127 |
+
</section>
|
| 128 |
+
|
| 129 |
+
<!-- Pagination -->
|
| 130 |
+
<div class="pagination" id="pagination"></div>
|
| 131 |
+
</main>
|
| 132 |
+
|
| 133 |
+
<script>
|
| 134 |
+
// Search state shared by performSearch() and renderPagination()
let currentQuery = '';  // text of the most recent non-empty search
let currentPage = 1;    // 1-based page currently displayed
const pageSize = 20;    // results requested per page (sent as `limit`)
|
| 138 |
+
|
| 139 |
+
// Run a full-text search for the text in #search-input and render one page of results.
//
// page: 1-based page number (default 1). Non-numeric or sub-1 values fall back to 1
//       so the computed offset can never be negative.
//
// Reads the #timeframe filter, calls GET /api/search with limit/offset paging and
// fills #search-results, the #search-stats cards and the #pagination controls.
// Updates the shared currentQuery / currentPage state. Failures are logged and
// shown as an inline error state; nothing is thrown to the caller.
async function performSearch(page = 1) {
    const resultsDiv = document.getElementById('search-results');
    const statsSection = document.getElementById('search-stats');
    const pagination = document.getElementById('pagination');
    const query = document.getElementById('search-input').value.trim();
    const timeframe = document.getElementById('timeframe').value;

    if (!query) {
        resultsDiv.innerHTML = `
            <div class="empty-state">
                <div class="empty-state-icon">🔍</div>
                <p>Enter a search term to find messages</p>
            </div>
        `;
        statsSection.style.display = 'none';
        pagination.innerHTML = '';
        return;
    }

    const pageNumber = Math.max(1, Number.parseInt(page, 10) || 1);
    currentQuery = query;
    currentPage = pageNumber;

    resultsDiv.innerHTML = '<div class="loading"><div class="spinner"></div></div>';

    const startTime = performance.now();

    try {
        const offset = (pageNumber - 1) * pageSize;
        const response = await fetch(
            `/api/search?q=${encodeURIComponent(query)}&timeframe=${encodeURIComponent(timeframe)}&limit=${pageSize}&offset=${offset}`
        );
        if (!response.ok) {
            throw new Error(`Search request failed with HTTP ${response.status}`);
        }

        const data = await response.json();
        if (!Array.isArray(data.results)) {
            throw new Error('Search response did not contain a results array');
        }
        const results = data.results;

        const searchTime = Math.round(performance.now() - startTime);

        // The backend returns no total, so a full page is shown as "20+"
        // and is taken to mean that another page may exist.
        const hasMore = results.length === pageSize;

        statsSection.style.display = 'grid';
        document.getElementById('result-count').textContent = results.length + (hasMore ? '+' : '');
        document.getElementById('search-time').textContent = searchTime + 'ms';

        if (results.length === 0) {
            resultsDiv.innerHTML = `
                <div class="empty-state">
                    <div class="empty-state-icon">😕</div>
                    <p>No messages found for "${escapeHtml(query)}"</p>
                </div>
            `;
            // Keep "Previous" available when an empty page is reached past page 1;
            // on page 1 this clears the controls.
            renderPagination(false);
            return;
        }

        resultsDiv.innerHTML = results.map(result => `
            <div class="search-result-item">
                <div class="search-result-header">
                    <span class="search-result-author">${escapeHtml(result.from_name || 'Unknown')}</span>
                    <span class="search-result-date">${escapeHtml(result.date ?? '')}</span>
                </div>
                <div class="search-result-text">${highlightQuery(result.text, query)}</div>
                <div style="margin-top: 0.5rem; font-size: 0.75rem; color: var(--text-muted);">
                    ${result.has_links ? '🔗 Link' : ''}
                    ${result.has_media ? '🖼️ Media' : ''}
                </div>
            </div>
        `).join('');

        // Simple pagination (since we don't have total count from FTS)
        renderPagination(hasMore);

    } catch (error) {
        console.error('Search failed:', error);
        // Do not leave the counters of an earlier search next to the error state.
        statsSection.style.display = 'none';
        renderPagination(false);
        resultsDiv.innerHTML = `
            <div class="empty-state">
                <div class="empty-state-icon">❌</div>
                <p>Error performing search. Please try again.</p>
            </div>
        `;
    }
}
|
| 216 |
+
|
| 217 |
+
// Render the Previous / "Page N" / Next controls into #pagination.
//
// hasMore: true when the page just shown was full, i.e. a next page may exist.
// Uses the shared `currentPage`; the buttons call performSearch() with the
// neighbouring page number.
function renderPagination(hasMore) {
    const container = document.getElementById('pagination');
    const onFirstPage = currentPage === 1;

    // One incomplete first page needs no controls at all.
    if (onFirstPage && !hasMore) {
        container.innerHTML = '';
        return;
    }

    const previousButton = `<button class="page-btn" onclick="performSearch(${currentPage - 1})"
        ${onFirstPage ? 'disabled' : ''}>« Previous</button>`;
    const pageLabel = `<span style="padding: 0 1rem; color: var(--text-muted);">Page ${currentPage}</span>`;
    const nextButton = `<button class="page-btn" onclick="performSearch(${currentPage + 1})"
        ${hasMore ? '' : 'disabled'}>Next »</button>`;

    container.innerHTML = [previousButton, pageLabel, nextButton].join('');
}
|
| 237 |
+
|
| 238 |
+
// Return `text` as safe HTML with every occurrence of a query word wrapped in
// <span class="search-highlight">.
//
// text:  raw (unescaped) message text; falsy values yield ''.
// query: the user's search string. Quotes and '*' are stripped and the
//        operators OR / NOT are ignored, so only real search words are marked.
//
// Matching runs on the RAW text in a single pass and each piece is escaped
// afterwards. Highlighting already-escaped text word by word would let words
// such as "span", "class" or "amp" match inside the inserted markup or inside
// entities like "&amp;" and corrupt the output.
function highlightQuery(text, query) {
    if (!text) return '';

    const words = String(query ?? '')
        .replace(/["*]/g, '')
        .split(/\s+/)
        .filter(w => w && w !== 'OR' && w !== 'NOT');

    if (words.length === 0) return escapeHtml(text);

    // Longest alternative first, so "wording" is preferred over "word".
    const pattern = [...new Set(words)]
        .sort((a, b) => b.length - a.length)
        .map(escapeRegex)
        .join('|');
    const regex = new RegExp(`(${pattern})`, 'gi');

    // split() with one capture group alternates: plain text at even indexes,
    // matched words at odd indexes.
    return String(text)
        .split(regex)
        .map((part, index) => (index % 2 === 1
            ? `<span class="search-highlight">${escapeHtml(part)}</span>`
            : escapeHtml(part)))
        .join('');
}
|
| 254 |
+
|
| 255 |
+
// Return `text` with HTML-significant characters escaped so it can be
// interpolated into markup. The value is assigned as plain text to a detached
// <div> and read back through innerHTML, i.e. the browser does the escaping.
function escapeHtml(text) {
    const scratch = document.createElement('div');
    scratch.textContent = text;
    const { innerHTML: escaped } = scratch;
    return escaped;
}
|
| 260 |
+
|
| 261 |
+
// Backslash-escape every regular-expression metacharacter in `string` so the
// result can be embedded in a RegExp and match the input literally.
function escapeRegex(string) {
    const metaCharacters = /[.*+?^${}()|[\]\\]/g;
    // "$&" re-inserts the matched character after the added backslash.
    return string.replace(metaCharacters, '\\$&');
}
|
| 264 |
+
|
| 265 |
+
// Start a download of the messages for the timeframe selected in #timeframe by
// navigating to the export endpoint. The value is URL-encoded like the other
// query parameters built on this page.
function exportMessages() {
    const timeframe = document.getElementById('timeframe').value;
    window.location.href = `/api/export/messages?timeframe=${encodeURIComponent(timeframe)}`;
}
|
| 269 |
+
|
| 270 |
+
// AI Search
|
| 271 |
+
// Send the text in #ai-query to POST /api/ai/search using the mode selected in
// #ai-mode, then show the answer in #ai-answer and the generated SQL in #ai-sql.
// When the response carries message rows they are also rendered in the main
// results list via displayAIResults().
//
// Every value taken from the response is escaped before it is written with
// innerHTML. Returns without doing anything when the AI panel elements are not
// on the page or the query is empty. Errors are shown inside #ai-answer.
async function aiSearch() {
    const queryInput = document.getElementById('ai-query');
    const modeSelect = document.getElementById('ai-mode');
    const resultDiv = document.getElementById('ai-result');
    const answerDiv = document.getElementById('ai-answer');
    const sqlPre = document.getElementById('ai-sql');

    if (!queryInput || !modeSelect || !resultDiv || !answerDiv || !sqlPre) {
        console.warn('AI search panel is not present on this page');
        return;
    }

    const query = queryInput.value.trim();
    const mode = modeSelect.value;
    if (!query) return;

    resultDiv.style.display = 'block';
    const loadingMessages = {
        'context': 'קורא הודעות ומנתח...',
        'semantic': 'מחפש לפי משמעות + שולח ל-AI...',
        'sql': 'מחפש...',
        'auto': 'מחפש...'
    };
    answerDiv.textContent = loadingMessages[mode] || 'מחפש...';
    sqlPre.textContent = '';

    try {
        const response = await fetch('/api/ai/search', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ query, mode })
        });

        // The body is parsed even for error statuses so that a server-provided
        // `error` message can still be displayed.
        const data = await response.json();

        if (data.error) {
            answerDiv.innerHTML = `<span style="color:#ff6b6b;">שגיאה: ${escapeHtml(data.error)}</span>`;
            return;
        }

        let html = escapeHtml(data.answer || 'לא נמצאה תשובה');

        // Show mode info
        if (data.mode === 'context_search') {
            html += `<br><br><small style="color:var(--text-muted);">🧠 Hybrid Search: קראתי ${escapeHtml(String(data.context_messages ?? 0))} הודעות`;
            if (data.context_user) html += ` מ"${escapeHtml(data.context_user)}"`;
            if (Array.isArray(data.keywords_used) && data.keywords_used.length > 0) {
                const keywords = data.keywords_used.slice(0, 5).map(keyword => escapeHtml(String(keyword)));
                html += `<br>🔑 מילות מפתח: ${keywords.join(', ')}`;
            }
            html += `</small>`;
        } else if (data.mode === 'semantic_ai' || data.mode === 'semantic') {
            html += `<br><br><small style="color:var(--text-muted);">🔮 Semantic + AI: נמצאו ${escapeHtml(String(data.count ?? 0))} הודעות דומות`;
            if (data.total_with_threads && data.total_with_threads > data.count) {
                // The subtraction always yields a number, so it is safe to interpolate.
                html += ` + ${data.total_with_threads - data.count} הודעות מ-threads`;
            }
            html += `</small>`;
        }

        answerDiv.innerHTML = html;
        sqlPre.textContent = data.sql || '';

        // If results contain messages, optionally populate main search
        if (Array.isArray(data.results) && data.results.length > 0 && data.results[0].text) {
            displayAIResults(data.results);
        }
    } catch (error) {
        console.error('AI search failed:', error);
        answerDiv.textContent = `שגיאה: ${error.message}`;
    }
}
|
| 332 |
+
|
| 333 |
+
// Render message rows returned by the AI search into the main #search-results
// list and update the stats cards (count, and "AI" in place of a duration).
//
// results: array of message objects (from_name, date, text, optional score in
//          0..1). At most the first 20 are rendered. A non-array or empty
//          value leaves the page untouched.
// All response fields, including the date, are escaped before being inserted.
function displayAIResults(results) {
    if (!Array.isArray(results) || results.length === 0) return;

    const resultsDiv = document.getElementById('search-results');

    document.getElementById('search-stats').style.display = 'grid';
    document.getElementById('result-count').textContent = results.length;
    document.getElementById('search-time').textContent = 'AI';

    resultsDiv.innerHTML = results.slice(0, 20).map(result => {
        // The multiplication coerces score to a number, so this part is plain digits.
        const similarity = result.score ? ` (${(result.score * 100).toFixed(0)}% דמיון)` : '';
        return `
        <div class="search-result-item">
            <div class="search-result-header">
                <span class="search-result-author">${escapeHtml(result.from_name || 'Unknown')}</span>
                <span class="search-result-date">${escapeHtml(result.date || '')}${similarity}</span>
            </div>
            <div class="search-result-text">${escapeHtml(result.text || '')}</div>
        </div>
    `;
    }).join('');
}
|
| 352 |
+
|
| 353 |
+
// Once the DOM is parsed, put the cursor in the search box so the user can
// start typing immediately.
document.addEventListener('DOMContentLoaded', function focusSearchInput() {
    const searchInput = document.getElementById('search-input');
    searchInput.focus();
});
|
| 357 |
+
</script>
|
| 358 |
+
</body>
|
| 359 |
+
</html>
|
templates/settings.html
ADDED
|
@@ -0,0 +1,444 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="he" dir="rtl">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Settings - Telegram Analytics</title>
|
| 7 |
+
<link rel="stylesheet" href="/static/css/style.css">
|
| 8 |
+
<style>
|
| 9 |
+
.upload-zone {
|
| 10 |
+
border: 2px dashed var(--border-color);
|
| 11 |
+
border-radius: var(--radius-lg);
|
| 12 |
+
padding: var(--spacing-xl);
|
| 13 |
+
text-align: center;
|
| 14 |
+
transition: all 0.3s ease;
|
| 15 |
+
cursor: pointer;
|
| 16 |
+
margin-bottom: var(--spacing-xl);
|
| 17 |
+
}
|
| 18 |
+
|
| 19 |
+
.upload-zone:hover,
|
| 20 |
+
.upload-zone.dragover {
|
| 21 |
+
border-color: var(--primary);
|
| 22 |
+
background: rgba(0, 136, 204, 0.1);
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
.upload-zone-icon {
|
| 26 |
+
font-size: 3rem;
|
| 27 |
+
margin-bottom: var(--spacing-md);
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
.upload-zone-text {
|
| 31 |
+
color: var(--text-secondary);
|
| 32 |
+
margin-bottom: var(--spacing-sm);
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
.upload-zone-hint {
|
| 36 |
+
font-size: 0.75rem;
|
| 37 |
+
color: var(--text-muted);
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
.upload-progress {
|
| 41 |
+
display: none;
|
| 42 |
+
margin-top: var(--spacing-lg);
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
.upload-progress.active {
|
| 46 |
+
display: block;
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
.progress-bar-container {
|
| 50 |
+
background: var(--bg-sidebar);
|
| 51 |
+
border-radius: var(--radius-md);
|
| 52 |
+
height: 20px;
|
| 53 |
+
overflow: hidden;
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
.progress-bar-fill {
|
| 57 |
+
height: 100%;
|
| 58 |
+
background: var(--primary);
|
| 59 |
+
transition: width 0.3s ease;
|
| 60 |
+
display: flex;
|
| 61 |
+
align-items: center;
|
| 62 |
+
justify-content: center;
|
| 63 |
+
color: white;
|
| 64 |
+
font-size: 0.75rem;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
.upload-result {
|
| 68 |
+
display: none;
|
| 69 |
+
margin-top: var(--spacing-lg);
|
| 70 |
+
padding: var(--spacing-lg);
|
| 71 |
+
border-radius: var(--radius-lg);
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
.upload-result.success {
|
| 75 |
+
display: block;
|
| 76 |
+
background: rgba(40, 167, 69, 0.2);
|
| 77 |
+
border: 1px solid var(--success);
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
.upload-result.error {
|
| 81 |
+
display: block;
|
| 82 |
+
background: rgba(220, 53, 69, 0.2);
|
| 83 |
+
border: 1px solid var(--danger);
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
.result-title {
|
| 87 |
+
font-weight: 600;
|
| 88 |
+
margin-bottom: var(--spacing-md);
|
| 89 |
+
display: flex;
|
| 90 |
+
align-items: center;
|
| 91 |
+
gap: var(--spacing-sm);
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
.result-stats {
|
| 95 |
+
display: grid;
|
| 96 |
+
grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
|
| 97 |
+
gap: var(--spacing-md);
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
.result-stat {
|
| 101 |
+
text-align: center;
|
| 102 |
+
padding: var(--spacing-md);
|
| 103 |
+
background: var(--bg-sidebar);
|
| 104 |
+
border-radius: var(--radius-md);
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
.result-stat-value {
|
| 108 |
+
font-size: 1.5rem;
|
| 109 |
+
font-weight: 700;
|
| 110 |
+
color: var(--primary);
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
.result-stat-label {
|
| 114 |
+
font-size: 0.75rem;
|
| 115 |
+
color: var(--text-muted);
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
.db-stats {
|
| 119 |
+
display: grid;
|
| 120 |
+
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
|
| 121 |
+
gap: var(--spacing-md);
|
| 122 |
+
margin-bottom: var(--spacing-xl);
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
.db-stat {
|
| 126 |
+
background: var(--bg-card);
|
| 127 |
+
border-radius: var(--radius-lg);
|
| 128 |
+
padding: var(--spacing-lg);
|
| 129 |
+
border: 1px solid var(--border-color);
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
.db-stat-value {
|
| 133 |
+
font-size: 1.75rem;
|
| 134 |
+
font-weight: 700;
|
| 135 |
+
color: var(--primary);
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
.db-stat-label {
|
| 139 |
+
font-size: 0.875rem;
|
| 140 |
+
color: var(--text-muted);
|
| 141 |
+
margin-top: var(--spacing-xs);
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
.instructions {
|
| 145 |
+
background: var(--bg-card);
|
| 146 |
+
border-radius: var(--radius-lg);
|
| 147 |
+
padding: var(--spacing-lg);
|
| 148 |
+
border: 1px solid var(--border-color);
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
+
.instructions h3 {
|
| 152 |
+
margin-bottom: var(--spacing-md);
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
.instructions ol {
|
| 156 |
+
padding-right: var(--spacing-lg);
|
| 157 |
+
color: var(--text-secondary);
|
| 158 |
+
line-height: 1.8;
|
| 159 |
+
}
|
| 160 |
+
|
| 161 |
+
.instructions code {
|
| 162 |
+
background: var(--bg-sidebar);
|
| 163 |
+
padding: 2px 6px;
|
| 164 |
+
border-radius: var(--radius-sm);
|
| 165 |
+
font-family: monospace;
|
| 166 |
+
}
|
| 167 |
+
</style>
|
| 168 |
+
</head>
|
| 169 |
+
<body>
|
| 170 |
+
<!-- Sidebar -->
|
| 171 |
+
<nav class="sidebar">
|
| 172 |
+
<div class="logo">
|
| 173 |
+
<span class="logo-icon">📊</span>
|
| 174 |
+
<span class="logo-text">TG Analytics</span>
|
| 175 |
+
</div>
|
| 176 |
+
<ul class="nav-menu">
|
| 177 |
+
<li class="nav-item">
|
| 178 |
+
<a href="/" class="nav-link">
|
| 179 |
+
<span class="icon">📈</span>
|
| 180 |
+
<span>Overview</span>
|
| 181 |
+
</a>
|
| 182 |
+
</li>
|
| 183 |
+
<li class="nav-item">
|
| 184 |
+
<a href="/users" class="nav-link">
|
| 185 |
+
<span class="icon">👥</span>
|
| 186 |
+
<span>Users</span>
|
| 187 |
+
</a>
|
| 188 |
+
</li>
|
| 189 |
+
<li class="nav-item">
|
| 190 |
+
<a href="/chat" class="nav-link">
|
| 191 |
+
<span class="icon">💬</span>
|
| 192 |
+
<span>Chat</span>
|
| 193 |
+
</a>
|
| 194 |
+
</li>
|
| 195 |
+
<li class="nav-item">
|
| 196 |
+
<a href="/search" class="nav-link">
|
| 197 |
+
<span class="icon">🔍</span>
|
| 198 |
+
<span>Search</span>
|
| 199 |
+
</a>
|
| 200 |
+
</li>
|
| 201 |
+
<li class="nav-item">
|
| 202 |
+
<a href="/moderation" class="nav-link">
|
| 203 |
+
<span class="icon">🛡️</span>
|
| 204 |
+
<span>Moderation</span>
|
| 205 |
+
</a>
|
| 206 |
+
</li>
|
| 207 |
+
<li class="nav-item active">
|
| 208 |
+
<a href="/settings" class="nav-link">
|
| 209 |
+
<span class="icon">⚙️</span>
|
| 210 |
+
<span>Settings</span>
|
| 211 |
+
</a>
|
| 212 |
+
</li>
|
| 213 |
+
</ul>
|
| 214 |
+
</nav>
|
| 215 |
+
|
| 216 |
+
<!-- Main Content -->
|
| 217 |
+
<main class="main-content">
|
| 218 |
+
<!-- Header -->
|
| 219 |
+
<header class="header">
|
| 220 |
+
<h1>⚙️ Settings & Update Data</h1>
|
| 221 |
+
</header>
|
| 222 |
+
|
| 223 |
+
<!-- Database Stats -->
|
| 224 |
+
<section>
|
| 225 |
+
<h2 style="margin-bottom: var(--spacing-md);">📊 Database Status</h2>
|
| 226 |
+
<div class="db-stats" id="db-stats">
|
| 227 |
+
<div class="db-stat">
|
| 228 |
+
<div class="db-stat-value" id="stat-messages">-</div>
|
| 229 |
+
<div class="db-stat-label">Total Messages</div>
|
| 230 |
+
</div>
|
| 231 |
+
<div class="db-stat">
|
| 232 |
+
<div class="db-stat-value" id="stat-users">-</div>
|
| 233 |
+
<div class="db-stat-label">Total Users</div>
|
| 234 |
+
</div>
|
| 235 |
+
<div class="db-stat">
|
| 236 |
+
<div class="db-stat-value" id="stat-first">-</div>
|
| 237 |
+
<div class="db-stat-label">First Message</div>
|
| 238 |
+
</div>
|
| 239 |
+
<div class="db-stat">
|
| 240 |
+
<div class="db-stat-value" id="stat-last">-</div>
|
| 241 |
+
<div class="db-stat-label">Last Message</div>
|
| 242 |
+
</div>
|
| 243 |
+
<div class="db-stat">
|
| 244 |
+
<div class="db-stat-value" id="stat-size">-</div>
|
| 245 |
+
<div class="db-stat-label">Database Size</div>
|
| 246 |
+
</div>
|
| 247 |
+
</div>
|
| 248 |
+
</section>
|
| 249 |
+
|
| 250 |
+
<!-- Upload Section (disabled - updates done locally) -->
|
| 251 |
+
<section class="chart-card" style="margin-bottom: var(--spacing-xl); opacity: 0.6;">
|
| 252 |
+
<div class="chart-header">
|
| 253 |
+
<h3>📤 Update Database</h3>
|
| 254 |
+
</div>
|
| 255 |
+
<div style="padding: var(--spacing-lg); text-align: center; color: var(--text-muted);">
|
| 256 |
+
<p>עדכוני מסד הנתונים מתבצעים מקומית באמצעות daily_sync.py</p>
|
| 257 |
+
</div>
|
| 258 |
+
</section>
|
| 259 |
+
|
| 260 |
+
<!-- Instructions -->
|
| 261 |
+
<section class="instructions">
|
| 262 |
+
<h3>📖 איך לייצא נתונים מטלגרם</h3>
|
| 263 |
+
<ol>
|
| 264 |
+
<li>פתח את <strong>Telegram Desktop</strong> (לא ניתן מהאפליקציה הניידת)</li>
|
| 265 |
+
<li>לך ל-<strong>Settings → Advanced → Export Telegram data</strong></li>
|
| 266 |
+
<li>בחר את הקבוצה/צ'אט שברצונך לייצא</li>
|
| 267 |
+
<li>סמן <strong>JSON</strong> כפורמט הייצוא</li>
|
| 268 |
+
<li>לחץ <strong>Export</strong> והמתן לסיום</li>
|
| 269 |
+
<li>העלה את קובץ <code>result.json</code> כאן</li>
|
| 270 |
+
</ol>
|
| 271 |
+
<div style="margin-top: var(--spacing-lg); padding: var(--spacing-md); background: var(--bg-sidebar); border-radius: var(--radius-md);">
|
| 272 |
+
<strong>💡 טיפ:</strong> המערכת תזהה אוטומטית הודעות כפולות ותוסיף רק הודעות חדשות.
|
| 273 |
+
אין צורך לדאוג מהעלאת אותו קובץ פעמיים.
|
| 274 |
+
</div>
|
| 275 |
+
</section>
|
| 276 |
+
|
| 277 |
+
<!-- CLI Instructions -->
|
| 278 |
+
<section class="instructions" style="margin-top: var(--spacing-xl);">
|
| 279 |
+
<h3>💻 עדכון דרך שורת הפקודה</h3>
|
| 280 |
+
<p style="color: var(--text-secondary); margin-bottom: var(--spacing-md);">
|
| 281 |
+
לקבצים גדולים, מומלץ להשתמש בשורת הפקודה:
|
| 282 |
+
</p>
|
| 283 |
+
<pre style="background: var(--bg-sidebar); padding: var(--spacing-md); border-radius: var(--radius-md); overflow-x: auto; direction: ltr; text-align: left;">
|
| 284 |
+
# עדכון database קיים עם JSON חדש
|
| 285 |
+
python indexer.py new_export.json --db telegram.db --update
|
| 286 |
+
|
| 287 |
+
# יצירת database חדש
|
| 288 |
+
python indexer.py result.json --db telegram.db
|
| 289 |
+
</pre>
|
| 290 |
+
</section>
|
| 291 |
+
</main>
|
| 292 |
+
|
| 293 |
+
<script>
|
| 294 |
+
// Fill the "Database Status" cards as soon as the DOM is parsed
document.addEventListener('DOMContentLoaded', () => {
    loadDbStats();
});
|
| 296 |
+
|
| 297 |
+
// Fetch GET /api/db/stats and write the values into the "Database Status"
// cards (#stat-messages, #stat-users, #stat-first, #stat-last, #stat-size).
//
// Missing values are shown as '-'. Dates are formatted for the he-IL locale;
// a value that cannot be parsed as a date is shown as '-' instead of
// "Invalid Date". A non-2xx response or network error is logged and leaves the
// cards unchanged; nothing is thrown to the caller.
async function loadDbStats() {
    const setStat = (id, value) => {
        document.getElementById(id).textContent = value;
    };
    const formatCount = (value) => value?.toLocaleString() || '-';
    const formatDate = (value) => {
        if (!value) return '-';
        const parsed = new Date(value);
        return Number.isNaN(parsed.getTime()) ? '-' : parsed.toLocaleDateString('he-IL');
    };

    try {
        const response = await fetch('/api/db/stats');
        if (!response.ok) {
            throw new Error(`Stats request failed with HTTP ${response.status}`);
        }
        const stats = await response.json();

        setStat('stat-messages', formatCount(stats.total_messages));
        setStat('stat-users', formatCount(stats.total_users));
        setStat('stat-first', formatDate(stats.first_message));
        setStat('stat-last', formatDate(stats.last_message));
        setStat('stat-size', stats.db_size_mb ? `${stats.db_size_mb} MB` : '-');
    } catch (error) {
        console.error('Error loading db stats:', error);
    }
}
|
| 316 |
+
|
| 317 |
+
// Drag and drop handlers
// The upload section of this template is disabled and contains no element
// with id "upload-zone", so the lookup can return null. The listeners are
// attached only when the drop zone exists; otherwise the script would stop
// here with a TypeError while loading.
const uploadZone = document.getElementById('upload-zone');

if (uploadZone) {
    uploadZone.addEventListener('dragover', (e) => {
        // Without preventDefault the browser does not treat the zone as a drop target.
        e.preventDefault();
        uploadZone.classList.add('dragover');
    });

    uploadZone.addEventListener('dragleave', () => {
        uploadZone.classList.remove('dragover');
    });

    uploadZone.addEventListener('drop', (e) => {
        e.preventDefault();
        uploadZone.classList.remove('dragover');

        // Only the first dropped file is uploaded.
        const files = e.dataTransfer.files;
        if (files.length > 0) {
            uploadFile(files[0]);
        }
    });
}
|
| 338 |
+
|
| 339 |
+
// Change handler for a file input: forward the first selected file to
// uploadFile(). Does nothing when the selection is empty.
function handleFileSelect(event) {
    const [selectedFile] = event.target.files;
    if (!selectedFile) return;

    uploadFile(selectedFile);
}
|
| 345 |
+
|
| 346 |
+
/**
 * Upload a JSON export to `/api/update` and report the outcome in the page.
 *
 * Progress is shown through the `upload-progress` / `progress-fill` /
 * `progress-text` elements; the result is delegated to showSuccess() or
 * showError(). On success the database statistics are refreshed.
 *
 * @param {File} file - File chosen by the user; must have a `.json` extension
 *   (compared case-insensitively).
 * @returns {Promise<void>}
 */
async function uploadFile(file) {
    // Compare case-insensitively so "export.JSON" is accepted as well.
    if (!file.name.toLowerCase().endsWith('.json')) {
        showError('נא לבחור קובץ JSON בלבד');
        return;
    }

    const progressDiv = document.getElementById('upload-progress');
    const progressFill = document.getElementById('progress-fill');
    const progressText = document.getElementById('progress-text');
    const resultDiv = document.getElementById('upload-result');

    // Update the bar width/label and, optionally, the status line under it.
    const setProgress = (percent, text) => {
        progressFill.style.width = `${percent}%`;
        progressFill.textContent = `${percent}%`;
        if (text !== undefined) {
            progressText.textContent = text;
        }
    };

    // Reset and show progress
    progressDiv.classList.add('active');
    resultDiv.className = 'upload-result';
    setProgress(0, `מעלה ${file.name}...`);

    try {
        setProgress(20, 'קורא קובץ...');

        const formData = new FormData();
        formData.append('file', file);

        setProgress(50, 'מעבד נתונים...');

        const response = await fetch('/api/update', {
            method: 'POST',
            body: formData,
        });

        // An error page from a proxy or a crashed server is not JSON; report
        // the HTTP status instead of an opaque JSON parser message.
        let result;
        try {
            result = await response.json();
        } catch (parseError) {
            throw new Error(`Unexpected server response (HTTP ${response.status})`);
        }

        setProgress(100);

        if (result.success) {
            showSuccess(result.stats);
            loadDbStats(); // Refresh stats
        } else {
            showError(result.error || 'שגיאה לא ידועה');
        }
    } catch (error) {
        showError(error.message);
    }

    // Hide progress after a delay
    setTimeout(() => {
        progressDiv.classList.remove('active');
    }, 1000);
}
|
| 404 |
+
|
| 405 |
+
/**
 * Show the "update completed" panel with the import statistics.
 *
 * @param {object} [stats] - Import summary returned by the server. Fields
 *   read: `total_in_file`, `new_messages`, `duplicates`, `elapsed_seconds`.
 *   Missing fields (or a missing object) are rendered as 0.
 */
function showSuccess(stats) {
    // The server may report success without a stats payload; render zeros
    // rather than throwing and turning a successful upload into an error.
    const summary = stats ?? {};

    const resultDiv = document.getElementById('upload-result');
    const resultTitle = document.getElementById('result-title');
    const resultStats = document.getElementById('result-stats');

    resultDiv.className = 'upload-result success';
    resultTitle.textContent = '✅ העדכון הושלם בהצלחה!';

    // [displayed value, label] pairs, in display order.
    const cards = [
        [summary.total_in_file?.toLocaleString() || 0, 'הודעות בקובץ'],
        [summary.new_messages?.toLocaleString() || 0, 'הודעות חדשות נוספו'],
        [summary.duplicates?.toLocaleString() || 0, 'כפילויות (דולגו)'],
        [`${summary.elapsed_seconds?.toFixed(1) || 0}s`, 'זמן עיבוד'],
    ];

    resultStats.innerHTML = cards.map(([value, label]) => `
        <div class="result-stat">
            <div class="result-stat-value">${value}</div>
            <div class="result-stat-label">${label}</div>
        </div>
    `).join('');
}
|
| 432 |
+
|
| 433 |
+
/**
 * Show the error panel with the given message and clear any previous stats.
 *
 * @param {string} message - Human-readable error text. It may originate from
 *   the server response or from an exception, so it is treated as untrusted.
 */
function showError(message) {
    const resultDiv = document.getElementById('upload-result');
    const resultTitle = document.getElementById('result-title');
    const resultStats = document.getElementById('result-stats');

    resultDiv.className = 'upload-result error';
    // textContent, not innerHTML: the message must never be parsed as markup.
    resultTitle.textContent = `❌ שגיאה: ${message}`;
    resultStats.innerHTML = '';
}
|
| 442 |
+
</script>
|
| 443 |
+
</body>
|
| 444 |
+
</html>
|
templates/user_profile.html
ADDED
|
@@ -0,0 +1,721 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>User Profile - Telegram Analytics</title>
|
| 7 |
+
<link rel="stylesheet" href="/static/css/style.css">
|
| 8 |
+
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
| 9 |
+
<style>
|
| 10 |
+
/* Profile-specific styles */
|
| 11 |
+
.profile-header {
|
| 12 |
+
display: flex;
|
| 13 |
+
align-items: center;
|
| 14 |
+
gap: 2rem;
|
| 15 |
+
margin-bottom: 2rem;
|
| 16 |
+
padding: 2rem;
|
| 17 |
+
background: var(--bg-card);
|
| 18 |
+
border-radius: var(--radius-lg);
|
| 19 |
+
border: 1px solid var(--border-color);
|
| 20 |
+
}
|
| 21 |
+
|
| 22 |
+
.profile-avatar {
|
| 23 |
+
width: 100px;
|
| 24 |
+
height: 100px;
|
| 25 |
+
border-radius: 50%;
|
| 26 |
+
background: var(--primary);
|
| 27 |
+
display: flex;
|
| 28 |
+
align-items: center;
|
| 29 |
+
justify-content: center;
|
| 30 |
+
font-size: 2.5rem;
|
| 31 |
+
font-weight: 700;
|
| 32 |
+
flex-shrink: 0;
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
.profile-info { flex: 1; }
|
| 36 |
+
|
| 37 |
+
.profile-name {
|
| 38 |
+
font-size: 1.75rem;
|
| 39 |
+
font-weight: 700;
|
| 40 |
+
margin-bottom: 0.25rem;
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
.profile-meta {
|
| 44 |
+
color: var(--text-muted);
|
| 45 |
+
font-size: 0.875rem;
|
| 46 |
+
display: flex;
|
| 47 |
+
gap: 1rem;
|
| 48 |
+
flex-wrap: wrap;
|
| 49 |
+
margin-top: 0.5rem;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
.profile-meta span {
|
| 53 |
+
display: inline-flex;
|
| 54 |
+
align-items: center;
|
| 55 |
+
gap: 0.25rem;
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
.badge {
|
| 59 |
+
display: inline-block;
|
| 60 |
+
padding: 0.15rem 0.5rem;
|
| 61 |
+
border-radius: 4px;
|
| 62 |
+
font-size: 0.75rem;
|
| 63 |
+
font-weight: 600;
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
.badge-creator { background: #ffd700; color: #1a1a2e; }
|
| 67 |
+
.badge-admin { background: #28a745; color: white; }
|
| 68 |
+
.badge-bot { background: #6c757d; color: white; }
|
| 69 |
+
.badge-premium { background: #9b59b6; color: white; }
|
| 70 |
+
.badge-online { background: #28a745; color: white; }
|
| 71 |
+
.badge-recently { background: #17a2b8; color: white; }
|
| 72 |
+
.badge-offline { background: var(--border-color); color: var(--text-muted); }
|
| 73 |
+
|
| 74 |
+
.profile-stats {
|
| 75 |
+
display: grid;
|
| 76 |
+
grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));
|
| 77 |
+
gap: 1rem;
|
| 78 |
+
margin-bottom: 2rem;
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
.profile-stat-card {
|
| 82 |
+
background: var(--bg-card);
|
| 83 |
+
border: 1px solid var(--border-color);
|
| 84 |
+
border-radius: var(--radius-md);
|
| 85 |
+
padding: 1rem;
|
| 86 |
+
text-align: center;
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
.profile-stat-value {
|
| 90 |
+
font-size: 1.5rem;
|
| 91 |
+
font-weight: 700;
|
| 92 |
+
color: var(--primary);
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
.profile-stat-label {
|
| 96 |
+
font-size: 0.75rem;
|
| 97 |
+
color: var(--text-muted);
|
| 98 |
+
margin-top: 0.25rem;
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
.profile-grid {
|
| 102 |
+
display: grid;
|
| 103 |
+
grid-template-columns: repeat(2, 1fr);
|
| 104 |
+
gap: 1.5rem;
|
| 105 |
+
margin-bottom: 1.5rem;
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
.profile-card {
|
| 109 |
+
background: var(--bg-card);
|
| 110 |
+
border: 1px solid var(--border-color);
|
| 111 |
+
border-radius: var(--radius-lg);
|
| 112 |
+
padding: 1.5rem;
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
.profile-card h3 {
|
| 116 |
+
font-size: 1rem;
|
| 117 |
+
margin-bottom: 1rem;
|
| 118 |
+
color: var(--text-primary);
|
| 119 |
+
display: flex;
|
| 120 |
+
align-items: center;
|
| 121 |
+
gap: 0.5rem;
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
.profile-card.full-width {
|
| 125 |
+
grid-column: span 2;
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
.reply-network-list {
|
| 129 |
+
list-style: none;
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
.reply-network-item {
|
| 133 |
+
display: flex;
|
| 134 |
+
justify-content: space-between;
|
| 135 |
+
align-items: center;
|
| 136 |
+
padding: 0.5rem 0;
|
| 137 |
+
border-bottom: 1px solid var(--border-color);
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
.reply-network-item:last-child {
|
| 141 |
+
border-bottom: none;
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
.reply-network-name {
|
| 145 |
+
display: flex;
|
| 146 |
+
align-items: center;
|
| 147 |
+
gap: 0.5rem;
|
| 148 |
+
}
|
| 149 |
+
|
| 150 |
+
.reply-network-name a {
|
| 151 |
+
color: var(--primary);
|
| 152 |
+
text-decoration: none;
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
.reply-network-name a:hover {
|
| 156 |
+
text-decoration: underline;
|
| 157 |
+
}
|
| 158 |
+
|
| 159 |
+
.reply-network-count {
|
| 160 |
+
font-weight: 600;
|
| 161 |
+
color: var(--text-secondary);
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
.reply-bar {
|
| 165 |
+
height: 4px;
|
| 166 |
+
background: var(--border-color);
|
| 167 |
+
border-radius: 2px;
|
| 168 |
+
margin-top: 4px;
|
| 169 |
+
}
|
| 170 |
+
|
| 171 |
+
.reply-bar-fill {
|
| 172 |
+
height: 100%;
|
| 173 |
+
background: var(--primary);
|
| 174 |
+
border-radius: 2px;
|
| 175 |
+
}
|
| 176 |
+
|
| 177 |
+
.links-list {
|
| 178 |
+
list-style: none;
|
| 179 |
+
}
|
| 180 |
+
|
| 181 |
+
.links-list li {
|
| 182 |
+
padding: 0.5rem 0;
|
| 183 |
+
border-bottom: 1px solid var(--border-color);
|
| 184 |
+
display: flex;
|
| 185 |
+
justify-content: space-between;
|
| 186 |
+
align-items: center;
|
| 187 |
+
}
|
| 188 |
+
|
| 189 |
+
.links-list li:last-child { border-bottom: none; }
|
| 190 |
+
|
| 191 |
+
.links-list a {
|
| 192 |
+
color: var(--primary);
|
| 193 |
+
text-decoration: none;
|
| 194 |
+
word-break: break-all;
|
| 195 |
+
font-size: 0.875rem;
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
.links-list a:hover { text-decoration: underline; }
|
| 199 |
+
|
| 200 |
+
.links-list .count {
|
| 201 |
+
font-weight: 600;
|
| 202 |
+
color: var(--text-muted);
|
| 203 |
+
flex-shrink: 0;
|
| 204 |
+
margin-left: 1rem;
|
| 205 |
+
}
|
| 206 |
+
|
| 207 |
+
.no-messages {
|
| 208 |
+
text-align: center;
|
| 209 |
+
padding: 3rem;
|
| 210 |
+
background: var(--bg-card);
|
| 211 |
+
border-radius: var(--radius-lg);
|
| 212 |
+
border: 1px solid var(--border-color);
|
| 213 |
+
}
|
| 214 |
+
|
| 215 |
+
.no-messages h2 {
|
| 216 |
+
margin-bottom: 0.5rem;
|
| 217 |
+
color: var(--text-muted);
|
| 218 |
+
}
|
| 219 |
+
|
| 220 |
+
.forward-source {
|
| 221 |
+
display: flex;
|
| 222 |
+
justify-content: space-between;
|
| 223 |
+
align-items: center;
|
| 224 |
+
padding: 0.5rem 0;
|
| 225 |
+
border-bottom: 1px solid var(--border-color);
|
| 226 |
+
}
|
| 227 |
+
|
| 228 |
+
.forward-source:last-child { border-bottom: none; }
|
| 229 |
+
|
| 230 |
+
.time-info {
|
| 231 |
+
font-size: 0.875rem;
|
| 232 |
+
color: var(--text-secondary);
|
| 233 |
+
padding: 0.5rem 0;
|
| 234 |
+
display: flex;
|
| 235 |
+
justify-content: space-between;
|
| 236 |
+
}
|
| 237 |
+
|
| 238 |
+
@media (max-width: 992px) {
|
| 239 |
+
.profile-grid {
|
| 240 |
+
grid-template-columns: 1fr;
|
| 241 |
+
}
|
| 242 |
+
.profile-card.full-width {
|
| 243 |
+
grid-column: span 1;
|
| 244 |
+
}
|
| 245 |
+
.profile-header {
|
| 246 |
+
flex-direction: column;
|
| 247 |
+
text-align: center;
|
| 248 |
+
}
|
| 249 |
+
.profile-meta {
|
| 250 |
+
justify-content: center;
|
| 251 |
+
}
|
| 252 |
+
}
|
| 253 |
+
</style>
|
| 254 |
+
</head>
|
| 255 |
+
<body>
|
| 256 |
+
<!-- Sidebar -->
|
| 257 |
+
<nav class="sidebar">
|
| 258 |
+
<div class="logo">
|
| 259 |
+
<span class="logo-icon">📊</span>
|
| 260 |
+
<span class="logo-text">TG Analytics</span>
|
| 261 |
+
</div>
|
| 262 |
+
<ul class="nav-menu">
|
| 263 |
+
<li class="nav-item">
|
| 264 |
+
<a href="/" class="nav-link">
|
| 265 |
+
<span class="icon">📈</span>
|
| 266 |
+
<span>Overview</span>
|
| 267 |
+
</a>
|
| 268 |
+
</li>
|
| 269 |
+
<li class="nav-item active">
|
| 270 |
+
<a href="/users" class="nav-link">
|
| 271 |
+
<span class="icon">👥</span>
|
| 272 |
+
<span>Users</span>
|
| 273 |
+
</a>
|
| 274 |
+
</li>
|
| 275 |
+
<li class="nav-item">
|
| 276 |
+
<a href="/chat" class="nav-link">
|
| 277 |
+
<span class="icon">💬</span>
|
| 278 |
+
<span>Chat</span>
|
| 279 |
+
</a>
|
| 280 |
+
</li>
|
| 281 |
+
<li class="nav-item">
|
| 282 |
+
<a href="/search" class="nav-link">
|
| 283 |
+
<span class="icon">🔍</span>
|
| 284 |
+
<span>Search</span>
|
| 285 |
+
</a>
|
| 286 |
+
</li>
|
| 287 |
+
<li class="nav-item">
|
| 288 |
+
<a href="/moderation" class="nav-link">
|
| 289 |
+
<span class="icon">🛡️</span>
|
| 290 |
+
<span>Moderation</span>
|
| 291 |
+
</a>
|
| 292 |
+
</li>
|
| 293 |
+
<li class="nav-item">
|
| 294 |
+
<a href="/settings" class="nav-link">
|
| 295 |
+
<span class="icon">⚙️</span>
|
| 296 |
+
<span>Settings</span>
|
| 297 |
+
</a>
|
| 298 |
+
</li>
|
| 299 |
+
</ul>
|
| 300 |
+
</nav>
|
| 301 |
+
|
| 302 |
+
<!-- Main Content -->
|
| 303 |
+
<main class="main-content">
|
| 304 |
+
<header class="header">
|
| 305 |
+
<h1><a href="/users" style="color: var(--text-muted); text-decoration: none;">← Users</a></h1>
|
| 306 |
+
</header>
|
| 307 |
+
|
| 308 |
+
<div id="profile-content">
|
| 309 |
+
<div class="loading"><div class="spinner"></div></div>
|
| 310 |
+
</div>
|
| 311 |
+
</main>
|
| 312 |
+
|
| 313 |
+
<script>
|
| 314 |
+
// Id of the profile being shown. `{{ user_id }}` is a server-side template placeholder
// (presumably Jinja — confirm against the rendering view); loadProfile() uses it to build the API URL.
const USER_ID = '{{ user_id }}';
|
| 315 |
+
// Palette that getAvatarColor() indexes into to pick an avatar background colour.
const COLORS = ['#e17076','#7bc862','#e5ca77','#65aadd','#a695e7','#ee7aae','#6ec9cb','#faa774'];
|
| 316 |
+
|
| 317 |
+
/**
 * Map a display name to a stable entry of COLORS.
 *
 * The name's UTF-16 code units are folded into a rolling hash
 * (hash * 31 + unit, via shift-and-subtract), so the same name always gets
 * the same colour.
 *
 * @param {string} name - Display name to hash.
 * @returns {string} One of the COLORS entries.
 */
function getAvatarColor(name) {
    let acc = 0;
    let idx = 0;
    while (idx < name.length) {
        // (acc << 5) - acc === acc * 31, with the shift truncating to 32 bits.
        acc = ((acc << 5) - acc) + name.charCodeAt(idx);
        idx += 1;
    }
    const slot = Math.abs(acc) % COLORS.length;
    return COLORS[slot];
}
|
| 322 |
+
|
| 323 |
+
/**
 * Format a count compactly: '-' for null/undefined, "1.5K" for thousands,
 * "2.5M" for millions, locale-formatted otherwise.
 *
 * @param {number|null|undefined} num - Value to format.
 * @returns {string} Display string.
 */
function formatNumber(num) {
    if (num === null || num === undefined) return '-';
    // Values from 999,950 up would round to "1000.0K" in the thousands
    // branch, so they are promoted to millions ("1.0M") instead.
    if (num >= 999950) return (num / 1000000).toFixed(1) + 'M';
    if (num >= 1000) return (num / 1000).toFixed(1) + 'K';
    return num.toLocaleString();
}
|
| 329 |
+
|
| 330 |
+
/**
 * Format a timestamp as a short he-IL date (year, abbreviated month, day).
 *
 * @param {number|null|undefined} ts - Timestamp; it is multiplied by 1000
 *   before being passed to Date, i.e. it is handled as seconds.
 * @returns {string} Formatted date, or '-' when `ts` is falsy.
 */
function formatDate(ts) {
    if (ts) {
        const dateParts = { year: 'numeric', month: 'short', day: 'numeric' };
        return new Date(ts * 1000).toLocaleDateString('he-IL', dateParts);
    }
    return '-';
}
|
| 335 |
+
|
| 336 |
+
/**
 * Format a duration in seconds as a short string: "45s", "12m" or "1.5h".
 *
 * The unit is chosen from the rounded value, so the output never shows an
 * overflowing quantity such as "60s" or "60m".
 *
 * @param {number|null|undefined} seconds - Duration in seconds.
 * @returns {string} Display string, or '-' when `seconds` is falsy.
 */
function formatDuration(seconds) {
    if (!seconds) return '-';

    const wholeSeconds = Math.round(seconds);
    if (wholeSeconds < 60) return wholeSeconds + 's';

    const wholeMinutes = Math.round(seconds / 60);
    if (wholeMinutes < 60) return wholeMinutes + 'm';

    return (seconds / 3600).toFixed(1) + 'h';
}
|
| 342 |
+
|
| 343 |
+
/**
 * Escape a value for safe interpolation into HTML, in both element content
 * and quoted attribute values.
 *
 * Quotes are escaped too, because callers in this file interpolate the
 * result into `href="..."`.
 *
 * @param {*} text - Value to escape; null/undefined yield an empty string,
 *   anything else is converted with String().
 * @returns {string} Escaped text.
 */
function escapeHtml(text) {
    if (text === null || text === undefined) return '';

    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    };
    return String(text).replace(/[&<>"']/g, (ch) => entities[ch]);
}
|
| 348 |
+
|
| 349 |
+
// Start loading the profile once the DOM is parsed, so #profile-content exists when loadProfile() looks it up.
document.addEventListener('DOMContentLoaded', loadProfile);
|
| 350 |
+
|
| 351 |
+
/**
 * Fetch the profile of USER_ID and render it into #profile-content.
 *
 * - A response carrying `error` shows a "User not found" state.
 * - A participant without messages is rendered by renderInactiveProfile().
 * - Everything else is rendered by renderFullProfile().
 * Any exception (network, invalid JSON, rendering) shows an error state.
 *
 * @returns {Promise<void>}
 */
async function loadProfile() {
    const container = document.getElementById('profile-content');
    try {
        // Encode the id so it can never alter the request path.
        const resp = await fetch(`/api/user/${encodeURIComponent(USER_ID)}/profile`);
        const data = await resp.json();

        if (data.error) {
            // The error text comes from the server; escape it before it reaches innerHTML.
            container.innerHTML = `<div class="empty-state"><h2>User not found</h2><p>${escapeHtml(data.error)}</p></div>`;
            return;
        }

        if (!data.has_messages && data.participant) {
            renderInactiveProfile(container, data);
            return;
        }

        renderFullProfile(container, data);
    } catch (err) {
        container.innerHTML = `<div class="empty-state">Error loading profile: ${escapeHtml(err.message)}</div>`;
    }
}
|
| 372 |
+
|
| 373 |
+
/**
 * Render the reduced profile of a participant who has not sent any messages:
 * header (avatar, name, badges, username, join date, status) plus a
 * "No Messages" notice.
 *
 * @param {HTMLElement} container - Element whose content is replaced.
 * @param {object} data - Profile payload; reads `name` and `participant`
 *   (`is_creator`, `is_admin`, `is_bot`, `is_premium`, `username`,
 *   `join_date`, `last_status`).
 */
function renderInactiveProfile(container, data) {
    const p = data.participant;
    const name = data.name || 'Unknown';
    const color = getAvatarColor(name);
    // First code point rather than first UTF-16 unit, so a name starting
    // with an emoji does not produce half of a surrogate pair.
    const initial = (Array.from(name)[0] || '').toUpperCase();

    let badges = '';
    if (p.is_creator) badges += ' <span class="badge badge-creator">Creator</span>';
    if (p.is_admin && !p.is_creator) badges += ' <span class="badge badge-admin">Admin</span>';
    if (p.is_bot) badges += ' <span class="badge badge-bot">Bot</span>';
    if (p.is_premium) badges += ' <span class="badge badge-premium">Premium</span>';

    // Only the known statuses get their own badge class; anything else is shown as offline.
    let statusClass = 'offline';
    if (p.last_status === 'online' || p.last_status === 'recently') {
        statusClass = p.last_status;
    }

    container.innerHTML = `
        <div class="profile-header">
            <div class="profile-avatar" style="background: ${color}">${escapeHtml(initial)}</div>
            <div class="profile-info">
                <div class="profile-name">${escapeHtml(name)}${badges}</div>
                ${p.username ? `<div style="color: var(--primary);">@${escapeHtml(p.username)}</div>` : ''}
                <div class="profile-meta">
                    ${p.join_date ? `<span>Joined: ${formatDate(p.join_date)}</span>` : ''}
                    <span>Status: <span class="badge badge-${statusClass}">${escapeHtml(String(p.last_status))}</span></span>
                </div>
            </div>
        </div>
        <div class="no-messages">
            <h2>No Messages</h2>
            <p style="color: var(--text-muted);">This participant hasn't sent any messages in the group.</p>
        </div>
    `;
}
|
| 403 |
+
|
| 404 |
+
/**
 * Render the full profile of a user who has messages: header, statistics
 * grid, timeline, four chart cards, reply network, forward sources and
 * shared links. The charts are drawn after the markup is inserted.
 *
 * @param {HTMLElement} container - Element whose content is replaced.
 * @param {object} data - Profile payload returned by the profile API.
 *   `participant`, `replies_to`, `replies_from`, `top_forward_sources` and
 *   `top_links` are optional; missing lists are treated as empty.
 */
function renderFullProfile(container, data) {
    const name = data.name || 'Unknown';
    const color = getAvatarColor(name);
    // First code point rather than first UTF-16 unit, so a name starting
    // with an emoji does not produce half of a surrogate pair.
    const initial = (Array.from(name)[0] || '').toUpperCase();
    const p = data.participant;
    // Tolerate payloads without reply data instead of throwing on `.length`.
    const repliesTo = data.replies_to || [];
    const repliesFrom = data.replies_from || [];

    // --- local markup helpers -------------------------------------------

    // One tile of the statistics grid.
    const statCard = (value, label) => `
        <div class="profile-stat-card">
            <div class="profile-stat-value">${value}</div>
            <div class="profile-stat-label">${label}</div>
        </div>`;

    // A card holding a fixed-height canvas for Chart.js.
    const chartCard = (title, canvasId, fullWidth) => `
        <div class="profile-card${fullWidth ? ' full-width' : ''}">
            <h3>${title}</h3>
            <div style="height: 200px;"><canvas id="${canvasId}"></canvas></div>
        </div>`;

    // A reply-network card; bar widths are relative to the first entry,
    // which the original code also treats as the maximum.
    const replyCard = (title, entries, barExtraStyle) => {
        // `|| 1` also guards against a zero count, which would yield NaN widths.
        const maxCount = (entries.length > 0 && entries[0].count) || 1;
        const rows = entries.map((r) => `
                <li class="reply-network-item">
                    <div class="reply-network-name">
                        <a href="/user/${encodeURIComponent(r.user_id)}">${escapeHtml(r.name)}</a>
                    </div>
                    <span class="reply-network-count">${r.count}</span>
                </li>
                <div class="reply-bar"><div class="reply-bar-fill" style="width: ${(r.count / maxCount * 100).toFixed(1)}%${barExtraStyle}"></div></div>
            `).join('');
        return `
        <div class="profile-card">
            <h3>${title}</h3>
            ${entries.length === 0 ? '<p style="color: var(--text-muted);">No reply data</p>' : ''}
            <ul class="reply-network-list">
                ${rows}
            </ul>
        </div>`;
    };

    // One shared-link row. Links come from chat messages and are untrusted:
    // only http(s) URLs become clickable, so a `javascript:` URL can never
    // end up in an href. Quotes are escaped here as well because the value
    // sits inside a quoted attribute.
    const linkItem = (l) => {
        const url = String(l.url);
        const label = escapeHtml(url.length > 50 ? url.substring(0, 50) + '...' : url);
        const target = /^https?:\/\//i.test(url)
            ? `<a href="${escapeHtml(url).replace(/"/g, '&quot;')}" target="_blank" rel="noopener">${label}</a>`
            : `<span>${label}</span>`;
        return `
                <li>
                    ${target}
                    <span class="count">${l.count}x</span>
                </li>`;
    };

    // --- header ----------------------------------------------------------

    let badges = '';
    let statusHtml = '';
    if (p) {
        if (p.is_creator) badges += ' <span class="badge badge-creator">Creator</span>';
        if (p.is_admin && !p.is_creator) badges += ' <span class="badge badge-admin">Admin</span>';
        if (p.is_bot) badges += ' <span class="badge badge-bot">Bot</span>';
        if (p.is_premium) badges += ' <span class="badge badge-premium">Premium</span>';

        // Only the known statuses get their own badge class; anything else is shown as offline.
        const statusClass = (p.last_status === 'online' || p.last_status === 'recently') ? p.last_status : 'offline';
        statusHtml = `<span>Status: <span class="badge badge-${statusClass}">${escapeHtml(String(p.last_status))}</span></span>`;
    }

    let html = `
        <div class="profile-header">
            <div class="profile-avatar" style="background: ${color}">${escapeHtml(initial)}</div>
            <div class="profile-info">
                <div class="profile-name">${escapeHtml(name)}${badges}</div>
                ${p && p.username ? `<div style="color: var(--primary);">@${escapeHtml(p.username)}</div>` : ''}
                <div class="profile-meta">
                    <span>#${data.rank} of ${data.total_active_users}</span>
                    <span>ID: ${data.user_id}</span>
                    ${p && p.join_date ? `<span>Joined: ${formatDate(p.join_date)}</span>` : ''}
                    ${statusHtml}
                </div>
            </div>
        </div>
    `;

    // --- stats grid ------------------------------------------------------

    html += `
        <div class="profile-stats">
            ${[
                statCard(formatNumber(data.total_messages), 'Messages'),
                statCard(formatNumber(data.total_characters), 'Characters'),
                statCard(data.avg_message_length, 'Avg Length'),
                statCard(data.active_days, 'Active Days'),
                statCard(data.daily_average, 'Daily Avg'),
                statCard(formatNumber(data.total_replies_sent), 'Replies Sent'),
                statCard(formatNumber(data.total_replies_received), 'Replies Received'),
                statCard(`${data.reply_ratio}%`, 'Reply Rate'),
                statCard(formatDuration(data.avg_reply_time_seconds), 'Avg Reply Time'),
                statCard(formatNumber(data.links_shared), 'Links'),
                statCard(formatNumber(data.media_sent), 'Media'),
                statCard(formatNumber(data.forwards_sent), 'Forwards'),
            ].join('')}
        </div>
    `;

    // --- timeline --------------------------------------------------------

    html += `
        <div class="profile-card full-width" style="margin-bottom: 1.5rem;">
            <h3>Timeline</h3>
            <div class="time-info">
                <span>First message: ${formatDate(data.first_message)}</span>
                <span>Last message: ${formatDate(data.last_message)}</span>
            </div>
            <div class="time-info">
                <span>Edits: ${formatNumber(data.edits)}</span>
                <span>Mentions: ${formatNumber(data.mentions_made)}</span>
            </div>
        </div>
    `;

    // --- charts + reply network (one shared grid) -------------------------

    html += `<div class="profile-grid">`;

    html += chartCard('Activity by Hour', 'hourly-chart', false);
    html += chartCard('Activity by Day of Week', 'weekday-chart', false);
    html += chartCard('Monthly Trend', 'monthly-chart', true);
    html += chartCard('Daily Activity (Last 90 Days)', 'daily-chart', true);

    html += replyCard('Most Replies To', repliesTo, '');
    html += replyCard('Most Replies From', repliesFrom, '; background: #28a745;');

    // Top forward sources
    if (data.top_forward_sources && data.top_forward_sources.length > 0) {
        html += `
        <div class="profile-card">
            <h3>Top Forward Sources</h3>
            ${data.top_forward_sources.map((f) => `
                <div class="forward-source">
                    <span>${escapeHtml(f.name)}</span>
                    <span class="reply-network-count">${f.count}</span>
                </div>
            `).join('')}
        </div>
        `;
    }

    // Top links
    if (data.top_links && data.top_links.length > 0) {
        html += `
        <div class="profile-card">
            <h3>Top Links Shared</h3>
            <ul class="links-list">
                ${data.top_links.map(linkItem).join('')}
            </ul>
        </div>
        `;
    }

    html += `</div>`; // close profile-grid

    container.innerHTML = html;

    // The canvases exist only after the markup is inserted, so charts are drawn last.
    renderHourlyChart(data.hourly_activity);
    renderWeekdayChart(data.weekday_activity);
    renderMonthlyChart(data.monthly_activity);
    renderDailyChart(data.daily_activity);
}
|
| 622 |
+
|
| 623 |
+
/**
 * Build the options object shared by the profile charts: responsive, no
 * legend, zero-based y axis with faint grid lines, and an x axis without
 * grid lines that shows at most 12 unrotated tick labels.
 *
 * A new object is created on every call.
 *
 * @returns {object} Chart options.
 */
function chartDefaults() {
    const tickColor = '#718096';

    const yAxis = {
        beginAtZero: true,
        grid: { color: 'rgba(255,255,255,0.05)' },
        ticks: { color: tickColor },
    };

    const xAxis = {
        grid: { display: false },
        ticks: { color: tickColor, maxRotation: 0, autoSkip: true, maxTicksLimit: 12 },
    };

    const options = {
        responsive: true,
        maintainAspectRatio: false,
        plugins: { legend: { display: false } },
        scales: { y: yAxis, x: xAxis },
    };
    return options;
}
|
| 641 |
+
|
| 642 |
+
/**
 * Draw the "Activity by Hour" bar chart on the #hourly-chart canvas.
 * Does nothing when the canvas is not in the document.
 *
 * @param {number[]} hourly - Values plotted against the 24 labels
 *   "0:00" … "23:00" (presumably one count per hour — confirm against the API).
 */
function renderHourlyChart(hourly) {
    const canvas = document.getElementById('hourly-chart');
    if (canvas) {
        const context = canvas.getContext('2d');

        const hourLabels = [];
        for (let hour = 0; hour < 24; hour += 1) {
            hourLabels.push(`${hour}:00`);
        }

        const series = {
            data: hourly,
            backgroundColor: 'rgba(0, 136, 204, 0.6)',
            borderColor: 'rgba(0, 136, 204, 1)',
            borderWidth: 1,
        };

        new Chart(context, {
            type: 'bar',
            data: { labels: hourLabels, datasets: [series] },
            options: chartDefaults(),
        });
    }
}
|
| 659 |
+
|
| 660 |
+
/**
 * Draw the per-weekday activity bar chart on the #weekday-chart canvas.
 *
 * Does nothing when the canvas is not present in the document.
 *
 * @param {Array<{day: string, count: number}>} weekday - one entry per day;
 *     the first three characters of `day` become the axis label.
 */
function renderWeekdayChart(weekday) {
    const canvas = document.getElementById('weekday-chart');
    if (!canvas) {
        return;
    }

    const highlightFill = 'rgba(40, 167, 69, 0.6)';
    const regularFill = 'rgba(0, 136, 204, 0.6)';

    const dayLabels = [];
    const counts = [];
    const fills = [];
    weekday.forEach((entry, position) => {
        dayLabels.push(entry.day.substring(0, 3));
        counts.push(entry.count);
        // Entries at positions 5 and 6 are highlighted (presumably Sat/Sun in
        // a Monday-first list — confirm against the API ordering).
        fills.push(position === 5 || position === 6 ? highlightFill : regularFill);
    });

    new Chart(canvas.getContext('2d'), {
        type: 'bar',
        data: {
            labels: dayLabels,
            datasets: [{ data: counts, backgroundColor: fills, borderWidth: 1 }],
        },
        options: chartDefaults(),
    });
}
|
| 678 |
+
|
| 679 |
+
/**
 * Draw the per-month activity line chart on the #monthly-chart canvas.
 *
 * Does nothing when the canvas is not present in the document.
 *
 * @param {Array<{month: string, count: number}>} monthly - points in the
 *     order they should appear on the x axis.
 */
function renderMonthlyChart(monthly) {
    const canvas = document.getElementById('monthly-chart');
    if (!canvas) {
        return;
    }

    const monthLabels = [];
    const counts = [];
    for (const point of monthly) {
        monthLabels.push(point.month);
        counts.push(point.count);
    }

    // Filled, slightly smoothed line with visible points.
    const series = {
        data: counts,
        borderColor: '#0088cc',
        backgroundColor: 'rgba(0, 136, 204, 0.1)',
        fill: true,
        tension: 0.3,
        pointRadius: 3,
        pointHoverRadius: 6,
    };

    new Chart(canvas.getContext('2d'), {
        type: 'line',
        data: { labels: monthLabels, datasets: [series] },
        options: chartDefaults(),
    });
}
|
| 699 |
+
|
| 700 |
+
/**
 * Draw the per-day activity bar chart on the #daily-chart canvas.
 *
 * Does nothing when the canvas is not present in the document. The input is
 * plotted in reverse order without being mutated.
 *
 * @param {Array<{date: string, count: number}>} daily - entries whose order
 *     is reversed for display (presumably newest-first from the API — confirm);
 *     `date` is shortened from its sixth character on (e.g. "YYYY-MM-DD"
 *     becomes "MM-DD").
 */
function renderDailyChart(daily) {
    const canvas = document.getElementById('daily-chart');
    if (!canvas) {
        return;
    }

    // Work on a copy so the caller's array keeps its order.
    const chronological = Array.from(daily).reverse();

    const dayLabels = [];
    const counts = [];
    for (const entry of chronological) {
        dayLabels.push(entry.date.substring(5)); // MM-DD
        counts.push(entry.count);
    }

    const series = {
        data: counts,
        backgroundColor: 'rgba(0, 136, 204, 0.4)',
        borderColor: 'rgba(0, 136, 204, 0.8)',
        borderWidth: 1,
    };

    new Chart(canvas.getContext('2d'), {
        type: 'bar',
        data: { labels: dayLabels, datasets: [series] },
        options: chartDefaults(),
    });
}
|
| 719 |
+
</script>
|
| 720 |
+
</body>
|
| 721 |
+
</html>
|
templates/users.html
ADDED
|
@@ -0,0 +1,344 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Users - Telegram Analytics</title>
|
| 7 |
+
<link rel="stylesheet" href="/static/css/style.css">
|
| 8 |
+
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
| 9 |
+
</head>
|
| 10 |
+
<body>
|
| 11 |
+
<!-- Sidebar -->
|
| 12 |
+
<nav class="sidebar">
|
| 13 |
+
<div class="logo">
|
| 14 |
+
<span class="logo-icon">📊</span>
|
| 15 |
+
<span class="logo-text">TG Analytics</span>
|
| 16 |
+
</div>
|
| 17 |
+
<ul class="nav-menu">
|
| 18 |
+
<li class="nav-item">
|
| 19 |
+
<a href="/" class="nav-link">
|
| 20 |
+
<span class="icon">📈</span>
|
| 21 |
+
<span>Overview</span>
|
| 22 |
+
</a>
|
| 23 |
+
</li>
|
| 24 |
+
<li class="nav-item active">
|
| 25 |
+
<a href="/users" class="nav-link">
|
| 26 |
+
<span class="icon">👥</span>
|
| 27 |
+
<span>Users</span>
|
| 28 |
+
</a>
|
| 29 |
+
</li>
|
| 30 |
+
<li class="nav-item">
|
| 31 |
+
<a href="/chat" class="nav-link">
|
| 32 |
+
<span class="icon">💬</span>
|
| 33 |
+
<span>Chat</span>
|
| 34 |
+
</a>
|
| 35 |
+
</li>
|
| 36 |
+
<li class="nav-item">
|
| 37 |
+
<a href="/search" class="nav-link">
|
| 38 |
+
<span class="icon">🔍</span>
|
| 39 |
+
<span>Search</span>
|
| 40 |
+
</a>
|
| 41 |
+
</li>
|
| 42 |
+
<li class="nav-item">
|
| 43 |
+
<a href="/moderation" class="nav-link">
|
| 44 |
+
<span class="icon">🛡️</span>
|
| 45 |
+
<span>Moderation</span>
|
| 46 |
+
</a>
|
| 47 |
+
</li>
|
| 48 |
+
<li class="nav-item">
|
| 49 |
+
<a href="/settings" class="nav-link">
|
| 50 |
+
<span class="icon">⚙️</span>
|
| 51 |
+
<span>Settings</span>
|
| 52 |
+
</a>
|
| 53 |
+
</li>
|
| 54 |
+
</ul>
|
| 55 |
+
<div class="sidebar-footer">
|
| 56 |
+
<div class="export-buttons">
|
| 57 |
+
<button onclick="exportUsers()" class="btn btn-sm">📥 Export Users</button>
|
| 58 |
+
</div>
|
| 59 |
+
</div>
|
| 60 |
+
</nav>
|
| 61 |
+
|
| 62 |
+
<!-- Main Content -->
|
| 63 |
+
<main class="main-content">
|
| 64 |
+
<!-- Header -->
|
| 65 |
+
<header class="header">
|
| 66 |
+
<h1>User Leaderboard</h1>
|
| 67 |
+
<div class="header-controls">
|
| 68 |
+
<select id="timeframe" class="select" onchange="loadUsers()">
|
| 69 |
+
<option value="today">Today</option>
|
| 70 |
+
<option value="yesterday">Yesterday</option>
|
| 71 |
+
<option value="week">This Week</option>
|
| 72 |
+
<option value="month" selected>This Month</option>
|
| 73 |
+
<option value="year">This Year</option>
|
| 74 |
+
<option value="all">All Time</option>
|
| 75 |
+
</select>
|
| 76 |
+
<button onclick="loadUsers()" class="btn btn-primary">🔄 Refresh</button>
|
| 77 |
+
</div>
|
| 78 |
+
</header>
|
| 79 |
+
|
| 80 |
+
<!-- User Stats Summary -->
|
| 81 |
+
<section class="stats-grid">
|
| 82 |
+
<div class="stat-card">
|
| 83 |
+
<div class="stat-icon">👥</div>
|
| 84 |
+
<div class="stat-content">
|
| 85 |
+
<div class="stat-value" id="total-users">-</div>
|
| 86 |
+
<div class="stat-label">Total Members</div>
|
| 87 |
+
</div>
|
| 88 |
+
</div>
|
| 89 |
+
<div class="stat-card">
|
| 90 |
+
<div class="stat-icon">💬</div>
|
| 91 |
+
<div class="stat-content">
|
| 92 |
+
<div class="stat-value" id="total-active">-</div>
|
| 93 |
+
<div class="stat-label">Active Users</div>
|
| 94 |
+
</div>
|
| 95 |
+
</div>
|
| 96 |
+
<div class="stat-card">
|
| 97 |
+
<div class="stat-icon">🏆</div>
|
| 98 |
+
<div class="stat-content">
|
| 99 |
+
<div class="stat-value" id="top-user">-</div>
|
| 100 |
+
<div class="stat-label">Top User</div>
|
| 101 |
+
</div>
|
| 102 |
+
</div>
|
| 103 |
+
<div class="stat-card">
|
| 104 |
+
<div class="stat-icon">📊</div>
|
| 105 |
+
<div class="stat-content">
|
| 106 |
+
<div class="stat-value" id="avg-messages">-</div>
|
| 107 |
+
<div class="stat-label">Avg Messages/User</div>
|
| 108 |
+
</div>
|
| 109 |
+
</div>
|
| 110 |
+
</section>
|
| 111 |
+
|
| 112 |
+
<!-- Users Table -->
|
| 113 |
+
<section class="chart-card full-width">
|
| 114 |
+
<div class="chart-header">
|
| 115 |
+
<h3>All Users</h3>
|
| 116 |
+
<div style="display: flex; gap: 1rem; align-items: center;">
|
| 117 |
+
<input type="search" id="user-search" placeholder="Search users..."
|
| 118 |
+
style="width: 200px;" onkeyup="filterUsers()">
|
| 119 |
+
<span id="showing-count" style="color: var(--text-muted); font-size: 0.875rem;"></span>
|
| 120 |
+
</div>
|
| 121 |
+
</div>
|
| 122 |
+
<div style="overflow-x: auto;">
|
| 123 |
+
<table class="users-table">
|
| 124 |
+
<thead>
|
| 125 |
+
<tr>
|
| 126 |
+
<th style="width: 60px;">Rank</th>
|
| 127 |
+
<th>User</th>
|
| 128 |
+
<th style="width: 80px;">Role</th>
|
| 129 |
+
<th style="width: 120px;">Messages</th>
|
| 130 |
+
<th style="width: 100px;">Share</th>
|
| 131 |
+
<th style="width: 100px;">Links</th>
|
| 132 |
+
<th style="width: 100px;">Media</th>
|
| 133 |
+
<th style="width: 100px;">Active Days</th>
|
| 134 |
+
<th style="width: 100px;">Daily Avg</th>
|
| 135 |
+
</tr>
|
| 136 |
+
</thead>
|
| 137 |
+
<tbody id="users-table-body">
    <!-- Initial loading placeholder; loadUsers() replaces it on page load.
         The cell spans all 9 header columns, matching the rows built in script. -->
    <tr>
        <td colspan="9" class="loading">
            <div class="spinner"></div>
        </td>
    </tr>
</tbody>
|
| 144 |
+
</table>
|
| 145 |
+
</div>
|
| 146 |
+
<!-- Pagination -->
|
| 147 |
+
<div class="pagination" id="pagination"></div>
|
| 148 |
+
</section>
|
| 149 |
+
</main>
|
| 150 |
+
|
| 151 |
+
<script src="/static/js/dashboard.js"></script>
|
| 152 |
+
<script>
|
| 153 |
+
// ---- Page state ----

// Users from the most recent /api/users response; renderUsers() filters and
// pages this list on the client.
let allUsers = [];

// 1-based number of the page currently shown in the table.
let currentPage = 1;

// Number of table rows per page.
const pageSize = 20;

// ---- Startup ----

// Load the leaderboard as soon as the DOM has been parsed.
document.addEventListener('DOMContentLoaded', function onDomReady() {
    loadUsers();
});
|
| 162 |
+
|
| 163 |
+
/**
 * Fetch the user list for the selected timeframe, refresh the summary cards
 * and render the first page of the table.
 *
 * Reads the timeframe from #timeframe, shows a spinner row while the request
 * is in flight, stores the response's `users` array in `allUsers`, resets
 * `currentPage` to 1 and calls renderUsers(). On any failure (network error,
 * non-2xx status, invalid JSON) the error is logged and the table shows an
 * error row instead.
 *
 * @returns {Promise<void>}
 */
async function loadUsers() {
    const timeframe = document.getElementById('timeframe').value;
    const tbody = document.getElementById('users-table-body');
    tbody.innerHTML = '<tr><td colspan="9" class="loading"><div class="spinner"></div></td></tr>';

    // Missing counters are shown as a placeholder rather than handed to
    // formatNumber(), which would throw and falsely report a load error.
    const formatCount = (value) => (value == null ? '-' : formatNumber(value));

    try {
        const query = `timeframe=${encodeURIComponent(timeframe)}&limit=500&include_inactive=1`;
        const response = await fetch(`/api/users?${query}`);
        if (!response.ok) {
            // fetch() only rejects on network failure; HTTP errors must be checked explicitly.
            throw new Error(`Request failed with status ${response.status}`);
        }
        const data = await response.json();
        allUsers = Array.isArray(data.users) ? data.users : [];

        // Update summary stats
        document.getElementById('total-users').textContent = formatCount(data.total);
        document.getElementById('total-active').textContent = formatCount(data.total_active);

        // Always rewrite both cards so values from a previous timeframe never
        // linger when the new one has no active users.
        let topUserName = '-';
        let averageMessages = '-';
        const activeUsers = allUsers.filter(u => u.messages > 0);
        if (activeUsers.length > 0) {
            // Assumes the API returns users ordered by message count, highest
            // first — TODO confirm against /api/users.
            topUserName = activeUsers[0].name;
            const totalMessages = activeUsers.reduce((sum, u) => sum + u.messages, 0);
            averageMessages = formatNumber(Math.round(totalMessages / activeUsers.length));
        }
        document.getElementById('top-user').textContent = topUserName;
        document.getElementById('avg-messages').textContent = averageMessages;

        currentPage = 1;
        renderUsers();
    } catch (error) {
        console.error('Failed to load users:', error);
        tbody.innerHTML = '<tr><td colspan="9" class="empty-state">Error loading users</td></tr>';
    }
}
|
| 192 |
+
|
| 193 |
+
/**
 * Re-apply the search filter, starting again from the first page.
 * Wired to the search box's keyup event; the filtering itself happens in
 * renderUsers(), which reads the box's current value.
 */
function filterUsers() {
    const firstPage = 1;
    currentPage = firstPage;
    renderUsers();
}
|
| 197 |
+
|
| 198 |
+
/**
 * Build the HTML for one leaderboard table row.
 *
 * Every value taken from the user record is escaped before interpolation.
 * Row clicks go through openUserProfile() with the ID read from a data
 * attribute, so the ID is never spliced into inline JavaScript.
 *
 * @param {object} user - a record from `allUsers`; the fields read are
 *     user_id, name, username, rank, role, messages, percentage, links,
 *     media, active_days and daily_average.
 * @returns {string} markup for a single <tr>.
 */
function buildUserRowHtml(user) {
    // escapeHtml() is only relied on for & < >; the quote characters are
    // handled here so the result is also safe inside a quoted attribute.
    const escapeAttr = (value) => escapeHtml(String(value))
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

    const name = user.name == null ? '' : String(user.name);
    const rank = user.rank || '-';
    const rankClass = rank === 1 ? 'gold' : rank === 2 ? 'silver' : rank === 3 ? 'bronze' : '';
    const rankLabel = rank !== '-' ? '#' + escapeHtml(String(rank)) : '-';
    const initial = escapeHtml(name.charAt(0).toUpperCase());
    const isInactive = user.messages === 0;
    const rowStyle = isInactive ? 'opacity: 0.6;' : '';

    let roleBadge = '';
    if (user.role === 'creator') roleBadge = '<span style="background:#ffd700;color:#1a1a2e;padding:2px 6px;border-radius:4px;font-size:0.7rem;font-weight:600;">Creator</span>';
    else if (user.role === 'admin') roleBadge = '<span style="background:#28a745;color:white;padding:2px 6px;border-radius:4px;font-size:0.7rem;font-weight:600;">Admin</span>';
    else if (user.role === 'bot') roleBadge = '<span style="background:#6c757d;color:white;padding:2px 6px;border-radius:4px;font-size:0.7rem;font-weight:600;">Bot</span>';

    const subtitle = user.username
        ? `@${escapeHtml(user.username)}`
        : `ID: ${escapeHtml(String(user.user_id))}`;

    // Only a bounded number may reach the inline style of the progress bar.
    const barWidth = Math.min(100, Math.max(0, Number(user.percentage) || 0));

    // Inactive users show a dash in every statistics column.
    const orDash = (html) => (isInactive ? '-' : html);
    const messagesCell = isInactive
        ? '<span style="color: var(--text-muted);">-</span>'
        : `
                    <strong>${formatNumber(user.messages)}</strong>
                    <div class="progress-bar">
                        <div class="progress-fill" style="width: ${barWidth}%"></div>
                    </div>`;

    return `
            <tr data-user-id="${escapeAttr(user.user_id)}" onclick="openUserProfile(this.dataset.userId)" style="cursor: pointer; ${rowStyle}">
                <td><span class="list-rank ${rankClass}">${rankLabel}</span></td>
                <td>
                    <div class="user-cell">
                        <div class="user-avatar">${initial}</div>
                        <div>
                            <div class="list-name">${escapeHtml(name)}</div>
                            <div class="list-subtitle">${subtitle}</div>
                        </div>
                    </div>
                </td>
                <td>${roleBadge}</td>
                <td>${messagesCell}</td>
                <td>${orDash(escapeHtml(String(user.percentage)) + '%')}</td>
                <td>${orDash(formatNumber(user.links || 0))}</td>
                <td>${orDash(formatNumber(user.media || 0))}</td>
                <td>${orDash(escapeHtml(String(user.active_days)))}</td>
                <td>${orDash(escapeHtml(String(user.daily_average)))}</td>
            </tr>
        `;
}

/**
 * Render the current page of the (optionally filtered) user table.
 *
 * Filters `allUsers` by the text in #user-search (case-insensitive substring
 * match on name or user ID), clamps `currentPage` to the available pages,
 * then updates the "Showing ..." counter, the table body and the pagination.
 */
function renderUsers() {
    const search = document.getElementById('user-search').value.toLowerCase();
    // IDs may arrive as numbers and names may be missing, so coerce before matching.
    const matches = (value) => String(value == null ? '' : value).toLowerCase().includes(search);
    const filtered = allUsers.filter(u => matches(u.name) || matches(u.user_id));

    const totalPages = Math.ceil(filtered.length / pageSize);
    // Keep the page inside 1..totalPages (or 1 when there is nothing to show).
    currentPage = Math.min(Math.max(1, currentPage), Math.max(1, totalPages));

    const start = (currentPage - 1) * pageSize;
    const end = start + pageSize;
    const pageUsers = filtered.slice(start, end);

    // An empty result reads "0-0 of 0" instead of "1-0 of 0".
    const firstShown = pageUsers.length === 0 ? 0 : start + 1;
    document.getElementById('showing-count').textContent =
        `Showing ${firstShown}-${Math.min(end, filtered.length)} of ${filtered.length}`;

    // Refresh pagination before the empty-state return so buttons from a
    // previous, larger result set do not stay on screen.
    renderPagination(totalPages);

    const tbody = document.getElementById('users-table-body');

    if (pageUsers.length === 0) {
        tbody.innerHTML = '<tr><td colspan="9" class="empty-state">No users found</td></tr>';
        return;
    }

    tbody.innerHTML = pageUsers.map((user) => buildUserRowHtml(user)).join('');
}
|
| 268 |
+
|
| 269 |
+
/**
 * Render the pagination controls into #pagination.
 *
 * Shows previous/next arrows and a window of up to five page buttons around
 * `currentPage`, plus first/last page shortcuts separated by an ellipsis when
 * the window does not reach them. The controls are cleared when there is at
 * most one page.
 *
 * @param {number} totalPages - number of pages in the current result set.
 */
function renderPagination(totalPages) {
    const pagination = document.getElementById('pagination');

    if (totalPages <= 1) {
        pagination.innerHTML = '';
        return;
    }

    const ellipsis = `<span style="padding: 0 0.5rem;">...</span>`;
    const parts = [];

    // Previous button
    parts.push(`<button class="page-btn" onclick="goToPage(${currentPage - 1})"
                ${currentPage === 1 ? 'disabled' : ''}>«</button>`);

    // Window of page numbers centred on the current page where possible.
    const maxVisible = 5;
    let firstVisible = Math.max(1, currentPage - Math.floor(maxVisible / 2));
    const lastVisible = Math.min(totalPages, firstVisible + maxVisible - 1);

    // Near the end the window would be short; pull its start back instead.
    if (lastVisible - firstVisible < maxVisible - 1) {
        firstVisible = Math.max(1, lastVisible - maxVisible + 1);
    }

    if (firstVisible > 1) {
        parts.push(`<button class="page-btn" onclick="goToPage(1)">1</button>`);
        if (firstVisible > 2) {
            parts.push(ellipsis);
        }
    }

    const visiblePages = Array.from(
        { length: lastVisible - firstVisible + 1 },
        (_, offset) => firstVisible + offset
    );
    visiblePages.forEach((page) => {
        parts.push(`<button class="page-btn ${page === currentPage ? 'active' : ''}"
                    onclick="goToPage(${page})">${page}</button>`);
    });

    if (lastVisible < totalPages) {
        if (lastVisible < totalPages - 1) {
            parts.push(ellipsis);
        }
        parts.push(`<button class="page-btn" onclick="goToPage(${totalPages})">${totalPages}</button>`);
    }

    // Next button
    parts.push(`<button class="page-btn" onclick="goToPage(${currentPage + 1})"
                ${currentPage === totalPages ? 'disabled' : ''}>»</button>`);

    pagination.innerHTML = parts.join('');
}
|
| 313 |
+
|
| 314 |
+
/**
 * Switch the table to the given page and scroll back to the top.
 *
 * @param {number} page - 1-based page number, as emitted by the pagination buttons.
 */
function goToPage(page) {
    const scrollTarget = { top: 0, behavior: 'smooth' };

    currentPage = page;
    renderUsers();
    window.scrollTo(scrollTarget);
}
|
| 319 |
+
|
| 320 |
+
/**
 * Navigate to the profile page of the given user.
 *
 * The ID is URL-encoded so that characters such as "/", "?" or "#" cannot
 * change which path is requested.
 *
 * @param {string|number} userId - identifier placed in the /user/<id> path.
 */
function openUserProfile(userId) {
    window.location.href = `/user/${encodeURIComponent(userId)}`;
}
|
| 323 |
+
|
| 324 |
+
// Export function
|
| 325 |
+
/**
 * Start a download of the user export for the timeframe currently selected
 * in #timeframe by navigating to the export endpoint.
 */
function exportUsers() {
    const { value: selectedTimeframe } = document.getElementById('timeframe');
    const exportUrl = '/api/export/users?timeframe=' + selectedTimeframe;
    window.location.href = exportUrl;
}
|
| 329 |
+
|
| 330 |
+
// Helper functions
|
| 331 |
+
/**
 * Format a count compactly: 1,500 becomes "1.5K" and 2,500,000 becomes "2.5M".
 *
 * @param {number|string|null|undefined} num - value to format.
 * @returns {string} the compact form; "-" for null/undefined; the value's
 *     plain string form when it is below 1,000 or not a finite number.
 */
function formatNumber(num) {
    // Missing values are shown with the same placeholder the page uses elsewhere.
    if (num == null) return '-';

    const value = Number(num);
    if (!Number.isFinite(value)) return String(num);

    const magnitude = Math.abs(value);
    if (magnitude >= 1000000) return (value / 1000000).toFixed(1) + 'M';
    if (magnitude >= 1000) {
        const thousands = (value / 1000).toFixed(1);
        // Values just under a million round up to "1000.0"; show them as
        // millions instead of "1000.0K".
        if (Math.abs(Number(thousands)) >= 1000) {
            return (value / 1000000).toFixed(1) + 'M';
        }
        return thousands + 'K';
    }
    return String(num);
}
|
| 336 |
+
|
| 337 |
+
/**
 * Escape a value for safe insertion into HTML markup.
 *
 * Besides & < > this also escapes both quote characters, so the result can
 * be used inside quoted attribute values as well as in element content.
 * Implemented as a pure string transform; no DOM node is created.
 *
 * @param {*} text - value to escape; null and undefined yield an empty string.
 * @returns {string} the escaped text.
 */
function escapeHtml(text) {
    if (text == null) return '';

    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    };
    return String(text).replace(/[&<>"']/g, (ch) => entities[ch]);
}
|
| 342 |
+
</script>
|
| 343 |
+
</body>
|
| 344 |
+
</html>
|