Spaces:
Sleeping
Sleeping
| -- Telegram Chat Indexing Schema (Optimized) | |
| -- SQLite with FTS5 for full-text search + performance optimizations | |
| -- ============================================ | |
| -- PRAGMA OPTIMIZATIONS | |
| -- ============================================ | |
| PRAGMA journal_mode = WAL; -- Write-Ahead Logging for better concurrency | |
| PRAGMA synchronous = NORMAL; -- Balance between safety and speed | |
| PRAGMA cache_size = -64000; -- 64MB cache | |
| PRAGMA temp_store = MEMORY; -- Store temp tables in memory | |
| PRAGMA mmap_size = 268435456; -- 256MB memory-mapped I/O | |
| -- ============================================ | |
| -- MAIN TABLES | |
| -- ============================================ | |
| -- Main messages table | |
| CREATE TABLE IF NOT EXISTS messages ( | |
| id INTEGER PRIMARY KEY, | |
| type TEXT DEFAULT 'message', | |
| date TEXT, | |
| date_unixtime INTEGER NOT NULL, | |
| from_name TEXT, | |
| from_id TEXT NOT NULL, | |
| reply_to_message_id INTEGER, | |
| forwarded_from TEXT, | |
| forwarded_from_id TEXT, | |
| text_plain TEXT, | |
| text_length INTEGER DEFAULT 0, | |
| has_media INTEGER DEFAULT 0, | |
| has_photo INTEGER DEFAULT 0, | |
| has_links INTEGER DEFAULT 0, | |
| has_mentions INTEGER DEFAULT 0, | |
| is_edited INTEGER DEFAULT 0, | |
| edited_unixtime INTEGER, | |
| photo_file_size INTEGER, | |
| photo_width INTEGER, | |
| photo_height INTEGER, | |
| raw_json TEXT | |
| ); | |
| -- Users table (extracted from messages) | |
| CREATE TABLE IF NOT EXISTS users ( | |
| user_id TEXT PRIMARY KEY, | |
| display_name TEXT, | |
| first_seen INTEGER, | |
| last_seen INTEGER, | |
| message_count INTEGER DEFAULT 0 | |
| ); | |
| -- Entities table (links, mentions, etc.) | |
| CREATE TABLE IF NOT EXISTS entities ( | |
| id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| message_id INTEGER NOT NULL, | |
| type TEXT NOT NULL, | |
| value TEXT NOT NULL, | |
| FOREIGN KEY (message_id) REFERENCES messages(id) ON DELETE CASCADE | |
| ); | |
| -- ============================================ | |
| -- GRAPH STRUCTURE FOR REPLY THREADS | |
| -- ============================================ | |
| -- Pre-computed reply graph edges for fast traversal | |
| CREATE TABLE IF NOT EXISTS reply_graph ( | |
| parent_id INTEGER NOT NULL, | |
| child_id INTEGER NOT NULL, | |
| depth INTEGER DEFAULT 1, | |
| PRIMARY KEY (parent_id, child_id) | |
| ); | |
| -- Conversation threads (connected components) | |
| CREATE TABLE IF NOT EXISTS threads ( | |
| thread_id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| root_message_id INTEGER UNIQUE, | |
| message_count INTEGER DEFAULT 0, | |
| first_message_time INTEGER, | |
| last_message_time INTEGER, | |
| participant_count INTEGER DEFAULT 0 | |
| ); | |
| -- Message to thread mapping | |
| CREATE TABLE IF NOT EXISTS message_threads ( | |
| message_id INTEGER PRIMARY KEY, | |
| thread_id INTEGER NOT NULL, | |
| depth INTEGER DEFAULT 0, | |
| FOREIGN KEY (thread_id) REFERENCES threads(thread_id) | |
| ); | |
| -- ============================================ | |
| -- TRIGRAM INDEX FOR FUZZY SEARCH | |
| -- ============================================ | |
| -- Trigrams for fuzzy/approximate string matching | |
| CREATE TABLE IF NOT EXISTS trigrams ( | |
| trigram TEXT NOT NULL, | |
| message_id INTEGER NOT NULL, | |
| position INTEGER NOT NULL, | |
| PRIMARY KEY (trigram, message_id, position) | |
| ); | |
| -- ============================================ | |
| -- FTS5 FULL-TEXT SEARCH (OPTIMIZED) | |
| -- ============================================ | |
| -- Full-text search with prefix index for autocomplete | |
| CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5( | |
| text_plain, | |
| from_name, | |
| content='messages', | |
| content_rowid='id', | |
| tokenize='unicode61 remove_diacritics 2', | |
| prefix='2 3 4' -- Enable prefix queries for autocomplete | |
| ); | |
| -- Triggers to keep FTS in sync | |
| CREATE TRIGGER IF NOT EXISTS messages_ai AFTER INSERT ON messages BEGIN | |
| INSERT INTO messages_fts(rowid, text_plain, from_name) | |
| VALUES (new.id, new.text_plain, new.from_name); | |
| END; | |
| CREATE TRIGGER IF NOT EXISTS messages_ad AFTER DELETE ON messages BEGIN | |
| INSERT INTO messages_fts(messages_fts, rowid, text_plain, from_name) | |
| VALUES ('delete', old.id, old.text_plain, old.from_name); | |
| END; | |
| CREATE TRIGGER IF NOT EXISTS messages_au AFTER UPDATE ON messages BEGIN | |
| INSERT INTO messages_fts(messages_fts, rowid, text_plain, from_name) | |
| VALUES ('delete', old.id, old.text_plain, old.from_name); | |
| INSERT INTO messages_fts(rowid, text_plain, from_name) | |
| VALUES (new.id, new.text_plain, new.from_name); | |
| END; | |
| -- ============================================ | |
| -- OPTIMIZED INDEXES | |
| -- ============================================ | |
| -- Composite indexes for common query patterns | |
| CREATE INDEX IF NOT EXISTS idx_messages_date ON messages(date_unixtime); | |
| CREATE INDEX IF NOT EXISTS idx_messages_from ON messages(from_id); | |
| CREATE INDEX IF NOT EXISTS idx_messages_from_date ON messages(from_id, date_unixtime); | |
| CREATE INDEX IF NOT EXISTS idx_messages_reply ON messages(reply_to_message_id) WHERE reply_to_message_id IS NOT NULL; | |
| CREATE INDEX IF NOT EXISTS idx_messages_forwarded ON messages(forwarded_from_id) WHERE forwarded_from_id IS NOT NULL; | |
| CREATE INDEX IF NOT EXISTS idx_messages_has_links ON messages(has_links) WHERE has_links = 1; | |
| CREATE INDEX IF NOT EXISTS idx_messages_has_media ON messages(has_media) WHERE has_media = 1; | |
| -- Entity indexes | |
| CREATE INDEX IF NOT EXISTS idx_entities_message ON entities(message_id); | |
| CREATE INDEX IF NOT EXISTS idx_entities_type_value ON entities(type, value); | |
| CREATE INDEX IF NOT EXISTS idx_entities_value ON entities(value); | |
| -- Graph indexes | |
| CREATE INDEX IF NOT EXISTS idx_reply_graph_child ON reply_graph(child_id); | |
| CREATE INDEX IF NOT EXISTS idx_message_threads_thread ON message_threads(thread_id); | |
| -- Trigram index | |
| CREATE INDEX IF NOT EXISTS idx_trigrams_trigram ON trigrams(trigram); | |
| -- ============================================ | |
| -- PARTICIPANTS TABLE (from Telethon API) | |
| -- ============================================ | |
| CREATE TABLE IF NOT EXISTS participants ( | |
| user_id TEXT PRIMARY KEY, | |
| first_name TEXT, | |
| last_name TEXT, | |
| username TEXT, | |
| phone TEXT, | |
| is_bot INTEGER DEFAULT 0, | |
| is_admin INTEGER DEFAULT 0, | |
| is_creator INTEGER DEFAULT 0, | |
| is_premium INTEGER DEFAULT 0, | |
| join_date INTEGER, | |
| last_status TEXT DEFAULT 'unknown', | |
| last_online INTEGER, | |
| about TEXT, | |
| updated_at INTEGER | |
| ); | |
| -- ============================================ | |
| -- STATISTICS TABLE FOR FAST AGGREGATIONS | |
| -- ============================================ | |
| CREATE TABLE IF NOT EXISTS stats_cache ( | |
| key TEXT PRIMARY KEY, | |
| value TEXT, | |
| updated_at INTEGER | |
| ); | |
| -- ============================================ | |
| -- VECTOR EMBEDDINGS TABLE (OPTIONAL) | |
| -- ============================================ | |
| -- For semantic search with FAISS | |
| CREATE TABLE IF NOT EXISTS embeddings ( | |
| message_id INTEGER PRIMARY KEY, | |
| embedding BLOB, -- Serialized numpy array | |
| model_name TEXT DEFAULT 'default', | |
| FOREIGN KEY (message_id) REFERENCES messages(id) ON DELETE CASCADE | |
| ); | |