DomLoyer commited on
Commit
ca696b3
·
1 Parent(s): 1a81e0d

add: db_store.py

Browse files
Files changed (1) hide show
  1. syscred/db_store.py +354 -0
syscred/db_store.py ADDED
@@ -0,0 +1,354 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SysCRED Storage Module - SQLite + Supabase
3
+ ==========================================
4
+ Stocke les triplets RDF et résultats d'analyse.
5
+ Utilise SQLite localement, avec option de sync vers Supabase.
6
+ """
7
+
8
+ import os
9
+ import sqlite3
10
+ import hashlib
11
+ import json
12
+ from datetime import datetime
13
+ from typing import Optional, Dict, Any, List, Tuple
14
+ from urllib.parse import urlparse
15
+ from pathlib import Path
16
+
17
+ # Chemins
18
+ BASE_DIR = Path(__file__).parent
19
+ DB_PATH = BASE_DIR / "syscred_local.db"
20
+
21
+ class SysCREDStore:
22
+ """
23
+ Gestionnaire de stockage pour SysCRED.
24
+ SQLite local avec option Supabase.
25
+ """
26
+
27
+ def __init__(self, db_path: str = None, supabase_url: str = None):
28
+ self.db_path = db_path or str(DB_PATH)
29
+ self.supabase_url = supabase_url or os.getenv("DATABASE_URL")
30
+ self.conn = None
31
+ self._init_local_db()
32
+
33
+ def _init_local_db(self):
34
+ """Initialise la base SQLite locale."""
35
+ self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
36
+ self.conn.row_factory = sqlite3.Row
37
+
38
+ # Créer les tables
39
+ self.conn.executescript("""
40
+ -- Résultats d'analyse
41
+ CREATE TABLE IF NOT EXISTS analysis_results (
42
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
43
+ url TEXT NOT NULL,
44
+ credibility_score REAL NOT NULL,
45
+ summary TEXT,
46
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
47
+ source_reputation TEXT,
48
+ fact_check_count INTEGER DEFAULT 0,
49
+ score_details TEXT,
50
+ domain TEXT
51
+ );
52
+
53
+ -- Triplets RDF
54
+ CREATE TABLE IF NOT EXISTS rdf_triples (
55
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
56
+ subject TEXT NOT NULL,
57
+ predicate TEXT NOT NULL,
58
+ object TEXT NOT NULL,
59
+ object_type TEXT DEFAULT 'uri',
60
+ graph_name TEXT DEFAULT 'data',
61
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
62
+ UNIQUE(subject, predicate, object, graph_name)
63
+ );
64
+
65
+ -- Sources
66
+ CREATE TABLE IF NOT EXISTS sources (
67
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
68
+ domain TEXT UNIQUE NOT NULL,
69
+ reputation_score REAL,
70
+ domain_age_years REAL,
71
+ is_fact_checker INTEGER DEFAULT 0,
72
+ analysis_count INTEGER DEFAULT 0,
73
+ last_analyzed TIMESTAMP,
74
+ metadata TEXT,
75
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
76
+ );
77
+
78
+ -- Claims
79
+ CREATE TABLE IF NOT EXISTS claims (
80
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
81
+ claim_text TEXT NOT NULL,
82
+ claim_hash TEXT UNIQUE,
83
+ source_url TEXT,
84
+ extracted_entities TEXT,
85
+ credibility_score REAL,
86
+ verification_status TEXT DEFAULT 'unverified',
87
+ evidence_count INTEGER DEFAULT 0,
88
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
89
+ );
90
+
91
+ -- Evidence
92
+ CREATE TABLE IF NOT EXISTS evidence (
93
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
94
+ claim_id INTEGER,
95
+ doc_id TEXT,
96
+ doc_text TEXT,
97
+ relevance_score REAL,
98
+ retrieval_method TEXT DEFAULT 'bm25',
99
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
100
+ FOREIGN KEY (claim_id) REFERENCES claims(id)
101
+ );
102
+
103
+ -- Index
104
+ CREATE INDEX IF NOT EXISTS idx_analysis_url ON analysis_results(url);
105
+ CREATE INDEX IF NOT EXISTS idx_triple_subject ON rdf_triples(subject);
106
+ CREATE INDEX IF NOT EXISTS idx_triple_graph ON rdf_triples(graph_name);
107
+ CREATE INDEX IF NOT EXISTS idx_sources_domain ON sources(domain);
108
+ """)
109
+ self.conn.commit()
110
+ print(f"[SysCREDStore] SQLite initialisé: {self.db_path}")
111
+
112
+ # =========================================================================
113
+ # ONTOLOGY / RDF TRIPLES
114
+ # =========================================================================
115
+
116
+ def sync_ontology(self, ontology_manager) -> Dict[str, int]:
117
+ """
118
+ Synchronise les graphes RDFLib vers SQLite.
119
+
120
+ Args:
121
+ ontology_manager: Instance avec base_graph et data_graph
122
+ """
123
+ result = {'base_synced': 0, 'data_synced': 0}
124
+
125
+ try:
126
+ # Sync base ontology
127
+ if hasattr(ontology_manager, 'base_graph') and ontology_manager.base_graph:
128
+ result['base_synced'] = self._sync_graph(
129
+ ontology_manager.base_graph,
130
+ graph_name='base'
131
+ )
132
+
133
+ # Sync data graph
134
+ if hasattr(ontology_manager, 'data_graph') and ontology_manager.data_graph:
135
+ result['data_synced'] = self._sync_graph(
136
+ ontology_manager.data_graph,
137
+ graph_name='data'
138
+ )
139
+
140
+ self.conn.commit()
141
+ print(f"[SysCREDStore] Synced {result['base_synced']} base + {result['data_synced']} data triples")
142
+
143
+ except Exception as e:
144
+ result['error'] = str(e)
145
+ print(f"[SysCREDStore] Sync error: {e}")
146
+
147
+ return result
148
+
149
+ def _sync_graph(self, graph, graph_name: str) -> int:
150
+ """Sync un graphe RDFLib vers SQLite."""
151
+ from rdflib import Literal
152
+
153
+ count = 0
154
+ cursor = self.conn.cursor()
155
+
156
+ for s, p, o in graph:
157
+ subject = str(s)
158
+ predicate = str(p)
159
+ obj_value = str(o)
160
+ obj_type = 'literal' if isinstance(o, Literal) else 'uri'
161
+
162
+ try:
163
+ cursor.execute("""
164
+ INSERT OR IGNORE INTO rdf_triples
165
+ (subject, predicate, object, object_type, graph_name)
166
+ VALUES (?, ?, ?, ?, ?)
167
+ """, (subject, predicate, obj_value, obj_type, graph_name))
168
+ count += 1
169
+ except:
170
+ pass
171
+
172
+ return count
173
+
174
+ def get_triple_stats(self) -> Dict[str, int]:
175
+ """Statistiques des triplets."""
176
+ cursor = self.conn.cursor()
177
+
178
+ cursor.execute("SELECT COUNT(*) FROM rdf_triples WHERE graph_name = 'base'")
179
+ base = cursor.fetchone()[0]
180
+
181
+ cursor.execute("SELECT COUNT(*) FROM rdf_triples WHERE graph_name = 'data'")
182
+ data = cursor.fetchone()[0]
183
+
184
+ return {
185
+ 'base_triples': base,
186
+ 'data_triples': data,
187
+ 'total_triples': base + data
188
+ }
189
+
190
+ # =========================================================================
191
+ # ANALYSIS RESULTS
192
+ # =========================================================================
193
+
194
+ def save_analysis(self, url: str, credibility_score: float,
195
+ summary: str = None, score_details: Dict = None,
196
+ source_reputation: str = None, fact_check_count: int = 0) -> int:
197
+ """Sauvegarde un résultat d'analyse."""
198
+ domain = urlparse(url).netloc
199
+
200
+ cursor = self.conn.cursor()
201
+ cursor.execute("""
202
+ INSERT INTO analysis_results
203
+ (url, credibility_score, summary, score_details, source_reputation,
204
+ fact_check_count, domain)
205
+ VALUES (?, ?, ?, ?, ?, ?, ?)
206
+ """, (
207
+ url, credibility_score, summary,
208
+ json.dumps(score_details) if score_details else None,
209
+ source_reputation, fact_check_count, domain
210
+ ))
211
+ self.conn.commit()
212
+
213
+ result_id = cursor.lastrowid
214
+ print(f"[SysCREDStore] Saved analysis #{result_id} for {domain}")
215
+
216
+ # Update source stats
217
+ self._update_source(domain, credibility_score)
218
+
219
+ return result_id
220
+
221
+ def get_history(self, url: str = None, limit: int = 50) -> List[Dict]:
222
+ """Récupère l'historique des analyses."""
223
+ cursor = self.conn.cursor()
224
+
225
+ if url:
226
+ cursor.execute("""
227
+ SELECT * FROM analysis_results
228
+ WHERE url = ? ORDER BY created_at DESC LIMIT ?
229
+ """, (url, limit))
230
+ else:
231
+ cursor.execute("""
232
+ SELECT * FROM analysis_results
233
+ ORDER BY created_at DESC LIMIT ?
234
+ """, (limit,))
235
+
236
+ return [dict(row) for row in cursor.fetchall()]
237
+
238
+ # =========================================================================
239
+ # SOURCES
240
+ # =========================================================================
241
+
242
+ def _update_source(self, domain: str, score: float = None):
243
+ """Met à jour les stats d'une source."""
244
+ cursor = self.conn.cursor()
245
+
246
+ cursor.execute("SELECT id, analysis_count FROM sources WHERE domain = ?", (domain,))
247
+ row = cursor.fetchone()
248
+
249
+ if row:
250
+ cursor.execute("""
251
+ UPDATE sources SET
252
+ analysis_count = analysis_count + 1,
253
+ last_analyzed = CURRENT_TIMESTAMP,
254
+ reputation_score = COALESCE(?, reputation_score)
255
+ WHERE domain = ?
256
+ """, (score, domain))
257
+ else:
258
+ cursor.execute("""
259
+ INSERT INTO sources (domain, reputation_score, analysis_count, last_analyzed)
260
+ VALUES (?, ?, 1, CURRENT_TIMESTAMP)
261
+ """, (domain, score))
262
+
263
+ self.conn.commit()
264
+
265
+ def get_source(self, domain: str) -> Optional[Dict]:
266
+ """Récupère les infos d'une source."""
267
+ cursor = self.conn.cursor()
268
+ cursor.execute("SELECT * FROM sources WHERE domain = ?", (domain,))
269
+ row = cursor.fetchone()
270
+ return dict(row) if row else None
271
+
272
+ # =========================================================================
273
+ # GLOBAL STATS
274
+ # =========================================================================
275
+
276
+ def get_stats(self) -> Dict[str, Any]:
277
+ """Statistiques globales."""
278
+ cursor = self.conn.cursor()
279
+
280
+ cursor.execute("SELECT COUNT(*) FROM analysis_results")
281
+ total_analyses = cursor.fetchone()[0]
282
+
283
+ cursor.execute("SELECT COUNT(*) FROM sources")
284
+ unique_domains = cursor.fetchone()[0]
285
+
286
+ cursor.execute("SELECT AVG(credibility_score) FROM analysis_results")
287
+ avg_score = cursor.fetchone()[0]
288
+
289
+ triple_stats = self.get_triple_stats()
290
+
291
+ return {
292
+ 'total_analyses': total_analyses,
293
+ 'unique_domains': unique_domains,
294
+ 'avg_credibility': round(avg_score, 2) if avg_score else None,
295
+ **triple_stats
296
+ }
297
+
298
+ def close(self):
299
+ """Ferme la connexion."""
300
+ if self.conn:
301
+ self.conn.close()
302
+
303
+
304
+ # ============================================================================
305
+ # INTEGRATION
306
+ # ============================================================================
307
+
308
+ def sync_ontology_to_db():
309
+ """Synchronise l'ontologie vers la base de données."""
310
+ import sys
311
+ sys.path.insert(0, str(BASE_DIR))
312
+
313
+ try:
314
+ from ontology_manager import OntologyManager
315
+ from config import Config
316
+
317
+ # Init ontology
318
+ onto = OntologyManager(
319
+ base_ontology_path=str(Config.ONTOLOGY_BASE_PATH),
320
+ data_path=str(Config.ONTOLOGY_DATA_PATH)
321
+ )
322
+
323
+ # Init store
324
+ store = SysCREDStore()
325
+
326
+ # Sync
327
+ result = store.sync_ontology(onto)
328
+ print(f"\n✅ Sync complete: {result}")
329
+
330
+ # Stats
331
+ stats = store.get_stats()
332
+ print(f"📊 Stats: {stats}")
333
+
334
+ return store
335
+
336
+ except ImportError as e:
337
+ print(f"Import error: {e}")
338
+ return None
339
+
340
+
341
+ # ============================================================================
342
+ # CLI
343
+ # ============================================================================
344
+
345
+ if __name__ == "__main__":
346
+ print("=" * 60)
347
+ print("SysCRED Storage - Synchronisation des triplets")
348
+ print("=" * 60)
349
+
350
+ store = sync_ontology_to_db()
351
+
352
+ if store:
353
+ print("\n✅ Base de données prête!")
354
+ print(f" Fichier: {store.db_path}")