DominiqueLoyer committed on
Commit
8e97fc5
·
1 Parent(s): 6059862

Deploy SysCRED v2.3.1 - GraphRAG + LIAR benchmark + TREC integration

Browse files
syscred/.env.example ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SysCRED Environment Configuration
2
+ # ==================================
3
+ # Copy this file to .env and fill in your values
4
+
5
+ # Supabase Database (PostgreSQL)
6
+ # Get this from: Supabase Dashboard > Settings > Database > Connection string
7
+ DATABASE_URL=postgresql://postgres:YOUR_PASSWORD@db.YOUR_PROJECT.supabase.co:5432/postgres
8
+
9
+ # Google Fact Check API Key
10
+ # Get this from: https://console.cloud.google.com/apis/credentials
11
+ GOOGLE_FACT_CHECK_API_KEY=your_google_api_key_here
12
+
13
+ # HuggingFace Space URL (for remote processing)
14
+ HF_SPACE_URL=https://domloyer-syscred.hf.space
15
+
16
+ # Flask Configuration
17
+ FLASK_DEBUG=false
18
+ FLASK_HOST=0.0.0.0
19
+ FLASK_PORT=5001
20
+
21
+ # ML Model Loading (set to false for lightweight mode)
22
+ LOAD_ML_MODELS=false
syscred/__init__.py CHANGED
@@ -11,14 +11,15 @@ Modules:
11
  - ir_engine: BM25, QLD, TF-IDF, PRF (from TREC)
12
  - trec_retriever: Evidence retrieval for fact-checking (NEW v2.3)
13
  - trec_dataset: TREC AP88-90 data loader (NEW v2.3)
 
14
  - seo_analyzer: SEO analysis, PageRank estimation
15
  - eval_metrics: MAP, NDCG, P@K, Recall, MRR
16
  - ontology_manager: RDFLib integration
17
  - verification_system: Main credibility pipeline
18
- - graph_rag: GraphRAG for contextual memory
19
  """
20
 
21
- __version__ = "2.3.0"
22
  __author__ = "Dominique S. Loyer"
23
  __citation__ = "loyerModelingHybridSystem2025"
24
 
@@ -29,11 +30,15 @@ from syscred.ontology_manager import OntologyManager
29
  from syscred.seo_analyzer import SEOAnalyzer
30
  from syscred.ir_engine import IREngine
31
  from syscred.eval_metrics import EvaluationMetrics
 
32
 
33
  # TREC Integration (NEW - Feb 2026)
34
  from syscred.trec_retriever import TRECRetriever, Evidence, RetrievalResult
35
  from syscred.trec_dataset import TRECDataset, TRECTopic
36
 
 
 
 
37
  # Convenience alias
38
  SysCRED = CredibilityVerificationSystem
39
 
@@ -46,10 +51,15 @@ __all__ = [
46
  'SEOAnalyzer',
47
  'IREngine',
48
  'EvaluationMetrics',
 
49
  # TREC (NEW)
50
  'TRECRetriever',
51
  'TRECDataset',
52
  'TRECTopic',
53
  'Evidence',
54
  'RetrievalResult',
 
 
 
 
55
  ]
 
11
  - ir_engine: BM25, QLD, TF-IDF, PRF (from TREC)
12
  - trec_retriever: Evidence retrieval for fact-checking (NEW v2.3)
13
  - trec_dataset: TREC AP88-90 data loader (NEW v2.3)
14
+ - liar_dataset: LIAR benchmark dataset loader (NEW v2.3)
15
  - seo_analyzer: SEO analysis, PageRank estimation
16
  - eval_metrics: MAP, NDCG, P@K, Recall, MRR
17
  - ontology_manager: RDFLib integration
18
  - verification_system: Main credibility pipeline
19
+ - graph_rag: GraphRAG for contextual memory (enhanced v2.3)
20
  """
21
 
22
+ __version__ = "2.3.1"
23
  __author__ = "Dominique S. Loyer"
24
  __citation__ = "loyerModelingHybridSystem2025"
25
 
 
30
  from syscred.seo_analyzer import SEOAnalyzer
31
  from syscred.ir_engine import IREngine
32
  from syscred.eval_metrics import EvaluationMetrics
33
+ from syscred.graph_rag import GraphRAG
34
 
35
  # TREC Integration (NEW - Feb 2026)
36
  from syscred.trec_retriever import TRECRetriever, Evidence, RetrievalResult
37
  from syscred.trec_dataset import TRECDataset, TRECTopic
38
 
39
+ # LIAR Benchmark (NEW - Feb 2026)
40
+ from syscred.liar_dataset import LIARDataset, LiarStatement, LiarLabel
41
+
42
  # Convenience alias
43
  SysCRED = CredibilityVerificationSystem
44
 
 
51
  'SEOAnalyzer',
52
  'IREngine',
53
  'EvaluationMetrics',
54
+ 'GraphRAG',
55
  # TREC (NEW)
56
  'TRECRetriever',
57
  'TRECDataset',
58
  'TRECTopic',
59
  'Evidence',
60
  'RetrievalResult',
61
+ # LIAR Benchmark (NEW)
62
+ 'LIARDataset',
63
+ 'LiarStatement',
64
+ 'LiarLabel',
65
  ]
syscred/backend_app.py CHANGED
@@ -17,6 +17,20 @@ Endpoints:
17
  import sys
18
  import os
19
  import traceback
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  from flask import Flask, request, jsonify, send_from_directory
21
  from flask_cors import CORS
22
 
@@ -31,10 +45,15 @@ try:
31
  from syscred.ontology_manager import OntologyManager
32
  from syscred.config import config, Config
33
  from syscred.database import init_db, db, AnalysisResult
 
 
 
34
  SYSCRED_AVAILABLE = True
35
- print("[SysCRED Backend] Modules imported successfully")
 
36
  except ImportError as e:
37
  SYSCRED_AVAILABLE = False
 
38
  print(f"[SysCRED Backend] Warning: Could not import modules: {e}")
39
  # Define dummy init_db to prevent crash
40
  def init_db(app): pass
@@ -63,6 +82,40 @@ except Exception as e:
63
  # --- Initialize SysCRED System ---
64
  credibility_system = None
65
  seo_analyzer = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  def initialize_system():
68
  """Initialize the credibility system (lazy loading)."""
@@ -339,6 +392,201 @@ def ontology_stats():
339
  }), 200
340
 
341
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  # --- Main ---
343
  if __name__ == '__main__':
344
  print("=" * 60)
@@ -354,10 +602,15 @@ if __name__ == '__main__':
354
  print()
355
  print("[SysCRED Backend] Starting Flask server...")
356
  print("[SysCRED Backend] Endpoints:")
357
- print(" - POST /api/verify - Full credibility verification")
358
- print(" - POST /api/seo - SEO analysis only (faster)")
359
- print(" - GET /api/ontology/stats - Ontology statistics")
360
- print(" - GET /api/health - Health check")
 
 
 
 
 
361
  print()
362
 
363
  app.run(host='0.0.0.0', port=5001, debug=True)
 
17
  import sys
18
  import os
19
  import traceback
20
+
21
# Load environment variables from a sibling .env file (optional dependency).
from pathlib import Path
try:
    from dotenv import load_dotenv
except ImportError:
    # python-dotenv is optional: fall back to whatever the shell exported.
    print("[SysCRED Backend] python-dotenv not installed, using system env vars")
else:
    env_path = Path(__file__).parent / '.env'
    if not env_path.exists():
        print(f"[SysCRED Backend] No .env file found at {env_path}")
    else:
        load_dotenv(env_path)
        print(f"[SysCRED Backend] Loaded .env from {env_path}")
33
+
34
  from flask import Flask, request, jsonify, send_from_directory
35
  from flask_cors import CORS
36
 
 
45
  from syscred.ontology_manager import OntologyManager
46
  from syscred.config import config, Config
47
  from syscred.database import init_db, db, AnalysisResult
48
+ # TREC modules
49
+ from syscred.trec_retriever import TRECRetriever, Evidence, RetrievalResult
50
+ from syscred.eval_metrics import EvaluationMetrics
51
  SYSCRED_AVAILABLE = True
52
+ TREC_AVAILABLE = True
53
+ print("[SysCRED Backend] Modules imported successfully (including TREC)")
54
  except ImportError as e:
55
  SYSCRED_AVAILABLE = False
56
+ TREC_AVAILABLE = False
57
  print(f"[SysCRED Backend] Warning: Could not import modules: {e}")
58
  # Define dummy init_db to prevent crash
59
  def init_db(app): pass
 
82
# --- Initialize SysCRED System ---
# Module-level singletons. All start as None and are created lazily:
# credibility_system / seo_analyzer presumably by initialize_system() below
# (TODO confirm — its body is defined later in this file), trec_retriever /
# eval_metrics on the first call to the /api/trec/* endpoints.
credibility_system = None
seo_analyzer = None
trec_retriever = None  # TRECRetriever instance, built on first /api/trec/search
eval_metrics = None    # EvaluationMetrics helper shared by TREC endpoints

# Demo corpus for TREC (AP88-90 style documents)
# Keys imitate TREC AP document IDs (APyymmdd-nnnn); each value holds the raw
# document text plus a short display title. Served by /api/trec/corpus and
# used as the retrieval collection for /api/trec/search.
TREC_DEMO_CORPUS = {
    "AP880101-0001": {
        "text": "Climate change is primarily caused by human activities, particularly the burning of fossil fuels which release greenhouse gases into the atmosphere.",
        "title": "Climate Science Report"
    },
    "AP880101-0002": {
        "text": "The Earth's temperature has risen significantly over the past century due to greenhouse gas emissions from industrial activities and deforestation.",
        "title": "Global Warming Study"
    },
    "AP880102-0001": {
        "text": "Scientists warn that sea levels could rise dramatically if current warming trends continue, threatening coastal cities worldwide.",
        "title": "Sea Level Warning"
    },
    "AP890215-0001": {
        "text": "The presidential election campaign focused on economic policies, healthcare reform, and national security issues.",
        "title": "Election Coverage"
    },
    "AP890216-0001": {
        "text": "Stock markets rose sharply after positive economic indicators were released by the Federal Reserve, signaling economic recovery.",
        "title": "Financial News"
    },
    "AP880201-0001": {
        "text": "Renewable energy sources like solar and wind power are becoming more cost-effective alternatives to fossil fuels.",
        "title": "Green Energy Report"
    },
    "AP890301-0001": {
        "text": "The technology industry continues to grow rapidly, with artificial intelligence and machine learning driving innovation.",
        "title": "Tech Industry Update"
    },
}
119
 
120
  def initialize_system():
121
  """Initialize the credibility system (lazy loading)."""
 
392
  }), 200
393
 
394
 
395
# --- TREC Endpoints ---

@app.route('/api/trec/search', methods=['POST'])
def trec_search():
    """
    Search for evidence using TREC retrieval methods.

    Request JSON:
        {
          "query": "Claim or query to search for",
          "k": 10,          # Number of results (optional, default 10)
          "model": "bm25"   # Retrieval model: bm25, tfidf, qld (optional)
        }

    Response JSON:
        {
          "query": "original query",
          "results": [{"doc_id": "AP880101-0001", "score": 6.27, "rank": 1,
                       "text": "...", "title": "...", "model": "bm25"}, ...],
          "total": 3,
          "model": "bm25",
          "search_time_ms": 12.5
        }

    Returns 400 for malformed input, 503 if the retriever cannot be
    initialized, 500 for unexpected retrieval errors.
    """
    global trec_retriever, eval_metrics

    # Lazy initialization so importing the app stays cheap when TREC is unused.
    if trec_retriever is None:
        try:
            trec_retriever = TRECRetriever(use_stemming=True, enable_prf=False)
            trec_retriever.corpus = TREC_DEMO_CORPUS
            eval_metrics = EvaluationMetrics()
            print("[SysCRED Backend] TREC Retriever initialized with demo corpus")
        except Exception as e:
            return jsonify({'error': f'TREC initialization failed: {str(e)}'}), 503

    if not request.is_json:
        return jsonify({'error': 'Request must be JSON'}), 400

    data = request.get_json()
    query = data.get('query', '').strip()

    if not query:
        return jsonify({'error': "'query' is required"}), 400

    # FIX: validate 'k' and 'model' up front so bad client input yields a 400
    # instead of surfacing as a 500 from deep inside the retriever.
    k = data.get('k', 10)
    # bool is an int subclass, so exclude it explicitly.
    if isinstance(k, bool) or not isinstance(k, int) or k < 1:
        return jsonify({'error': "'k' must be a positive integer"}), 400

    model = data.get('model', 'bm25')
    if model not in ('bm25', 'tfidf', 'qld'):
        return jsonify({'error': "'model' must be one of: bm25, tfidf, qld"}), 400

    import time

    try:
        start_time = time.time()

        # Retrieve evidence
        result = trec_retriever.retrieve_evidence(query, k=k, model=model)
        search_time_ms = (time.time() - start_time) * 1000

        # Format results, enriching each hit with its corpus title.
        results = []
        for ev in result.evidences:
            doc_info = trec_retriever.corpus.get(ev.doc_id, {})
            results.append({
                'doc_id': ev.doc_id,
                'score': round(ev.score, 4),
                'rank': ev.rank,
                'text': ev.text,
                'title': doc_info.get('title', ''),
                'model': ev.retrieval_model
            })

        return jsonify({
            'query': query,
            'results': results,
            'total': len(results),
            'model': model,
            'search_time_ms': round(search_time_ms, 2)
        }), 200

    except Exception as e:
        print(f"[SysCRED Backend] TREC search error: {e}")
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500
+
479
+
480
+ @app.route('/api/trec/corpus', methods=['GET'])
481
+ def trec_corpus():
482
+ """
483
+ Get the TREC demo corpus information.
484
+
485
+ Response:
486
+ {
487
+ "corpus_size": 7,
488
+ "corpus_type": "AP88-90 Demo",
489
+ "documents": [
490
+ {"doc_id": "AP880101-0001", "title": "...", "text_preview": "..."},
491
+ ...
492
+ ]
493
+ }
494
+ """
495
+ docs = []
496
+ for doc_id, doc in TREC_DEMO_CORPUS.items():
497
+ docs.append({
498
+ 'doc_id': doc_id,
499
+ 'title': doc.get('title', ''),
500
+ 'text_preview': doc['text'][:150] + '...' if len(doc['text']) > 150 else doc['text']
501
+ })
502
+
503
+ return jsonify({
504
+ 'corpus_size': len(TREC_DEMO_CORPUS),
505
+ 'corpus_type': 'AP88-90 Demo',
506
+ 'documents': docs
507
+ }), 200
508
+
509
+
510
+ @app.route('/api/trec/metrics', methods=['POST'])
511
+ def trec_metrics():
512
+ """
513
+ Calculate IR evaluation metrics for a retrieval result.
514
+
515
+ Request JSON:
516
+ {
517
+ "retrieved": ["AP880101-0001", "AP890215-0001", "AP880101-0002"],
518
+ "relevant": ["AP880101-0001", "AP880101-0002", "AP880102-0001"]
519
+ }
520
+
521
+ Response:
522
+ {
523
+ "precision_at_3": 0.67,
524
+ "recall_at_3": 0.67,
525
+ "average_precision": 0.81,
526
+ "mrr": 1.0,
527
+ "ndcg_at_3": 0.88
528
+ }
529
+ """
530
+ global eval_metrics
531
+
532
+ if eval_metrics is None:
533
+ eval_metrics = EvaluationMetrics()
534
+
535
+ if not request.is_json:
536
+ return jsonify({'error': 'Request must be JSON'}), 400
537
+
538
+ data = request.get_json()
539
+ retrieved = data.get('retrieved', [])
540
+ relevant = set(data.get('relevant', []))
541
+
542
+ if not retrieved:
543
+ return jsonify({'error': "'retrieved' list is required"}), 400
544
+
545
+ k = len(retrieved)
546
+
547
+ try:
548
+ # Calculate metrics
549
+ p_at_k = eval_metrics.precision_at_k(retrieved, relevant, k)
550
+ r_at_k = eval_metrics.recall_at_k(retrieved, relevant, k)
551
+ ap = eval_metrics.average_precision(retrieved, relevant)
552
+ mrr = eval_metrics.mrr(retrieved, relevant)
553
+
554
+ # For NDCG, create relevance dict (binary: 1 if relevant, 0 otherwise)
555
+ relevance_dict = {doc: 1 for doc in relevant}
556
+ ndcg = eval_metrics.ndcg_at_k(retrieved, relevance_dict, k)
557
+
558
+ return jsonify({
559
+ f'precision_at_{k}': round(p_at_k, 4),
560
+ f'recall_at_{k}': round(r_at_k, 4),
561
+ 'average_precision': round(ap, 4),
562
+ 'mrr': round(mrr, 4),
563
+ f'ndcg_at_{k}': round(ndcg, 4),
564
+ 'metrics_explanation': {
565
+ 'P@K': 'Proportion de documents pertinents parmi les K premiers récupérés',
566
+ 'R@K': 'Proportion de documents pertinents récupérés parmi tous les pertinents',
567
+ 'AP': 'Moyenne des précisions à chaque document pertinent trouvé',
568
+ 'MRR': 'Rang réciproque du premier document pertinent',
569
+ 'NDCG': 'Gain cumulatif normalisé avec décroissance logarithmique'
570
+ }
571
+ }), 200
572
+
573
+ except Exception as e:
574
+ print(f"[SysCRED Backend] TREC metrics error: {e}")
575
+ return jsonify({'error': str(e)}), 500
576
+
577
+
578
+ @app.route('/api/trec/health', methods=['GET'])
579
+ def trec_health():
580
+ """Health check for TREC module."""
581
+ return jsonify({
582
+ 'status': 'healthy',
583
+ 'trec_available': TREC_AVAILABLE if 'TREC_AVAILABLE' in dir() else True,
584
+ 'retriever_initialized': trec_retriever is not None,
585
+ 'corpus_size': len(TREC_DEMO_CORPUS),
586
+ 'models_available': ['bm25', 'tfidf', 'qld']
587
+ }), 200
588
+
589
+
590
  # --- Main ---
591
  if __name__ == '__main__':
592
  print("=" * 60)
 
602
  print()
603
  print("[SysCRED Backend] Starting Flask server...")
604
  print("[SysCRED Backend] Endpoints:")
605
+ print(" - POST /api/verify - Full credibility verification")
606
+ print(" - POST /api/seo - SEO analysis only (faster)")
607
+ print(" - GET /api/ontology/stats - Ontology statistics")
608
+ print(" - GET /api/health - Health check")
609
+ print(" --- TREC Endpoints ---")
610
+ print(" - POST /api/trec/search - Evidence retrieval (BM25/TF-IDF/QLD)")
611
+ print(" - POST /api/trec/metrics - Calculate IR metrics (MAP, P@K, NDCG)")
612
+ print(" - GET /api/trec/corpus - Demo corpus info")
613
+ print(" - GET /api/trec/health - TREC module health")
614
  print()
615
 
616
  app.run(host='0.0.0.0', port=5001, debug=True)
syscred/config.py CHANGED
@@ -88,13 +88,15 @@ class Config:
88
  PRF_EXPANSION_TERMS = int(os.getenv("SYSCRED_PRF_TERMS", "10"))
89
 
90
  # === Pondération des scores ===
 
91
  SCORE_WEIGHTS = {
92
- 'source_reputation': 0.25,
93
- 'domain_age': 0.10,
94
- 'sentiment_neutrality': 0.15,
95
- 'entity_presence': 0.15,
96
- 'coherence': 0.15,
97
- 'fact_check': 0.20
 
98
  }
99
 
100
  # === Seuils de crédibilité ===
 
88
  PRF_EXPANSION_TERMS = int(os.getenv("SYSCRED_PRF_TERMS", "10"))
89
 
90
# === Pondération des scores ===
# Note: Weights should sum to 1.0 for proper normalization
# (current values: 0.22 + 0.08 + 0.13 + 0.13 + 0.12 + 0.17 + 0.15 = 1.00).
# Per-dimension weights applied when combining sub-scores into the overall
# credibility score; 'graph_context' was carved out of the other weights.
SCORE_WEIGHTS = {
    'source_reputation': 0.22,  # Was 0.25, reduced for graph_context
    'domain_age': 0.08,         # Was 0.10
    'sentiment_neutrality': 0.13,  # Was 0.15
    'entity_presence': 0.13,    # Was 0.15
    'coherence': 0.12,          # Was 0.15
    'fact_check': 0.17,         # Was 0.20
    'graph_context': 0.15       # NEW - Historical knowledge from GraphRAG
}
101
 
102
  # === Seuils de crédibilité ===
syscred/datasets/liar/README ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ LIAR: A BENCHMARK DATASET FOR FAKE NEWS DETECTION
2
+
3
+ William Yang Wang, "Liar, Liar Pants on Fire": A New Benchmark Dataset for Fake News Detection, to appear in Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (ACL 2017), short paper, Vancouver, BC, Canada, July 30-August 4, ACL.
4
+ =====================================================================
5
+ Description of the TSV format:
6
+
7
+ Column 1: the ID of the statement ([ID].json).
8
+ Column 2: the label.
9
+ Column 3: the statement.
10
+ Column 4: the subject(s).
11
+ Column 5: the speaker.
12
+ Column 6: the speaker's job title.
13
+ Column 7: the state info.
14
+ Column 8: the party affiliation.
15
+ Column 9-13: the total credit history count, including the current statement.
16
+ 9: barely true counts.
17
+ 10: false counts.
18
+ 11: half true counts.
19
+ 12: mostly true counts.
20
+ 13: pants on fire counts.
21
+ Column 14: the context (venue / location of the speech or statement).
22
+
23
+ Note that we do not provide the full-text verdict report in this current version of the dataset,
24
+ but you can use the following command to access the full verdict report and links to the source documents:
25
+ wget http://www.politifact.com//api/v/2/statement/[ID]/?format=json
26
+
27
+ ======================================================================
28
+ The original sources retain the copyright of the data.
29
+
30
+ Note that there are absolutely no guarantees with this data,
31
+ and we provide this dataset "as is",
32
+ but you are welcome to report the issues of the preliminary version
33
+ of this data.
34
+
35
+ You are allowed to use this dataset for research purposes only.
36
+
37
+ For more question about the dataset, please contact:
38
+ William Wang, william@cs.ucsb.edu
39
+
40
+ v1.0 04/23/2017
41
+
syscred/datasets/liar/test.tsv ADDED
The diff for this file is too large to render. See raw diff
 
syscred/datasets/liar/train.tsv ADDED
The diff for this file is too large to render. See raw diff
 
syscred/datasets/liar/valid.tsv ADDED
The diff for this file is too large to render. See raw diff
 
syscred/eval_metrics.py CHANGED
@@ -133,6 +133,15 @@ class EvaluationMetrics:
133
  return 1.0 / (i + 1)
134
  return 0.0
135
 
 
 
 
 
 
 
 
 
 
136
  # --- TREC-Style Evaluation ---
137
 
138
  def evaluate_run(
 
133
  return 1.0 / (i + 1)
134
  return 0.0
135
 
136
@staticmethod
def mrr(retrieved: List[str], relevant: set) -> float:
    """Mean Reciprocal Rank for a single query.

    Thin alias over :meth:`reciprocal_rank`: yields 1 / rank of the
    first relevant document in *retrieved* (0.0 when none appears).
    """
    return EvaluationMetrics.reciprocal_rank(retrieved, relevant)
144
+
145
  # --- TREC-Style Evaluation ---
146
 
147
  def evaluate_run(
syscred/graph_rag.py CHANGED
@@ -117,13 +117,13 @@ class GraphRAG:
117
  Returns dict with 'text' (for LLM) and 'uris' (for Graph linking).
118
  """
119
  if not keywords:
120
- return {"text": "", "uris": []}
121
 
122
  # Build REGEX filter for keywords (OR logic)
123
  # e.g., (fake|hoax|conspiracy)
124
  clean_kws = [k for k in keywords if len(k) > 3] # Skip short words
125
  if not clean_kws:
126
- return {"text": "", "uris": []}
127
 
128
  regex_pattern = "|".join(clean_kws)
129
 
@@ -156,10 +156,10 @@ class GraphRAG:
156
  })
157
  except Exception as e:
158
  print(f"[GraphRAG] Similar claims error: {e}")
159
- return {"text": "", "uris": []}
160
 
161
  if not results:
162
- return {"text": "", "uris": []}
163
 
164
  lines = [f"Found {len(results)} similar claims in history:"]
165
  for r in results:
@@ -167,5 +167,130 @@ class GraphRAG:
167
 
168
  return {
169
  "text": "\n".join(lines),
170
- "uris": [r['uri'] for r in results]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  }
 
117
  Returns dict with 'text' (for LLM) and 'uris' (for Graph linking).
118
  """
119
  if not keywords:
120
+ return {"text": "", "uris": [], "scores": []}
121
 
122
  # Build REGEX filter for keywords (OR logic)
123
  # e.g., (fake|hoax|conspiracy)
124
  clean_kws = [k for k in keywords if len(k) > 3] # Skip short words
125
  if not clean_kws:
126
+ return {"text": "", "uris": [], "scores": []}
127
 
128
  regex_pattern = "|".join(clean_kws)
129
 
 
156
  })
157
  except Exception as e:
158
  print(f"[GraphRAG] Similar claims error: {e}")
159
+ return {"text": "", "uris": [], "scores": []}
160
 
161
  if not results:
162
+ return {"text": "", "uris": [], "scores": []}
163
 
164
  lines = [f"Found {len(results)} similar claims in history:"]
165
  for r in results:
 
167
 
168
  return {
169
  "text": "\n".join(lines),
170
+ "uris": [r['uri'] for r in results],
171
+ "scores": [r['score'] for r in results]
172
+ }
173
+
174
def compute_context_score(self, domain: str, keywords: "List[str] | None" = None) -> Dict[str, Any]:
    """
    Compute numerical context scores for integration into credibility scoring.

    This transforms the GraphRAG context into actionable numerical scores
    that can be directly used in the calculate_overall_score() function.

    Args:
        domain: The domain being analyzed (e.g., 'lemonde.fr')
        keywords: List of keywords from the claim (None/empty skips
            pattern matching). FIX: the default was a mutable `[]`
            (shared across calls); it is now None, which behaves
            identically everywhere this value is read.

    Returns:
        Dictionary with:
        - 'history_score': 0.0-1.0 based on past evaluations of this domain
        - 'pattern_score': 0.0-1.0 based on similar claims in the graph
        - 'combined_score': Weighted average (0.7 * history + 0.3 * pattern)
        - 'confidence': How confident we are (based on amount of data)
        - 'has_history': Boolean if domain has prior evaluations
        - 'history_count' / 'similar_count': number of data points used
        (Return annotation corrected to Dict[str, Any]: the dict mixes
        floats, ints and a bool.)
    """
    # Neutral defaults: no data means a 0.5 score with zero confidence.
    result = {
        'history_score': 0.5,  # Neutral default
        'pattern_score': 0.5,
        'combined_score': 0.5,
        'confidence': 0.0,
        'has_history': False,
        'history_count': 0,
        'similar_count': 0
    }

    # Without an ontology manager there is nothing to query.
    if not self.om:
        return result

    # 1. Get source history score
    history_data = self._get_source_history_data(domain)
    if history_data['count'] > 0:
        result['history_score'] = history_data['avg_score']
        result['has_history'] = True
        result['history_count'] = history_data['count']
        # Confidence increases with more data points (max at 5)
        history_confidence = min(1.0, history_data['count'] / 5)
    else:
        history_confidence = 0.0

    # 2. Get pattern score from similar claims
    if keywords:
        similar_result = self._find_similar_claims(keywords)
        scores = similar_result.get('scores', [])
        if scores:
            result['pattern_score'] = sum(scores) / len(scores)
            result['similar_count'] = len(scores)
            pattern_confidence = min(1.0, len(scores) / 3)
        else:
            pattern_confidence = 0.0
    else:
        pattern_confidence = 0.0

    # 3. Calculate combined score
    # Weight history more heavily than pattern matching
    if result['has_history'] and result['similar_count'] > 0:
        result['combined_score'] = 0.7 * result['history_score'] + 0.3 * result['pattern_score']
        result['confidence'] = 0.6 * history_confidence + 0.4 * pattern_confidence
    elif result['has_history']:
        result['combined_score'] = result['history_score']
        result['confidence'] = history_confidence * 0.8  # Reduce confidence without pattern
    elif result['similar_count'] > 0:
        result['combined_score'] = result['pattern_score']
        result['confidence'] = pattern_confidence * 0.5  # Lower confidence with only patterns
    else:
        # No data available - return neutral
        result['combined_score'] = 0.5
        result['confidence'] = 0.0

    return result
247
+
248
+ def _get_source_history_data(self, domain: str) -> Dict[str, Any]:
249
+ """
250
+ Query the graph for evaluation statistics of this domain.
251
+
252
+ Returns:
253
+ Dictionary with 'count', 'avg_score', 'last_verdict', 'scores'
254
+ """
255
+ if not domain:
256
+ return {'count': 0, 'avg_score': 0.5, 'scores': []}
257
+
258
+ query = """
259
+ PREFIX cred: <https://github.com/DominiqueLoyer/systemFactChecking#>
260
+
261
+ SELECT ?score ?level ?timestamp
262
+ WHERE {
263
+ ?info cred:informationURL ?url .
264
+ ?request cred:concernsInformation ?info .
265
+ ?report cred:isReportOf ?request .
266
+ ?report cred:credibilityScoreValue ?score .
267
+ ?report cred:assignsCredibilityLevel ?level .
268
+ ?report cred:completionTimestamp ?timestamp .
269
+ FILTER(CONTAINS(STR(?url), "%s"))
270
+ }
271
+ ORDER BY DESC(?timestamp)
272
+ LIMIT 10
273
+ """ % domain
274
+
275
+ scores = []
276
+ last_verdict = None
277
+
278
+ try:
279
+ combined = self.om.base_graph + self.om.data_graph
280
+ for i, row in enumerate(combined.query(query)):
281
+ scores.append(float(row.score))
282
+ if i == 0:
283
+ last_verdict = str(row.level).split('#')[-1]
284
+ except Exception as e:
285
+ print(f"[GraphRAG] History data query error: {e}")
286
+ return {'count': 0, 'avg_score': 0.5, 'scores': []}
287
+
288
+ if not scores:
289
+ return {'count': 0, 'avg_score': 0.5, 'scores': []}
290
+
291
+ return {
292
+ 'count': len(scores),
293
+ 'avg_score': sum(scores) / len(scores),
294
+ 'last_verdict': last_verdict,
295
+ 'scores': scores
296
  }
syscred/liar_benchmark_test.csv ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ id,statement,ground_truth_6way,ground_truth_binary,ground_truth_ternary,speaker,party,syscred_score,predicted_binary,predicted_ternary,binary_correct,ternary_correct,processing_time,error,sentiment,bias
2
+ 4134.json,"Your tax dollars are not being used to sue you, the people.",FALSE,Fake,False,alan-hays,republican,0.52,Real,Mixed,False,False,3.3536229133605957,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
3
+ 5710.json,General Motors is the largest corporation in the world again.,FALSE,Fake,False,joe-biden,democrat,0.52,Real,Mixed,False,False,0.7072207927703857,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
4
+ 7356.json,Three in four low-income workers dont have any paid sick days available.,TRUE,Real,True,sherrod-brown,democrat,0.52,Real,Mixed,True,False,0.7895858287811279,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
5
+ 11200.json,"Says 57 percent of federal spending goes to the military and just 1 percent goes to food and agriculture, including food stamps.",FALSE,Fake,False,facebook-posts,none,0.35,Fake,Mixed,True,False,0.8112809658050537,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
6
+ 447.json,I have never said that I don't wear flag pins or refuse to wear flag pins.,FALSE,Fake,False,barack-obama,democrat,0.52,Real,Mixed,False,False,0.7085680961608887,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
7
+ 2852.json,More than 10 years of free trade has brought a $2 billion per day trade deficit.,MOSTLY_TRUE,Real,True,sherrod-brown,democrat,0.52,Real,Mixed,True,False,1.0006098747253418,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
8
+ 13487.json,"Says that when San Francisco banned plastic grocery bags, you saw the number of instances of people going to the ER with things like salmonella and other related illnesses spike.",BARELY_TRUE,Fake,Mixed,james-quintero,none,0.52,Real,Mixed,False,True,1.4090971946716309,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
9
+ 1084.json,"Now, there was a time when someone like Scalia and Ginsburg got 95-plus votes.",TRUE,Real,True,lindsey-graham,republican,0.52,Real,Mixed,True,False,0.8899619579315186,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
10
+ 13203.json,Toomey and Trump will ban abortion and punish women who have them.,FALSE,Fake,False,naral-pro-choice,organization,0.35,Fake,Mixed,True,False,0.9519002437591553,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
11
+ 5993.json,Says Larry Taylor gave in-state tuition to illegal immigrants.,PANTS_FIRE,Fake,False,conservative-voters-texas-pac,none,0.52,Real,Mixed,False,False,1.0933752059936523,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
12
+ 4116.json,"POTUS economists: Stimulus Has Cost $278,000 per job.",FALSE,Fake,False,john-boehner,republican,0.52,Real,Mixed,False,False,0.6499719619750977,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
13
+ 12599.json,"If you are a member of union, your median weekly income is roughly $200 more than if you are a nonunion member, and that doesnt include benefits.",TRUE,Real,True,thomas-perez,democrat,0.52,Real,Mixed,True,False,0.7228951454162598,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
14
+ 7819.json,"18 percent of our land in our state right now is either federally, state or county owned for conservation purposes.",TRUE,Real,True,alberta-darling,republican,0.52,Real,Mixed,True,False,0.9661009311676025,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
15
+ 2374.json,"Like me, Scott Walker opposed the 1998 transportation bill and the $9 billion of wasteful spending.",FALSE,Fake,False,jim-sensenbrenner,republican,0.52,Real,Mixed,False,False,0.9515271186828613,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
16
+ 3941.json,The Democrat-led Senate has failed to pass a budget for 750 days ... Senate Democrats have neglected one of their most basic responsibilities.,HALF_TRUE,Real,Mixed,rob-portman,republican,0.54,Real,Mixed,True,True,0.6562759876251221,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
17
+ 1280.json,"Here in Florida, Ive slashed government by 10 percent. That's $7 billion.",BARELY_TRUE,Fake,Mixed,charlie-crist,democrat,0.52,Real,Mixed,False,True,0.9024300575256348,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
18
+ 11661.json,If Trump had just put his fathers money in a mutual fund ... hed have $8 billion.,FALSE,Fake,False,occupy-democrats,organization,0.36,Fake,Mixed,True,False,0.8690879344940186,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
19
+ 6.json,Barack Obama and Hillary Clinton have changed their positions (on the Iraq war withdrawal) to follow Chris Dodd.,HALF_TRUE,Real,Mixed,chris-dodd,democrat,0.53,Real,Mixed,True,True,1.3197407722473145,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
20
+ 3869.json,"When the union says I want to eliminate tenure, thats not true.",HALF_TRUE,Real,Mixed,chris-christie,republican,0.52,Real,Mixed,True,True,0.9137670993804932,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
21
+ 11229.json,"Last year, we had zero percent growth in GDP in Virginia ...The only states that did worse than us were Alaska and Mississippi.",TRUE,Real,True,glen-sturtevant,republican,0.53,Real,Mixed,True,False,0.8765408992767334,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
22
+ 12703.json,"In South Sudan, more teenage girls die in childbirth than finish high school.",MOSTLY_TRUE,Real,True,unicef,none,0.52,Real,Mixed,True,False,1.0562257766723633,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
23
+ 6320.json,Gov. Romneys plan would cut taxes for the folks at the very top.,MOSTLY_TRUE,Real,True,barack-obama,democrat,0.52,Real,Mixed,True,False,0.991412878036499,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
24
+ 7136.json,Says the state is required pick up 70 percent of the cost of K-12 education.,BARELY_TRUE,Fake,Mixed,john-kitzhaber,democrat,0.53,Real,Mixed,False,True,0.9649970531463623,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
25
+ 7552.json,Georgia loses millions of dollars of revenue because people go out of state to buy fireworks.,MOSTLY_TRUE,Real,True,jeff-mullis,republican,0.53,Real,Mixed,True,False,0.7320408821105957,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
26
+ 11118.json,"With the exception of baby formula, the federal government does not require any food to carry an expiration date, and state laws vary widely.",TRUE,Real,True,john-oliver,none,0.52,Real,Mixed,True,False,1.1864137649536133,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
27
+ 7244.json,The amount of money that we put into running our own state legislature is nearly as much as we put into the University of Rhode Island.,FALSE,Fake,False,dawson-hodgson,republican,0.52,Real,Mixed,False,False,1.2284369468688965,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
28
+ 3215.json,Four state Assembly Democrats scored a death blow to northeast Wisconsins economy by killing hundreds of jobs at a potential Bass Pro Shops near Green Bay.,PANTS_FIRE,Fake,False,scott-suder,republican,0.53,Real,Mixed,False,False,1.3722262382507324,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
29
+ 6858.json,"Says opponent U.S. Rep. Gwen Moore is Wisconsins most absent member of Congress, missing nearly 17 percent of the House votes in the second quarter of 2012.",MOSTLY_TRUE,Real,True,dan-sebring,republican,0.52,Real,Mixed,True,False,1.0067930221557617,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
30
+ 4161.json,Active duty males in the military are twice as likely to develop prostate cancer than their civilian counterparts.,MOSTLY_TRUE,Real,True,cliff-stearns,republican,0.52,Real,Mixed,True,False,0.817112922668457,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
31
+ 4568.json,"Says Vince Polistina is collecting nearly $70,000 in taxpayer-funded salaries -- plus a government pension.",BARELY_TRUE,Fake,Mixed,new-jersey-democratic-state-committee,democrat,0.53,Real,Mixed,False,True,0.802901029586792,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
32
+ 11256.json,"Bernie Sanders opposesthe Trans-Pacific Partnership, and Hillary Clinton supportsit.",MOSTLY_TRUE,Real,True,viral-image,none,0.52,Real,Mixed,True,False,0.9945018291473389,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
33
+ 10450.json,"When you look at the earned income tax credit, it has about a 25 percent fraud rate. Were looking at $20 billion to $30 billion.",HALF_TRUE,Real,Mixed,rand-paul,republican,0.54,Real,Mixed,True,True,1.004997968673706,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
34
+ 8419.json,Virtually every person across this country has seen premiums going up and up and up due to Obamacare.,FALSE,Fake,False,ted-cruz,republican,0.52,Real,Mixed,False,False,0.9335529804229736,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
35
+ 1015.json,Democrats in Congress had control since January of 2007. They haven't passed a law making waterboarding illegal. They haven't gone into any of these things and changed law.,BARELY_TRUE,Fake,Mixed,newt-gingrich,republican,0.54,Real,Mixed,False,True,0.8649272918701172,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
36
+ 8557.json,"The United States has never been richer, if you look at per capita GDP.",TRUE,Real,True,jan-schakowsky,democrat,0.53,Real,Mixed,True,False,1.4557609558105469,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
37
+ 3103.json,"George Allen voted for budgets that increased the national debt by $16,400 for every second he served in the U.S. Senate.",TRUE,Real,True,jamie-radtke,republican,0.52,Real,Mixed,True,False,0.8361649513244629,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
38
+ 1576.json,Health insurance companies' costs are only 4 percent of all health care spending.,TRUE,Real,True,americas-health-insurance-plans,none,0.53,Real,Mixed,True,False,1.2063438892364502,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
39
+ 4131.json,"The State Election Board has issued nearly $275,000 in fines to violators of absentee ballot laws.",TRUE,Real,True,brian-kemp,republican,0.53,Real,Mixed,True,False,1.1224148273468018,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
40
+ 7677.json,"The sex-offender registry has been around for a long time, and the research thats out there says that it has no positive impact on the public safety.",MOSTLY_TRUE,Real,True,ohio-public-defenders-office,none,0.52,Real,Mixed,True,False,1.0617139339447021,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
41
+ 4323.json,"When lenders foreclose on homes, they typically suffer losses that exceed 30 percent of the value of the home.",MOSTLY_TRUE,Real,True,thaddeus-mccotter,republican,0.52,Real,Mixed,True,False,0.8316628932952881,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
42
+ 1955.json,"U.S. Rep. Jim Langevin didn't want a border fence to block illegal immigration ""because he is afraid that someone will get hurt trying to go around the fence.""",PANTS_FIRE,Fake,False,michael-j-gardiner,republican,0.52,Real,Mixed,False,False,0.8672749996185303,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
43
+ 4017.json,"On the day of the New Hampshire primary in 1980, the top 13 people of Ronald Reagans staff quit.",MOSTLY_TRUE,Real,True,newt-gingrich,republican,0.52,Real,Mixed,True,False,1.1178889274597168,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
44
+ 769.json,Barack Obama ... 96 percent of his votes have been solely along party line.,TRUE,Real,True,sarah-palin,republican,0.53,Real,Mixed,True,False,0.8946268558502197,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
45
+ 171.json,''The leading Democratic candidate once said that the unfettered free market is the most destructive force in modern America.'',FALSE,Fake,False,rudy-giuliani,republican,0.52,Real,Mixed,False,False,0.7632577419281006,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
46
+ 9944.json,The U.S. Constitution is the oldest written constitution still in use today among nations.,TRUE,Real,True,bob-goodlatte,republican,0.52,Real,Mixed,True,False,0.8168671131134033,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
47
+ 7448.json,Felony crimes in the city of Atlanta are the lowest they have been since 1969.,TRUE,Real,True,kasim-reed,democrat,0.52,Real,Mixed,True,False,0.9511759281158447,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
48
+ 2760.json,Robert Hurt supported a bill that helped the uranium industry after taking contributions from the industry and because his father had a stake in it.,BARELY_TRUE,Fake,Mixed,sierra-club,none,0.52,Real,Mixed,False,True,1.0464277267456055,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
49
+ 10283.json,"In 1990, the U.S. led the world in the percentage of 25-34 year olds with college degrees. Today we are in 12th place.",MOSTLY_TRUE,Real,True,bernie-s,independent,0.54,Real,Mixed,True,False,0.966181755065918,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
50
+ 10271.json,"Atlanta Mayor Kasim Reed became a city resident in the last annexation, opening the door to his candidacy.",MOSTLY_TRUE,Real,True,keisha-lance-bottoms,democrat,0.52,Real,Mixed,True,False,0.9616649150848389,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
51
+ 9788.json,Ive been here almost every day.,BARELY_TRUE,Fake,Mixed,jay-nixon,democrat,0.53,Real,Mixed,False,True,1.5777778625488281,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
52
+ 8434.json,"Mark Herring voted to allow sex offenders, including rapists, to serve shorter sentences.",BARELY_TRUE,Fake,Mixed,mark-obenshain,republican,0.52,Real,Mixed,False,True,0.902681827545166,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
53
+ 4189.json,"Says U.S. Rep. Ron Kinds stimulus cost taxpayers $278,000 per job.",FALSE,Fake,False,national-republican-congressional-committee,republican,0.52,Real,Mixed,False,False,0.9022071361541748,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
54
+ 11867.json,Says CNN reported Ben Carson was taking a break from campaigning and the Cruz campaign forwarded that news to our volunteers.,FALSE,Fake,False,ted-cruz,republican,0.52,Real,Mixed,False,False,1.0397439002990723,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
55
+ 7285.json,"Says Chris Christies plan to kick-start our economy is to propose an income tax cut that disproportionately benefits the wealthy, and...hes still proposing it.",PANTS_FIRE,Fake,False,barbara-buono,democrat,0.54,Real,Mixed,False,False,1.4161858558654785,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
56
+ 1444.json,"Obama came into office ""with a $1.3 trillion deficit before I had passed any law. ... We came in with $8 trillion worth of debt over the next decade.""",MOSTLY_TRUE,Real,True,barack-obama,democrat,0.54,Real,Mixed,True,False,0.9389801025390625,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
57
+ 7537.json,"The United States spends $2.2 billion on free cell phones, $27 million on Moroccan pottery classes and pays for the travel expenses for the watermelon queen in Alabama.",HALF_TRUE,Real,Mixed,sean-duffy,republican,0.54,Real,Mixed,True,True,1.04280686378479,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
58
+ 2796.json,Half of children struck by cars near schools are hit by parents driving children to school.,FALSE,Fake,False,safe-routes-schools,none,0.52,Real,Mixed,False,False,1.2210850715637207,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
59
+ 11150.json,Caution: Kissing and cuddling chickens can be hazardous to your health.,TRUE,Real,True,centers-disease-control,none,0.52,Real,Mixed,True,False,0.8850150108337402,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
60
+ 12650.json,You know what (Saddam Hussein) did well? He killed terrorists.,BARELY_TRUE,Fake,Mixed,donald-trump,republican,0.52,Real,Mixed,False,True,0.8362798690795898,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
61
+ 3.json,"New Mexico was 46th in teacher pay (when he was elected), now we're 29th.",MOSTLY_TRUE,Real,True,bill-richardson,democrat,0.52,Real,Mixed,True,False,0.9594571590423584,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
62
+ 1512.json,Fifty-seven of Rubios 100 ideas ultimately became law.,HALF_TRUE,Real,Mixed,marco-rubio,republican,0.52,Real,Mixed,True,True,1.4569880962371826,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
63
+ 3235.json,"Under the White Houses budget proposal, we will not be adding more to the national debt by the middle of the decade.",FALSE,Fake,False,barack-obama,democrat,0.36,Fake,Mixed,True,False,1.4381189346313477,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
64
+ 7909.json,"Says that when the Rolling Stones performed in an Austin park, they paid $25,000 to the nearby city of Rollingwood for one night of inconvenience.",TRUE,Real,True,barry-bone,none,0.52,Real,Mixed,True,False,1.264172077178955,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
65
+ 6812.json,"The Obama administrations green stimulus program funneled close to $2 billion dollars to overseas firms, creating thousands of jobs in China.",BARELY_TRUE,Fake,Mixed,americans-tax-reform,none,0.53,Real,Mixed,False,True,1.306333303451538,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
66
+ 7401.json,"Says, Statistics indicate that one in eight children, and one in 18 adults in Oregon suffers from mental illness.",MOSTLY_TRUE,Real,True,peter-courtney,democrat,0.52,Real,Mixed,True,False,1.0519630908966064,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
67
+ 4391.json,Says Barack Obamas favorability rating in Israel once clocked in at 4 percent.,BARELY_TRUE,Fake,Mixed,kinky-friedman,democrat,0.53,Real,Mixed,False,True,0.9937312602996826,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
68
+ 9472.json,Oregon school districts are directing 24 percent of their food budgets to purchase local foods. Thats the highest percentage in the country.,HALF_TRUE,Real,Mixed,oregon-department-agriculture,government-body,0.54,Real,Mixed,True,True,1.0283551216125488,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
69
+ 12834.json,On whether hes had a relationship with Vladimir Putin.,FALSE,Fake,False,donald-trump,republican,0.35,Fake,Mixed,True,False,1.1149241924285889,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
70
+ 5363.json,Obamacare ... will kill jobs across America.,FALSE,Fake,False,us-chamber-commerce,none,0.53,Real,Mixed,False,False,1.557218074798584,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
71
+ 9080.json,You can buy lobster with food stamps.,TRUE,Real,True,greg-morris,republican,0.69,Real,True,True,True,1.4478328227996826,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
72
+ 12648.json,"Many Nevadans relied on Uber for work, but after accepting $70,000 from taxi companies, Catherine Cortez Masto went after Uber ... (driving) them out of town.",BARELY_TRUE,Fake,Mixed,freedom-partners-action-fund,organization,0.54,Real,Mixed,False,True,1.1827037334442139,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
73
+ 3330.json,Says Tri-Rail shows the potential problems with investing in high-speed rail.,BARELY_TRUE,Fake,Mixed,rick-scott,republican,0.52,Real,Mixed,False,True,1.007385015487671,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
74
+ 6045.json,Says Hawaii Gov. Neil Abercrombie made a late-night visit to Kinkos to forge President Barack Obamas birth certificate two days before Obama unveiled it to the media.,PANTS_FIRE,Fake,False,chain-email,none,0.52,Real,Mixed,False,False,1.081031322479248,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
75
+ 8606.json,Republicans have proposed dozens of (health care) solutions designed to help control costs and improve quality.,HALF_TRUE,Real,Mixed,ron-johnson,republican,0.53,Real,Mixed,True,True,1.0788249969482422,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
76
+ 13484.json,"Harambe received 15,000 votes in the presidential election.",PANTS_FIRE,Fake,False,blog-posting,none,0.36,Fake,Mixed,True,False,0.9677271842956543,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
77
+ 12168.json,Expanding the sale of full-strength beer and wine to grocery stores isgood for everybody.,HALF_TRUE,Real,Mixed,your-choice-colorado,organization,0.52,Real,Mixed,True,True,0.8326687812805176,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
78
+ 2252.json,Republicans tried to block the deficit commission.,HALF_TRUE,Real,Mixed,tim-kaine,democrat,0.53,Real,Mixed,True,True,1.0242087841033936,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
79
+ 7696.json,Not one dime gets added to the deficit because of Social Security.,BARELY_TRUE,Fake,Mixed,mark-pocan,democrat,0.53,Real,Mixed,False,True,1.0535731315612793,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
80
+ 13186.json,"Says Donald Trump proposes to deport 16 million people, 11 million who are here without documents, and both Donald Trump and Mike Pence want to get rid of birthright citizenship.",HALF_TRUE,Real,Mixed,tim-kaine,democrat,0.52,Real,Mixed,True,True,1.6496412754058838,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
81
+ 6678.json,"The Senate has not passed a budget in more than three years, not a good budget, not a bad budget, no budget.",TRUE,Real,True,leonard-lance,republican,0.52,Real,Mixed,True,False,1.0171582698822021,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
82
+ 2830.json,A proposal to reduce the deficit involved large tax cuts for the top bracket.,HALF_TRUE,Real,Mixed,paul-krugman,none,0.52,Real,Mixed,True,True,1.0472040176391602,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
83
+ 9112.json,22 Countries Invaded by the U.S. in 20 Years.,FALSE,Fake,False,facebook-posts,none,0.53,Real,Mixed,False,False,1.4772350788116455,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
84
+ 2774.json,On toll roads.,HALF_TRUE,Real,Mixed,scott-walker,republican,0.52,Real,Mixed,True,True,1.2251579761505127,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
85
+ 4698.json,"Says state Senate President Stephen Sweeney gave us the nations highest income tax rates, driving out jobs.",FALSE,Fake,False,americans-prosperity-new-jersey,none,0.52,Real,Mixed,False,False,1.2662239074707031,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
86
+ 1066.json,ACORN will be a paid partner with the Census Bureau and they will be in charge of going door-to-door and collecting data from the American public.,PANTS_FIRE,Fake,False,michele-bachmann,republican,0.52,Real,Mixed,False,False,0.9788448810577393,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
87
+ 13155.json,Says Ron Johnson helped companies ship jobs overseas.,FALSE,Fake,False,russ-feingold,democrat,0.36,Fake,Mixed,True,False,1.1062400341033936,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
88
+ 13047.json,Barack Obama has doubled our national debt. Doubled it. Its going to be close to $20 trillion when he leaves.,HALF_TRUE,Real,Mixed,donald-trump,republican,0.54,Real,Mixed,True,True,1.301180124282837,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
89
+ 13270.json,We know that more than half of Hillary Clintons meetings while she was secretary of state were given to major contributors to the Clinton Foundation.,BARELY_TRUE,Fake,Mixed,mike-pence,republican,0.53,Real,Mixed,False,True,1.4784371852874756,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
90
+ 8798.json,And weve constantly been the lowest unemployed county in the state.,FALSE,Fake,False,bob-terry,none,0.53,Real,Mixed,False,False,1.0450530052185059,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
91
+ 1652.json,On recess appointments.,HALF_TRUE,Real,Mixed,barack-obama,democrat,0.52,Real,Mixed,True,True,1.153883934020996,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
92
+ 5824.json,We didnt go out asking people to join the stand your ground task force.,FALSE,Fake,False,jennifer-carroll,republican,0.53,Real,Mixed,False,False,1.2436339855194092,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
93
+ 12511.json,There is no system to vet refugees from the Middle East.,FALSE,Fake,False,donald-trump,republican,0.35,Fake,Mixed,True,False,1.1522879600524902,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
94
+ 2652.json,Says Rick Perry recently said he wanted another (Texas) business tax.,PANTS_FIRE,Fake,False,kathie-glass,libertarian,0.52,Real,Mixed,False,False,1.1576693058013916,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
95
+ 6141.json,Federal health care reforms amounted to the government takeover of health care.,PANTS_FIRE,Fake,False,tommy-thompson,republican,0.53,Real,Mixed,False,False,1.3367388248443604,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
96
+ 1005.json,Obama used $20 million in federal money to emmigrate (sic) Hamas Refugees to the USA.,PANTS_FIRE,Fake,False,chain-email,none,0.53,Real,Mixed,False,False,1.0034828186035156,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
97
+ 1760.json,"Says in 2003 Texas cut $10 billion out of the entire budget, yet we put $1.8 billion more into public education. We put $800 million more into health and human services.",BARELY_TRUE,Fake,Mixed,rick-perry,republican,0.54,Real,Mixed,False,True,1.3652348518371582,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
98
+ 13486.json,Alexander Hamilton was an immigration hawk.,HALF_TRUE,Real,Mixed,rush-limbaugh,none,0.52,Real,Mixed,True,True,1.230626106262207,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
99
+ 7790.json,Boston Marathon bombing suspect Tamerlan Tsarnaev is buried not far from President Kennedys grave.,PANTS_FIRE,Fake,False,john-depetro,talk-show-host,0.52,Real,Mixed,False,False,1.2205047607421875,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
100
+ 10512.json,The minimum wage has risen $2.35 in the last two years. Thats 31 percent.,FALSE,Fake,False,leonidas-raptakis,democrat,0.54,Real,Mixed,False,False,1.1163349151611328,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
101
+ 4337.json,"Before Medicare, only 51 percent of Americans 65 and older had health care coverage and nearly 30 percent lived below the poverty line. Today, thanks to Medicare ... nearly all seniors have coverage a",HALF_TRUE,Real,Mixed,ron-kind,democrat,0.54,Real,Mixed,True,True,1.205260992050171,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
syscred/liar_benchmark_test.json ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "timestamp": "2026-02-03T11:06:54.379689",
3
+ "dataset": "LIAR",
4
+ "metrics": {
5
+ "total_statements": 100,
6
+ "successful_evaluations": 100,
7
+ "error_count": 0,
8
+ "error_rate": 0.0,
9
+ "binary": {
10
+ "accuracy": 0.57,
11
+ "precision": 1.0,
12
+ "recall": 0.1568627450980392,
13
+ "f1": 0.2711864406779661,
14
+ "confusion_matrix": [
15
+ [
16
+ 8,
17
+ 43
18
+ ],
19
+ [
20
+ 0,
21
+ 49
22
+ ]
23
+ ]
24
+ },
25
+ "ternary": {
26
+ "accuracy": 0.34,
27
+ "macro_f1": 0.18686868686868685,
28
+ "confusion_matrix": [
29
+ [
30
+ 0,
31
+ 35,
32
+ 0
33
+ ],
34
+ [
35
+ 0,
36
+ 33,
37
+ 0
38
+ ],
39
+ [
40
+ 0,
41
+ 31,
42
+ 1
43
+ ]
44
+ ]
45
+ },
46
+ "classification_report": {
47
+ "Fake": {
48
+ "precision": 1.0,
49
+ "recall": 0.1568627450980392,
50
+ "f1-score": 0.2711864406779661,
51
+ "support": 51.0
52
+ },
53
+ "Real": {
54
+ "precision": 0.532608695652174,
55
+ "recall": 1.0,
56
+ "f1-score": 0.6950354609929078,
57
+ "support": 49.0
58
+ },
59
+ "accuracy": 0.57,
60
+ "macro avg": {
61
+ "precision": 0.7663043478260869,
62
+ "recall": 0.5784313725490196,
63
+ "f1-score": 0.483110950835437,
64
+ "support": 100.0
65
+ },
66
+ "weighted avg": {
67
+ "precision": 0.7709782608695653,
68
+ "recall": 0.57,
69
+ "f1-score": 0.4788724606322875,
70
+ "support": 100.0
71
+ }
72
+ },
73
+ "score_distribution": {
74
+ "mean": 0.5134000000000001,
75
+ "min": 0.35,
76
+ "max": 0.69,
77
+ "median": 0.52
78
+ },
79
+ "per_party": {
80
+ "republican": {
81
+ "count": 43,
82
+ "accuracy": 0.5581395348837209
83
+ },
84
+ "democrat": {
85
+ "count": 27,
86
+ "accuracy": 0.6296296296296297
87
+ }
88
+ },
89
+ "elapsed_time": 108.7882571220398,
90
+ "statements_per_second": 0.9192168589282478
91
+ },
92
+ "config": {
93
+ "threshold": 0.5,
94
+ "use_graphrag": true,
95
+ "weights": {
96
+ "source_reputation": 0.22,
97
+ "domain_age": 0.08,
98
+ "sentiment_neutrality": 0.13,
99
+ "entity_presence": 0.13,
100
+ "coherence": 0.12,
101
+ "fact_check": 0.17,
102
+ "graph_context": 0.15
103
+ }
104
+ }
105
+ }
syscred/liar_dataset.py ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ LIAR Dataset Module - SysCRED
4
+ ==============================
5
+ Loader for the LIAR benchmark dataset (Wang, 2017).
6
+ Standard benchmark for fake news detection with 12,800+ political statements.
7
+
8
+ Dataset: https://www.cs.ucsb.edu/~william/data/liar_dataset.zip
9
+ Paper: "Liar, Liar Pants on Fire" (ACL 2017)
10
+
11
+ (c) Dominique S. Loyer - PhD Thesis Prototype
12
+ """
13
+
14
+ import os
15
+ import csv
16
+ from pathlib import Path
17
+ from typing import List, Dict, Optional, Tuple
18
+ from dataclasses import dataclass, field
19
+ from enum import Enum
20
+
21
+
22
class LiarLabel(Enum):
    """Six-way truthfulness labels from PolitiFact."""
    PANTS_FIRE = 0
    FALSE = 1
    BARELY_TRUE = 2
    HALF_TRUE = 3
    MOSTLY_TRUE = 4
    TRUE = 5

    @classmethod
    def from_string(cls, label: str) -> 'LiarLabel':
        """Map a PolitiFact label string to its enum member.

        Unrecognized labels fall back to the neutral midpoint HALF_TRUE.
        """
        lookup = {
            'pants-fire': cls.PANTS_FIRE,
            'false': cls.FALSE,
            'barely-true': cls.BARELY_TRUE,
            'half-true': cls.HALF_TRUE,
            'mostly-true': cls.MOSTLY_TRUE,
            'true': cls.TRUE,
        }
        key = label.lower().strip()
        try:
            return lookup[key]
        except KeyError:
            return cls.HALF_TRUE

    def to_binary(self) -> str:
        """Collapse to a two-way label: Fake (barely-true or worse) vs Real."""
        return "Fake" if self.value < 3 else "Real"

    def to_ternary(self) -> str:
        """Collapse to a three-way label: False / Mixed / True."""
        if self.value < 2:
            # pants-fire, false
            return "False"
        if self.value < 4:
            # barely-true, half-true
            return "Mixed"
        # mostly-true, true
        return "True"
60
+
61
+ @dataclass
62
+ class LiarStatement:
63
+ """A single statement from the LIAR dataset."""
64
+ id: str
65
+ label: LiarLabel
66
+ statement: str
67
+ subject: str = ""
68
+ speaker: str = ""
69
+ job_title: str = ""
70
+ state: str = ""
71
+ party: str = ""
72
+ barely_true_count: int = 0
73
+ false_count: int = 0
74
+ half_true_count: int = 0
75
+ mostly_true_count: int = 0
76
+ pants_fire_count: int = 0
77
+ context: str = ""
78
+
79
+ @property
80
+ def binary_label(self) -> str:
81
+ """Get binary label (Fake/Real)."""
82
+ return self.label.to_binary()
83
+
84
+ @property
85
+ def ternary_label(self) -> str:
86
+ """Get ternary label (False/Mixed/True)."""
87
+ return self.label.to_ternary()
88
+
89
+ @property
90
+ def numeric_label(self) -> int:
91
+ """Get numeric label (0-5)."""
92
+ return self.label.value
93
+
94
+ @property
95
+ def speaker_credit_history(self) -> Dict[str, int]:
96
+ """Get speaker's historical credibility as a dictionary."""
97
+ return {
98
+ 'barely_true': self.barely_true_count,
99
+ 'false': self.false_count,
100
+ 'half_true': self.half_true_count,
101
+ 'mostly_true': self.mostly_true_count,
102
+ 'pants_fire': self.pants_fire_count
103
+ }
104
+
105
+ def to_dict(self) -> Dict:
106
+ """Convert to dictionary for JSON serialization."""
107
+ return {
108
+ 'id': self.id,
109
+ 'label': self.label.name,
110
+ 'binary_label': self.binary_label,
111
+ 'ternary_label': self.ternary_label,
112
+ 'statement': self.statement,
113
+ 'subject': self.subject,
114
+ 'speaker': self.speaker,
115
+ 'job_title': self.job_title,
116
+ 'state': self.state,
117
+ 'party': self.party,
118
+ 'context': self.context,
119
+ 'speaker_credit_history': self.speaker_credit_history
120
+ }
121
+
122
+
123
+ class LIARDataset:
124
+ """
125
+ Loader for LIAR benchmark dataset.
126
+
127
+ The LIAR dataset contains 12,836 short statements labeled with
128
+ six fine-grained truthfulness ratings from PolitiFact.
129
+
130
+ Files expected:
131
+ - train.tsv (10,269 statements)
132
+ - valid.tsv (1,284 statements)
133
+ - test.tsv (1,283 statements)
134
+
135
+ Usage:
136
+ dataset = LIARDataset("/path/to/liar_dataset")
137
+ train_data = dataset.load_split("train")
138
+
139
+ for statement in train_data:
140
+ print(f"{statement.statement} -> {statement.label.name}")
141
+ """
142
+
143
+ # TSV column indices
144
+ COL_ID = 0
145
+ COL_LABEL = 1
146
+ COL_STATEMENT = 2
147
+ COL_SUBJECT = 3
148
+ COL_SPEAKER = 4
149
+ COL_JOB = 5
150
+ COL_STATE = 6
151
+ COL_PARTY = 7
152
+ COL_BARELY_TRUE = 8
153
+ COL_FALSE = 9
154
+ COL_HALF_TRUE = 10
155
+ COL_MOSTLY_TRUE = 11
156
+ COL_PANTS_FIRE = 12
157
+ COL_CONTEXT = 13
158
+
159
+ def __init__(self, data_dir: Optional[str] = None):
160
+ """
161
+ Initialize LIAR dataset loader.
162
+
163
+ Args:
164
+ data_dir: Path to directory containing train.tsv, valid.tsv, test.tsv
165
+ If None, uses default location: syscred/datasets/liar/
166
+ """
167
+ if data_dir:
168
+ self.data_dir = Path(data_dir)
169
+ else:
170
+ # Default: relative to this file
171
+ self.data_dir = Path(__file__).parent / "datasets" / "liar"
172
+
173
+ self._cache: Dict[str, List[LiarStatement]] = {}
174
+
175
+ print(f"[LIAR] Dataset directory: {self.data_dir}")
176
+
177
+ def _parse_int_safe(self, value: str) -> int:
178
+ """Safely parse int, returning 0 on failure."""
179
+ try:
180
+ return int(value.strip())
181
+ except (ValueError, AttributeError):
182
+ return 0
183
+
184
+ def _parse_row(self, row: List[str]) -> Optional[LiarStatement]:
185
+ """Parse a single TSV row into a LiarStatement."""
186
+ try:
187
+ # Ensure we have enough columns
188
+ if len(row) < 3:
189
+ return None
190
+
191
+ # Pad row if needed
192
+ while len(row) < 14:
193
+ row.append("")
194
+
195
+ return LiarStatement(
196
+ id=row[self.COL_ID].strip(),
197
+ label=LiarLabel.from_string(row[self.COL_LABEL]),
198
+ statement=row[self.COL_STATEMENT].strip(),
199
+ subject=row[self.COL_SUBJECT].strip() if len(row) > self.COL_SUBJECT else "",
200
+ speaker=row[self.COL_SPEAKER].strip() if len(row) > self.COL_SPEAKER else "",
201
+ job_title=row[self.COL_JOB].strip() if len(row) > self.COL_JOB else "",
202
+ state=row[self.COL_STATE].strip() if len(row) > self.COL_STATE else "",
203
+ party=row[self.COL_PARTY].strip() if len(row) > self.COL_PARTY else "",
204
+ barely_true_count=self._parse_int_safe(row[self.COL_BARELY_TRUE]) if len(row) > self.COL_BARELY_TRUE else 0,
205
+ false_count=self._parse_int_safe(row[self.COL_FALSE]) if len(row) > self.COL_FALSE else 0,
206
+ half_true_count=self._parse_int_safe(row[self.COL_HALF_TRUE]) if len(row) > self.COL_HALF_TRUE else 0,
207
+ mostly_true_count=self._parse_int_safe(row[self.COL_MOSTLY_TRUE]) if len(row) > self.COL_MOSTLY_TRUE else 0,
208
+ pants_fire_count=self._parse_int_safe(row[self.COL_PANTS_FIRE]) if len(row) > self.COL_PANTS_FIRE else 0,
209
+ context=row[self.COL_CONTEXT].strip() if len(row) > self.COL_CONTEXT else ""
210
+ )
211
+ except Exception as e:
212
+ print(f"[LIAR] Parse error: {e}")
213
+ return None
214
+
215
+ def load_split(self, split: str = "test") -> List[LiarStatement]:
216
+ """
217
+ Load a dataset split.
218
+
219
+ Args:
220
+ split: One of 'train', 'valid', 'test'
221
+
222
+ Returns:
223
+ List of LiarStatement objects
224
+ """
225
+ if split in self._cache:
226
+ return self._cache[split]
227
+
228
+ file_path = self.data_dir / f"{split}.tsv"
229
+
230
+ if not file_path.exists():
231
+ raise FileNotFoundError(
232
+ f"LIAR dataset file not found: {file_path}\n"
233
+ f"Download from: https://www.cs.ucsb.edu/~william/data/liar_dataset.zip"
234
+ )
235
+
236
+ statements = []
237
+
238
+ with open(file_path, 'r', encoding='utf-8') as f:
239
+ reader = csv.reader(f, delimiter='\t')
240
+ for row in reader:
241
+ stmt = self._parse_row(row)
242
+ if stmt:
243
+ statements.append(stmt)
244
+
245
+ self._cache[split] = statements
246
+ print(f"[LIAR] Loaded {len(statements)} statements from {split}.tsv")
247
+
248
+ return statements
249
+
250
+ def get_statements(self, split: str = "test") -> List[str]:
251
+ """Get just the statement texts."""
252
+ return [s.statement for s in self.load_split(split)]
253
+
254
+ def get_labels(self, split: str = "test", label_type: str = "binary") -> List[str]:
255
+ """
256
+ Get labels for a split.
257
+
258
+ Args:
259
+ split: Dataset split
260
+ label_type: 'binary' (Fake/Real), 'ternary' (False/Mixed/True),
261
+ 'six' (original 6-way), 'numeric' (0-5)
262
+ """
263
+ statements = self.load_split(split)
264
+
265
+ if label_type == "binary":
266
+ return [s.binary_label for s in statements]
267
+ elif label_type == "ternary":
268
+ return [s.ternary_label for s in statements]
269
+ elif label_type == "numeric":
270
+ return [s.numeric_label for s in statements]
271
+ else: # six / original
272
+ return [s.label.name for s in statements]
273
+
274
+ def get_label_distribution(self, split: str = "test") -> Dict[str, int]:
275
+ """Get count of each label in a split."""
276
+ statements = self.load_split(split)
277
+ distribution = {}
278
+
279
+ for stmt in statements:
280
+ label = stmt.label.name
281
+ distribution[label] = distribution.get(label, 0) + 1
282
+
283
+ return distribution
284
+
285
+ def get_sample(self, split: str = "test", n: int = 10) -> List[LiarStatement]:
286
+ """Get a random sample of statements."""
287
+ import random
288
+ statements = self.load_split(split)
289
+ return random.sample(statements, min(n, len(statements)))
290
+
291
+ def get_by_party(self, split: str, party: str) -> List[LiarStatement]:
292
+ """Filter statements by political party."""
293
+ statements = self.load_split(split)
294
+ return [s for s in statements if s.party.lower() == party.lower()]
295
+
296
+ def get_by_speaker(self, split: str, speaker: str) -> List[LiarStatement]:
297
+ """Filter statements by speaker name."""
298
+ statements = self.load_split(split)
299
+ return [s for s in statements if speaker.lower() in s.speaker.lower()]
300
+
301
+ def iter_batches(self, split: str, batch_size: int = 32):
302
+ """Iterate over statements in batches."""
303
+ statements = self.load_split(split)
304
+
305
+ for i in range(0, len(statements), batch_size):
306
+ yield statements[i:i + batch_size]
307
+
308
+ def stats(self) -> Dict[str, Any]:
309
+ """Get dataset statistics."""
310
+ stats = {}
311
+
312
+ for split in ['train', 'valid', 'test']:
313
+ try:
314
+ statements = self.load_split(split)
315
+ stats[split] = {
316
+ 'count': len(statements),
317
+ 'label_distribution': self.get_label_distribution(split),
318
+ 'unique_speakers': len(set(s.speaker for s in statements)),
319
+ 'unique_parties': list(set(s.party for s in statements if s.party))
320
+ }
321
+ except FileNotFoundError:
322
+ stats[split] = {'error': 'File not found'}
323
+
324
+ return stats
325
+
326
+
327
+ # Convenience function
328
+ def load_liar(split: str = "test", data_dir: Optional[str] = None) -> List[LiarStatement]:
329
+ """Quick loader for LIAR dataset."""
330
+ dataset = LIARDataset(data_dir)
331
+ return dataset.load_split(split)
332
+
333
+
334
+ if __name__ == "__main__":
335
+ print("=" * 60)
336
+ print("LIAR Dataset Loader - Test")
337
+ print("=" * 60)
338
+
339
+ # Test with default path
340
+ try:
341
+ dataset = LIARDataset()
342
+
343
+ print("\n📊 Dataset Statistics:")
344
+ stats = dataset.stats()
345
+ for split, info in stats.items():
346
+ print(f"\n{split.upper()}:")
347
+ if 'error' in info:
348
+ print(f" ❌ {info['error']}")
349
+ else:
350
+ print(f" Total: {info['count']}")
351
+ print(f" Speakers: {info['unique_speakers']}")
352
+ print(f" Parties: {info['unique_parties']}")
353
+ print(f" Labels: {info['label_distribution']}")
354
+
355
+ except Exception as e:
356
+ print(f"\n❌ Error: {e}")
357
+ print("\nTo use this module, download the LIAR dataset:")
358
+ print(" wget https://www.cs.ucsb.edu/~william/data/liar_dataset.zip")
359
+ print(" unzip liar_dataset.zip -d 02_Code/syscred/datasets/liar/")
syscred/run_liar_benchmark.py ADDED
@@ -0,0 +1,434 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ LIAR Benchmark Runner - SysCRED
5
+ ================================
6
+ Scientific evaluation of SysCRED on the LIAR benchmark dataset.
7
+
8
+ Usage:
9
+ python run_liar_benchmark.py --split test
10
+ python run_liar_benchmark.py --sample 100 --verbose
11
+ python run_liar_benchmark.py --split test --output results/liar_benchmark.csv
12
+
13
+ (c) Dominique S. Loyer - PhD Thesis Prototype
14
+ """
15
+
16
+ import argparse
17
+ import json
18
+ import time
19
+ import sys
20
+ from pathlib import Path
21
+ from datetime import datetime
22
+ from typing import Dict, List, Any, Optional
23
+ from collections import Counter
24
+
25
+ # Add parent to path for imports
26
+ sys.path.insert(0, str(Path(__file__).parent.parent))
27
+
28
+ try:
29
+ import pandas as pd
30
+ HAS_PANDAS = True
31
+ except ImportError:
32
+ HAS_PANDAS = False
33
+ print("[Warning] pandas not installed. CSV export will be limited.")
34
+
35
+ try:
36
+ from sklearn.metrics import (
37
+ accuracy_score, precision_score, recall_score, f1_score,
38
+ confusion_matrix, classification_report
39
+ )
40
+ HAS_SKLEARN = True
41
+ except ImportError:
42
+ HAS_SKLEARN = False
43
+ print("[Warning] sklearn not installed. Using basic metrics.")
44
+
45
+ from syscred.liar_dataset import LIARDataset, LiarStatement
46
+ from syscred.verification_system import CredibilityVerificationSystem
47
+ from syscred import config
48
+
49
+
50
+ class LIARBenchmark:
51
+ """
52
+ Benchmark runner for evaluating SysCRED on LIAR dataset.
53
+ """
54
+
55
+ # Map SysCRED score to binary label
56
+ SYSCRED_THRESHOLD = 0.5 # Below = Fake, Above = Real
57
+
58
+ def __init__(
59
+ self,
60
+ data_dir: Optional[str] = None,
61
+ load_ml: bool = True,
62
+ use_graphrag: bool = True
63
+ ):
64
+ """
65
+ Initialize benchmark.
66
+
67
+ Args:
68
+ data_dir: Path to LIAR dataset directory
69
+ load_ml: Whether to load ML models
70
+ use_graphrag: Whether to use GraphRAG context
71
+ """
72
+ print("=" * 60)
73
+ print("SysCRED LIAR Benchmark Runner")
74
+ print("=" * 60)
75
+
76
+ # Load dataset
77
+ self.dataset = LIARDataset(data_dir)
78
+
79
+ # Initialize SysCRED
80
+ print("\n[Benchmark] Initializing SysCRED...")
81
+ self.system = CredibilityVerificationSystem(
82
+ ontology_base_path=str(config.Config.ONTOLOGY_BASE_PATH),
83
+ ontology_data_path=str(config.Config.ONTOLOGY_DATA_PATH),
84
+ load_ml_models=load_ml,
85
+ google_api_key=config.Config.GOOGLE_FACT_CHECK_API_KEY
86
+ )
87
+
88
+ self.use_graphrag = use_graphrag
89
+ self.results: List[Dict] = []
90
+
91
+ print("[Benchmark] System ready.\n")
92
+
93
+ def _syscred_to_binary(self, score: float) -> str:
94
+ """Convert SysCRED score to binary label."""
95
+ return "Real" if score >= self.SYSCRED_THRESHOLD else "Fake"
96
+
97
+ def _syscred_to_ternary(self, score: float) -> str:
98
+ """Convert SysCRED score to ternary label."""
99
+ if score >= 0.65:
100
+ return "True"
101
+ elif score >= 0.35:
102
+ return "Mixed"
103
+ else:
104
+ return "False"
105
+
106
+ def evaluate_statement(self, statement: LiarStatement) -> Dict[str, Any]:
107
+ """
108
+ Evaluate a single statement.
109
+
110
+ Args:
111
+ statement: LiarStatement to evaluate
112
+
113
+ Returns:
114
+ Result dictionary with prediction and ground truth
115
+ """
116
+ start_time = time.time()
117
+
118
+ result = {
119
+ 'id': statement.id,
120
+ 'statement': statement.statement[:200],
121
+ 'ground_truth_6way': statement.label.name,
122
+ 'ground_truth_binary': statement.binary_label,
123
+ 'ground_truth_ternary': statement.ternary_label,
124
+ 'speaker': statement.speaker,
125
+ 'party': statement.party,
126
+ 'syscred_score': 0.5,
127
+ 'predicted_binary': 'Unknown',
128
+ 'predicted_ternary': 'Unknown',
129
+ 'binary_correct': False,
130
+ 'ternary_correct': False,
131
+ 'processing_time': 0,
132
+ 'error': None
133
+ }
134
+
135
+ try:
136
+ # Run SysCRED analysis on the statement text
137
+ # Note: LIAR statements are short claims, not URLs
138
+ report = self.system.verify_information(statement.statement)
139
+
140
+ if 'error' not in report:
141
+ score = report.get('scoreCredibilite', 0.5)
142
+ result['syscred_score'] = score
143
+ result['predicted_binary'] = self._syscred_to_binary(score)
144
+ result['predicted_ternary'] = self._syscred_to_ternary(score)
145
+
146
+ # Check correctness
147
+ result['binary_correct'] = (result['predicted_binary'] == result['ground_truth_binary'])
148
+ result['ternary_correct'] = (result['predicted_ternary'] == result['ground_truth_ternary'])
149
+
150
+ # Add extra details if available
151
+ if 'analyseNLP' in report:
152
+ result['sentiment'] = report['analyseNLP'].get('sentiment', {})
153
+ result['bias'] = report['analyseNLP'].get('bias_analysis', {})
154
+ else:
155
+ result['error'] = report['error']
156
+
157
+ except Exception as e:
158
+ result['error'] = str(e)
159
+
160
+ result['processing_time'] = time.time() - start_time
161
+
162
+ return result
163
+
164
+ def run_benchmark(
165
+ self,
166
+ split: str = "test",
167
+ sample_size: Optional[int] = None,
168
+ verbose: bool = False
169
+ ) -> Dict[str, Any]:
170
+ """
171
+ Run full benchmark on a dataset split.
172
+
173
+ Args:
174
+ split: 'train', 'valid', or 'test'
175
+ sample_size: If set, only evaluate this many statements
176
+ verbose: Print progress for each statement
177
+
178
+ Returns:
179
+ Dictionary with metrics and detailed results
180
+ """
181
+ print(f"\n[Benchmark] Running on {split} split...")
182
+
183
+ # Load dataset
184
+ statements = self.dataset.load_split(split)
185
+
186
+ if sample_size:
187
+ import random
188
+ statements = random.sample(statements, min(sample_size, len(statements)))
189
+ print(f"[Benchmark] Using sample of {len(statements)} statements")
190
+
191
+ total = len(statements)
192
+ self.results = []
193
+
194
+ # Progress tracking
195
+ start_time = time.time()
196
+
197
+ for i, stmt in enumerate(statements):
198
+ if verbose or (i + 1) % 50 == 0:
199
+ print(f"[{i+1}/{total}] Processing: {stmt.statement[:50]}...")
200
+
201
+ result = self.evaluate_statement(stmt)
202
+ self.results.append(result)
203
+
204
+ if verbose:
205
+ symbol = "✅" if result['binary_correct'] else "❌"
206
+ print(f" -> Score: {result['syscred_score']:.2f} | "
207
+ f"Pred: {result['predicted_binary']} | "
208
+ f"True: {result['ground_truth_binary']} {symbol}")
209
+
210
+ elapsed = time.time() - start_time
211
+
212
+ # Calculate metrics
213
+ metrics = self._calculate_metrics()
214
+ metrics['elapsed_time'] = elapsed
215
+ metrics['statements_per_second'] = total / elapsed if elapsed > 0 else 0
216
+
217
+ return metrics
218
+
219
+ def _calculate_metrics(self) -> Dict[str, Any]:
220
+ """Calculate evaluation metrics from results."""
221
+
222
+ if not self.results:
223
+ return {'error': 'No results to evaluate'}
224
+
225
+ # Filter successful evaluations
226
+ valid_results = [r for r in self.results if r['error'] is None]
227
+ error_count = len(self.results) - len(valid_results)
228
+
229
+ if not valid_results:
230
+ return {'error': 'All evaluations failed'}
231
+
232
+ metrics = {
233
+ 'total_statements': len(self.results),
234
+ 'successful_evaluations': len(valid_results),
235
+ 'error_count': error_count,
236
+ 'error_rate': error_count / len(self.results)
237
+ }
238
+
239
+ # Extract labels
240
+ y_true_binary = [r['ground_truth_binary'] for r in valid_results]
241
+ y_pred_binary = [r['predicted_binary'] for r in valid_results]
242
+
243
+ y_true_ternary = [r['ground_truth_ternary'] for r in valid_results]
244
+ y_pred_ternary = [r['predicted_ternary'] for r in valid_results]
245
+
246
+ # Binary metrics
247
+ if HAS_SKLEARN:
248
+ metrics['binary'] = {
249
+ 'accuracy': accuracy_score(y_true_binary, y_pred_binary),
250
+ 'precision': precision_score(y_true_binary, y_pred_binary, pos_label='Fake', zero_division=0),
251
+ 'recall': recall_score(y_true_binary, y_pred_binary, pos_label='Fake', zero_division=0),
252
+ 'f1': f1_score(y_true_binary, y_pred_binary, pos_label='Fake', zero_division=0),
253
+ 'confusion_matrix': confusion_matrix(y_true_binary, y_pred_binary, labels=['Fake', 'Real']).tolist()
254
+ }
255
+
256
+ metrics['ternary'] = {
257
+ 'accuracy': accuracy_score(y_true_ternary, y_pred_ternary),
258
+ 'macro_f1': f1_score(y_true_ternary, y_pred_ternary, average='macro', zero_division=0),
259
+ 'confusion_matrix': confusion_matrix(y_true_ternary, y_pred_ternary,
260
+ labels=['False', 'Mixed', 'True']).tolist()
261
+ }
262
+
263
+ # Detailed classification report
264
+ metrics['classification_report'] = classification_report(
265
+ y_true_binary, y_pred_binary,
266
+ target_names=['Fake', 'Real'],
267
+ output_dict=True
268
+ )
269
+ else:
270
+ # Basic metrics without sklearn
271
+ correct_binary = sum(1 for r in valid_results if r['binary_correct'])
272
+ correct_ternary = sum(1 for r in valid_results if r['ternary_correct'])
273
+
274
+ metrics['binary'] = {
275
+ 'accuracy': correct_binary / len(valid_results),
276
+ 'correct': correct_binary,
277
+ 'incorrect': len(valid_results) - correct_binary
278
+ }
279
+
280
+ metrics['ternary'] = {
281
+ 'accuracy': correct_ternary / len(valid_results),
282
+ 'correct': correct_ternary,
283
+ 'incorrect': len(valid_results) - correct_ternary
284
+ }
285
+
286
+ # Score distribution
287
+ scores = [r['syscred_score'] for r in valid_results]
288
+ metrics['score_distribution'] = {
289
+ 'mean': sum(scores) / len(scores),
290
+ 'min': min(scores),
291
+ 'max': max(scores),
292
+ 'median': sorted(scores)[len(scores) // 2]
293
+ }
294
+
295
+ # Per-party analysis
296
+ party_results = {}
297
+ for party in ['republican', 'democrat']:
298
+ party_items = [r for r in valid_results if r['party'].lower() == party]
299
+ if party_items:
300
+ party_correct = sum(1 for r in party_items if r['binary_correct'])
301
+ party_results[party] = {
302
+ 'count': len(party_items),
303
+ 'accuracy': party_correct / len(party_items)
304
+ }
305
+ metrics['per_party'] = party_results
306
+
307
+ return metrics
308
+
309
+ def print_results(self, metrics: Dict[str, Any]) -> None:
310
+ """Pretty-print benchmark results."""
311
+ print("\n" + "=" * 60)
312
+ print("LIAR BENCHMARK RESULTS")
313
+ print("=" * 60)
314
+
315
+ print(f"\n📊 Overview:")
316
+ print(f" Total Statements: {metrics.get('total_statements', 0)}")
317
+ print(f" Successful: {metrics.get('successful_evaluations', 0)}")
318
+ print(f" Errors: {metrics.get('error_count', 0)} ({metrics.get('error_rate', 0):.1%})")
319
+ print(f" Processing Time: {metrics.get('elapsed_time', 0):.1f}s")
320
+ print(f" Speed: {metrics.get('statements_per_second', 0):.2f} stmt/sec")
321
+
322
+ if 'binary' in metrics:
323
+ print(f"\n📈 Binary Classification (Fake vs Real):")
324
+ b = metrics['binary']
325
+ print(f" Accuracy: {b.get('accuracy', 0):.2%}")
326
+ print(f" Precision: {b.get('precision', 0):.2%}")
327
+ print(f" Recall: {b.get('recall', 0):.2%}")
328
+ print(f" F1-Score: {b.get('f1', 0):.2f}")
329
+
330
+ if 'confusion_matrix' in b:
331
+ cm = b['confusion_matrix']
332
+ print(f"\n Confusion Matrix:")
333
+ print(f" Pred Fake Pred Real")
334
+ print(f" True Fake {cm[0][0]:5d} {cm[0][1]:5d}")
335
+ print(f" True Real {cm[1][0]:5d} {cm[1][1]:5d}")
336
+
337
+ if 'ternary' in metrics:
338
+ print(f"\n📊 Ternary Classification (False/Mixed/True):")
339
+ t = metrics['ternary']
340
+ print(f" Accuracy: {t.get('accuracy', 0):.2%}")
341
+ print(f" Macro F1: {t.get('macro_f1', 0):.2f}")
342
+
343
+ if 'per_party' in metrics:
344
+ print(f"\n🏛️ Per-Party Analysis:")
345
+ for party, data in metrics['per_party'].items():
346
+ print(f" {party.capitalize()}: {data['accuracy']:.2%} accuracy ({data['count']} samples)")
347
+
348
+ if 'score_distribution' in metrics:
349
+ print(f"\n📉 Score Distribution:")
350
+ sd = metrics['score_distribution']
351
+ print(f" Mean: {sd['mean']:.3f}")
352
+ print(f" Median: {sd['median']:.3f}")
353
+ print(f" Range: [{sd['min']:.3f}, {sd['max']:.3f}]")
354
+
355
+ print("\n" + "=" * 60)
356
+
357
+ def save_results(self, output_path: str, metrics: Dict[str, Any]) -> None:
358
+ """Save results to files."""
359
+ output = Path(output_path)
360
+ output.parent.mkdir(parents=True, exist_ok=True)
361
+
362
+ # Save detailed results as CSV
363
+ if HAS_PANDAS and self.results:
364
+ df = pd.DataFrame(self.results)
365
+ csv_path = output.with_suffix('.csv')
366
+ df.to_csv(csv_path, index=False)
367
+ print(f"[Benchmark] Results saved to: {csv_path}")
368
+
369
+ # Save metrics as JSON
370
+ json_path = output.with_suffix('.json')
371
+ with open(json_path, 'w') as f:
372
+ json.dump({
373
+ 'timestamp': datetime.now().isoformat(),
374
+ 'dataset': 'LIAR',
375
+ 'metrics': metrics,
376
+ 'config': {
377
+ 'threshold': self.SYSCRED_THRESHOLD,
378
+ 'use_graphrag': self.use_graphrag,
379
+ 'weights': dict(self.system.weights)
380
+ }
381
+ }, f, indent=2, default=str)
382
+ print(f"[Benchmark] Metrics saved to: {json_path}")
383
+
384
+
385
+ def main():
386
+ parser = argparse.ArgumentParser(description='Run LIAR benchmark on SysCRED')
387
+ parser.add_argument('--split', type=str, default='test',
388
+ choices=['train', 'valid', 'test'],
389
+ help='Dataset split to evaluate')
390
+ parser.add_argument('--sample', type=int, default=None,
391
+ help='Number of statements to sample (for quick testing)')
392
+ parser.add_argument('--data-dir', type=str, default=None,
393
+ help='Path to LIAR dataset directory')
394
+ parser.add_argument('--output', type=str, default=None,
395
+ help='Output path for results (CSV/JSON)')
396
+ parser.add_argument('--no-ml', action='store_true',
397
+ help='Disable ML models for faster testing')
398
+ parser.add_argument('--verbose', '-v', action='store_true',
399
+ help='Print details for each statement')
400
+
401
+ args = parser.parse_args()
402
+
403
+ # Run benchmark
404
+ benchmark = LIARBenchmark(
405
+ data_dir=args.data_dir,
406
+ load_ml=not args.no_ml
407
+ )
408
+
409
+ try:
410
+ metrics = benchmark.run_benchmark(
411
+ split=args.split,
412
+ sample_size=args.sample,
413
+ verbose=args.verbose
414
+ )
415
+
416
+ benchmark.print_results(metrics)
417
+
418
+ if args.output:
419
+ benchmark.save_results(args.output, metrics)
420
+ else:
421
+ # Default output path
422
+ default_output = Path(__file__).parent / f"liar_benchmark_{args.split}.csv"
423
+ benchmark.save_results(str(default_output), metrics)
424
+
425
+ except FileNotFoundError as e:
426
+ print(f"\n❌ Error: {e}")
427
+ print("\nTo download the LIAR dataset:")
428
+ print(" 1. wget https://www.cs.ucsb.edu/~william/data/liar_dataset.zip")
429
+ print(" 2. unzip liar_dataset.zip -d 02_Code/syscred/datasets/liar/")
430
+ sys.exit(1)
431
+
432
+
433
+ if __name__ == "__main__":
434
+ main()
syscred/run_liar_benchmark_remote.py ADDED
@@ -0,0 +1,373 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ LIAR Benchmark via Hugging Face Space API
5
+ ==========================================
6
+ Runs the LIAR benchmark against the remote SysCRED instance on HF Space.
7
+ This uses the full ML pipeline (PyTorch, Transformers) running in the cloud.
8
+
9
+ Usage:
10
+ python run_liar_benchmark_remote.py --sample 100
11
+ python run_liar_benchmark_remote.py --split test --url https://your-space.hf.space
12
+
13
+ (c) Dominique S. Loyer - PhD Thesis Prototype
14
+ """
15
+
16
+ import argparse
17
+ import json
18
+ import time
19
+ import sys
20
+ from pathlib import Path
21
+ from datetime import datetime
22
+ from typing import Dict, List, Any, Optional
23
+ import requests
24
+
25
+ try:
26
+ import pandas as pd
27
+ HAS_PANDAS = True
28
+ except ImportError:
29
+ HAS_PANDAS = False
30
+
31
+ try:
32
+ from sklearn.metrics import (
33
+ accuracy_score, precision_score, recall_score, f1_score,
34
+ confusion_matrix, classification_report
35
+ )
36
+ HAS_SKLEARN = True
37
+ except ImportError:
38
+ HAS_SKLEARN = False
39
+
40
+ # Add parent to path for imports
41
+ sys.path.insert(0, str(Path(__file__).parent.parent))
42
+ from syscred.liar_dataset import LIARDataset, LiarStatement
43
+
44
+
45
+ class RemoteLIARBenchmark:
46
+ """
47
+ Benchmark runner using remote HF Space API.
48
+ """
49
+
50
+ # Default HF Space URL
51
+ DEFAULT_API_URL = "https://domloyer-syscred.hf.space"
52
+
53
+ SYSCRED_THRESHOLD = 0.5 # Below = Fake, Above = Real
54
+
55
+ def __init__(
56
+ self,
57
+ api_url: Optional[str] = None,
58
+ data_dir: Optional[str] = None,
59
+ timeout: int = 60
60
+ ):
61
+ """
62
+ Initialize remote benchmark.
63
+
64
+ Args:
65
+ api_url: HF Space API URL
66
+ data_dir: Path to LIAR dataset
67
+ timeout: Request timeout in seconds
68
+ """
69
+ print("=" * 60)
70
+ print("SysCRED LIAR Benchmark (Remote HF Space)")
71
+ print("=" * 60)
72
+
73
+ self.api_url = (api_url or self.DEFAULT_API_URL).rstrip('/')
74
+ self.timeout = timeout
75
+
76
+ # Test connection
77
+ print(f"\n[Remote] API URL: {self.api_url}")
78
+ self._test_connection()
79
+
80
+ # Load dataset
81
+ self.dataset = LIARDataset(data_dir)
82
+ self.results: List[Dict] = []
83
+
84
+ print("[Remote] Ready.\n")
85
+
86
+ def _test_connection(self):
87
+ """Test API connectivity."""
88
+ try:
89
+ response = requests.get(f"{self.api_url}/api/health", timeout=10)
90
+ if response.status_code == 200:
91
+ print("[Remote] ✓ API connection successful")
92
+ else:
93
+ print(f"[Remote] ⚠ API returned status {response.status_code}")
94
+ except requests.exceptions.ConnectionError:
95
+ print("[Remote] ⚠ Could not connect to API (may be sleeping)")
96
+ print("[Remote] The first request will wake it up...")
97
+ except Exception as e:
98
+ print(f"[Remote] ⚠ Connection test failed: {e}")
99
+
100
+ def _call_api(self, text: str) -> Dict[str, Any]:
101
+ """Call the SysCRED API."""
102
+ try:
103
+ response = requests.post(
104
+ f"{self.api_url}/api/verify",
105
+ json={"input": text},
106
+ timeout=self.timeout,
107
+ headers={"Content-Type": "application/json"}
108
+ )
109
+
110
+ if response.status_code == 200:
111
+ return response.json()
112
+ else:
113
+ return {"error": f"HTTP {response.status_code}: {response.text[:100]}"}
114
+
115
+ except requests.exceptions.Timeout:
116
+ return {"error": "Request timeout"}
117
+ except requests.exceptions.ConnectionError:
118
+ return {"error": "Connection error"}
119
+ except Exception as e:
120
+ return {"error": str(e)}
121
+
122
+ def _syscred_to_binary(self, score: float) -> str:
123
+ """Convert SysCRED score to binary label."""
124
+ return "Real" if score >= self.SYSCRED_THRESHOLD else "Fake"
125
+
126
+ def _syscred_to_ternary(self, score: float) -> str:
127
+ """Convert SysCRED score to ternary label."""
128
+ if score >= 0.65:
129
+ return "True"
130
+ elif score >= 0.35:
131
+ return "Mixed"
132
+ else:
133
+ return "False"
134
+
135
def evaluate_statement(self, statement: LiarStatement) -> Dict[str, Any]:
    """Evaluate one LIAR statement through the remote SysCRED API.

    Returns a flat result dict holding the ground-truth labels, the API's
    score and derived predictions, correctness flags, timing, and
    diagnostic fields (ML usage, GraphRAG context).
    """
    started = time.time()

    # Default record: neutral score, unknown predictions, no error.
    record = {
        'id': statement.id,
        'statement': statement.statement[:200],
        'ground_truth_6way': statement.label.name,
        'ground_truth_binary': statement.binary_label,
        'ground_truth_ternary': statement.ternary_label,
        'speaker': statement.speaker,
        'party': statement.party,
        'syscred_score': 0.5,
        'predicted_binary': 'Unknown',
        'predicted_ternary': 'Unknown',
        'binary_correct': False,
        'ternary_correct': False,
        'processing_time': 0,
        'error': None,
        'ml_used': False
    }

    # Call remote API
    api_result = self._call_api(statement.statement)

    if 'error' in api_result:
        record['error'] = api_result['error']
    else:
        score = api_result.get('scoreCredibilite', 0.5)
        record['syscred_score'] = score
        record['predicted_binary'] = self._syscred_to_binary(score)
        record['predicted_ternary'] = self._syscred_to_ternary(score)

        record['binary_correct'] = (record['predicted_binary'] == record['ground_truth_binary'])
        record['ternary_correct'] = (record['predicted_ternary'] == record['ground_truth_ternary'])

        # ML models were active iff the NLP section carries a sentiment.
        nlp = api_result.get('analyseNLP', {})
        record['ml_used'] = nlp.get('sentiment') is not None

        # GraphRAG diagnostics (only populated on successful calls).
        graphrag = api_result.get('graphRAG', {})
        record['graph_context_score'] = graphrag.get('context_score')
        record['graph_has_history'] = graphrag.get('has_history', False)

    record['processing_time'] = time.time() - started

    return record
183
+
184
def run_benchmark(
    self,
    split: str = "test",
    sample_size: Optional[int] = None,
    verbose: bool = False
) -> Dict[str, Any]:
    """Run the LIAR benchmark against the remote HF Space API.

    Loads the requested split (optionally a random sample), evaluates
    each statement, stores per-statement results in ``self.results``,
    and returns aggregate metrics enriched with timing/API metadata.
    """
    print(f"\n[Remote] Running on {split} split via HF Space API...")

    statements = self.dataset.load_split(split)

    if sample_size:
        import random
        # Sample without replacement, capped at the split size.
        statements = random.sample(statements, min(sample_size, len(statements)))
        print(f"[Remote] Using sample of {len(statements)} statements")

    total = len(statements)
    self.results = []
    ml_used_count = 0

    start_time = time.time()

    for idx, stmt in enumerate(statements):
        # Progress line: always every 10th item, every item when verbose.
        if verbose or (idx + 1) % 10 == 0:
            print(f"[{idx+1}/{total}] Processing: {stmt.statement[:50]}...")

        outcome = self.evaluate_statement(stmt)
        self.results.append(outcome)

        ml_used_count += 1 if outcome.get('ml_used') else 0

        if verbose and not outcome.get('error'):
            symbol = "✅" if outcome['binary_correct'] else "❌"
            ml = "🧠" if outcome['ml_used'] else "📊"
            print(f" -> Score: {outcome['syscred_score']:.2f} {ml} | "
                  f"Pred: {outcome['predicted_binary']} | "
                  f"True: {outcome['ground_truth_binary']} {symbol}")

        # Rate limiting - be nice to the API
        if idx < total - 1:
            time.sleep(0.5)

    elapsed = time.time() - start_time

    metrics = self._calculate_metrics()
    metrics['elapsed_time'] = elapsed
    metrics['statements_per_second'] = total / elapsed if elapsed > 0 else 0
    metrics['ml_used_percentage'] = (ml_used_count / total * 100) if total > 0 else 0
    metrics['api_url'] = self.api_url

    return metrics
236
+
237
def _calculate_metrics(self) -> Dict[str, Any]:
    """Aggregate per-statement results into benchmark metrics.

    Filters out failed evaluations, then computes binary (and, with
    sklearn available, ternary) classification metrics plus a simple
    score distribution summary.
    """
    if not self.results:
        return {'error': 'No results'}

    valid = [r for r in self.results if r['error'] is None]
    failures = len(self.results) - len(valid)

    if not valid:
        return {'error': 'All evaluations failed'}

    metrics: Dict[str, Any] = {
        'total_statements': len(self.results),
        'successful_evaluations': len(valid),
        'error_count': failures,
        'error_rate': failures / len(self.results)
    }

    y_true_bin = [r['ground_truth_binary'] for r in valid]
    y_pred_bin = [r['predicted_binary'] for r in valid]
    y_true_ter = [r['ground_truth_ternary'] for r in valid]
    y_pred_ter = [r['predicted_ternary'] for r in valid]

    if HAS_SKLEARN:
        # 'Fake' is treated as the positive class for precision/recall/F1.
        metrics['binary'] = {
            'accuracy': accuracy_score(y_true_bin, y_pred_bin),
            'precision': precision_score(y_true_bin, y_pred_bin, pos_label='Fake', zero_division=0),
            'recall': recall_score(y_true_bin, y_pred_bin, pos_label='Fake', zero_division=0),
            'f1': f1_score(y_true_bin, y_pred_bin, pos_label='Fake', zero_division=0),
            'confusion_matrix': confusion_matrix(y_true_bin, y_pred_bin, labels=['Fake', 'Real']).tolist()
        }

        metrics['ternary'] = {
            'accuracy': accuracy_score(y_true_ter, y_pred_ter),
            'macro_f1': f1_score(y_true_ter, y_pred_ter, average='macro', zero_division=0),
        }
    else:
        # Fallback without sklearn: plain binary accuracy only.
        hits = sum(1 for r in valid if r['binary_correct'])
        metrics['binary'] = {'accuracy': hits / len(valid)}

    scores = [r['syscred_score'] for r in valid]
    metrics['score_distribution'] = {
        'mean': sum(scores) / len(scores),
        'min': min(scores),
        'max': max(scores),
    }

    return metrics
286
+
287
def print_results(self, metrics: Dict[str, Any]) -> None:
    """Pretty-print benchmark metrics to stdout.

    Missing keys fall back to 0 / 'N/A' so a partially-filled metrics
    dict still renders without raising.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("LIAR BENCHMARK RESULTS (Remote HF Space)")
    print(banner)

    print(f"\n🌐 API: {metrics.get('api_url', 'N/A')}")
    print(f"🧠 ML Models Used: {metrics.get('ml_used_percentage', 0):.1f}%")

    print("\n📊 Overview:")
    print(f" Total: {metrics.get('total_statements', 0)}")
    print(f" Success: {metrics.get('successful_evaluations', 0)}")
    print(f" Errors: {metrics.get('error_count', 0)}")
    print(f" Time: {metrics.get('elapsed_time', 0):.1f}s")

    if 'binary' in metrics:
        binary = metrics['binary']
        print("\n📈 Binary Classification:")
        print(f" Accuracy: {binary.get('accuracy', 0):.2%}")
        print(f" Precision: {binary.get('precision', 0):.2%}")
        print(f" Recall: {binary.get('recall', 0):.2%}")
        print(f" F1-Score: {binary.get('f1', 0):.2f}")

    print("\n" + banner)
311
+
312
def save_results(self, output_path: str, metrics: Dict[str, Any]) -> None:
    """Persist per-statement results (CSV) and aggregate metrics (JSON).

    The CSV is only written when pandas is available and there are
    results; the JSON metrics file is always written next to it.
    """
    target = Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)

    # Per-statement results as CSV (requires pandas).
    if HAS_PANDAS and self.results:
        csv_path = target.with_suffix('.csv')
        pd.DataFrame(self.results).to_csv(csv_path, index=False)
        print(f"[Remote] Results: {csv_path}")

    # Aggregate metrics as JSON; default=str keeps exotic values serializable.
    payload = {
        'timestamp': datetime.now().isoformat(),
        'dataset': 'LIAR',
        'mode': 'remote',
        'metrics': metrics
    }
    json_path = target.with_suffix('.json')
    with open(json_path, 'w') as f:
        json.dump(payload, f, indent=2, default=str)
    print(f"[Remote] Metrics: {json_path}")
332
+
333
+
334
def main():
    """CLI entry point: parse arguments, run the remote benchmark, save output.

    Exits with status 1 when the dataset files cannot be found.
    """
    parser = argparse.ArgumentParser(description='LIAR benchmark via HF Space API')
    parser.add_argument('--url', type=str, default=None,
                        help='HF Space API URL')
    parser.add_argument('--split', type=str, default='test',
                        choices=['train', 'valid', 'test'])
    parser.add_argument('--sample', type=int, default=None,
                        help='Number of statements to sample')
    parser.add_argument('--data-dir', type=str, default=None)
    parser.add_argument('--output', type=str, default=None)
    parser.add_argument('--verbose', '-v', action='store_true')
    parser.add_argument('--timeout', type=int, default=60)
    args = parser.parse_args()

    benchmark = RemoteLIARBenchmark(
        api_url=args.url,
        data_dir=args.data_dir,
        timeout=args.timeout,
    )

    try:
        metrics = benchmark.run_benchmark(
            split=args.split,
            sample_size=args.sample,
            verbose=args.verbose,
        )
        benchmark.print_results(metrics)

        # Default output name encodes the evaluated split.
        destination = args.output or f"liar_benchmark_remote_{args.split}.csv"
        benchmark.save_results(destination, metrics)
    except FileNotFoundError as exc:
        print(f"\n❌ Error: {exc}")
        sys.exit(1)
370
+
371
+
372
+ if __name__ == "__main__":
373
+ main()
syscred/static/index.html CHANGED
@@ -333,6 +333,69 @@
333
  transform: none;
334
  box-shadow: none;
335
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
  </style>
337
  </head>
338
 
@@ -345,12 +408,23 @@
345
 
346
  <div class="search-box">
347
  <div class="input-group">
348
- <input type="text" id="urlInput" placeholder="Entrez une URL à analyser (ex: https://www.lemonde.fr)"
349
  autofocus>
350
  <button id="analyzeBtn" onclick="analyzeUrl()">
351
  🔍 Analyser
352
  </button>
353
  </div>
 
 
 
 
 
 
 
 
 
 
 
354
  </div>
355
 
356
  <div class="loading" id="loading">
@@ -393,7 +467,32 @@
393
  </div>
394
 
395
  <script>
396
- const API_URL = 'http://localhost:5001';
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
397
 
398
  async function analyzeUrl() {
399
  const urlInput = document.getElementById('urlInput');
 
333
  transform: none;
334
  box-shadow: none;
335
  }
336
+ /* Backend Toggle Switch */
337
+ .backend-toggle {
338
+ display: flex;
339
+ align-items: center;
340
+ justify-content: center;
341
+ gap: 0.75rem;
342
+ margin-top: 1rem;
343
+ padding: 0.75rem;
344
+ background: rgba(0,0,0,0.2);
345
+ border-radius: 10px;
346
+ }
347
+ .backend-toggle label {
348
+ font-size: 0.85rem;
349
+ color: #8b8ba7;
350
+ cursor: pointer;
351
+ }
352
+ .backend-toggle .active {
353
+ color: #a855f7;
354
+ font-weight: 600;
355
+ }
356
+ .toggle-switch {
357
+ position: relative;
358
+ width: 50px;
359
+ height: 26px;
360
+ }
361
+ .toggle-switch input {
362
+ opacity: 0;
363
+ width: 0;
364
+ height: 0;
365
+ }
366
+ .toggle-slider {
367
+ position: absolute;
368
+ cursor: pointer;
369
+ top: 0; left: 0; right: 0; bottom: 0;
370
+ background: linear-gradient(135deg, #22c55e, #16a34a);
371
+ border-radius: 26px;
372
+ transition: 0.3s;
373
+ }
374
+ .toggle-slider:before {
375
+ position: absolute;
376
+ content: '';
377
+ height: 20px;
378
+ width: 20px;
379
+ left: 3px;
380
+ bottom: 3px;
381
+ background: white;
382
+ border-radius: 50%;
383
+ transition: 0.3s;
384
+ }
385
+ .toggle-switch input:checked + .toggle-slider {
386
+ background: linear-gradient(135deg, #7c3aed, #a855f7);
387
+ }
388
+ .toggle-switch input:checked + .toggle-slider:before {
389
+ transform: translateX(24px);
390
+ }
391
+ .backend-status {
392
+ font-size: 0.75rem;
393
+ color: #6b6b8a;
394
+ text-align: center;
395
+ margin-top: 0.5rem;
396
+ }
397
+ .backend-status.local { color: #22c55e; }
398
+ .backend-status.remote { color: #a855f7; }
399
  </style>
400
  </head>
401
 
 
408
 
409
  <div class="search-box">
410
  <div class="input-group">
411
+ <input type="text" id="urlInput" placeholder="Entrez une URL ou du texte à analyser"
412
  autofocus>
413
  <button id="analyzeBtn" onclick="analyzeUrl()">
414
  🔍 Analyser
415
  </button>
416
  </div>
417
+
418
+ <!-- Backend Toggle -->
419
+ <div class="backend-toggle">
420
+ <label id="labelLocal" class="active">🖥️ Local</label>
421
+ <div class="toggle-switch">
422
+ <input type="checkbox" id="backendToggle" onchange="toggleBackend()">
423
+ <span class="toggle-slider"></span>
424
+ </div>
425
+ <label id="labelRemote">☁️ HF Space</label>
426
+ </div>
427
+ <div class="backend-status local" id="backendStatus">Backend: localhost:5001 (léger, sans ML)</div>
428
  </div>
429
 
430
  <div class="loading" id="loading">
 
467
  </div>
468
 
469
  <script>
470
// Backend endpoints the UI can talk to.
const LOCAL_API_URL = 'http://localhost:5001';
const REMOTE_API_URL = 'https://domloyer-syscred.hf.space';
let API_URL = LOCAL_API_URL;

/**
 * Switch the active backend between the local server and the HF Space.
 * Updates the global API_URL, the status line, and the label highlighting
 * based on the state of the #backendToggle checkbox.
 */
function toggleBackend() {
    const toggle = document.getElementById('backendToggle');
    const status = document.getElementById('backendStatus');
    const labelLocal = document.getElementById('labelLocal');
    const labelRemote = document.getElementById('labelRemote');

    const useRemote = toggle.checked;

    API_URL = useRemote ? REMOTE_API_URL : LOCAL_API_URL;
    status.textContent = useRemote
        ? 'Backend: HF Space (ML complet, plus lent)'
        : 'Backend: localhost:5001 (léger, sans ML)';
    status.className = useRemote ? 'backend-status remote' : 'backend-status local';

    // Exactly one label carries the "active" class at a time.
    labelLocal.classList.toggle('active', !useRemote);
    labelRemote.classList.toggle('active', useRemote);

    console.log('[SysCRED] Backend switched to:', API_URL);
}
496
 
497
  async function analyzeUrl() {
498
  const urlInput = document.getElementById('urlInput');
syscred/verification_system.py CHANGED
@@ -464,14 +464,28 @@ class CredibilityVerificationSystem:
464
  adjustments += w_ent * boost
465
  total_weight_used += w_ent
466
 
467
- # 6. Text Coherence (15%) (Vocabulary Diversity)
468
- w_coh = self.weights.get('coherence', 0.15)
469
  coherence = nlp_results.get('coherence_score')
470
  if coherence is not None:
471
  # Coherence is usually 0.0 to 1.0
472
  # Center around 0.5: >0.5 improves, <0.5 penalizes
473
  adjustments += (coherence - 0.5) * w_coh
474
  total_weight_used += w_coh
 
 
 
 
 
 
 
 
 
 
 
 
 
 
475
 
476
  # Final calculation
477
  # Base 0.5 + sum of weighted adjustments
@@ -648,6 +662,15 @@ class CredibilityVerificationSystem:
648
  'coherence_score': nlp_results.get('coherence_score'),
649
  'sentiment_explanation_preview': (nlp_results.get('sentiment_explanation') or [])[:3]
650
  },
 
 
 
 
 
 
 
 
 
651
  # [NEW] TREC Evidence section
652
  'evidences': evidences or [],
653
  'metadonnees': {}
@@ -758,6 +781,20 @@ class CredibilityVerificationSystem:
758
  'weight': f"{int(self.weights.get('sentiment_neutrality',0)*100)}%",
759
  'impact': '-' if sent.get('score', 0) > 0.9 else '0'
760
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
761
 
762
  return factors
763
 
@@ -828,31 +865,44 @@ class CredibilityVerificationSystem:
828
  print("[SysCRED] Running rule-based analysis...")
829
  rule_results = self.rule_based_analysis(cleaned_text, external_data)
830
 
831
- # 5. NLP analysis
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
832
  print("[SysCRED] Running NLP analysis...")
833
  nlp_results = self.nlp_analysis(cleaned_text)
834
 
835
- # 6. Calculate score
836
  overall_score = self.calculate_overall_score(rule_results, nlp_results)
837
  print(f"[SysCRED] ✓ Credibility score: {overall_score:.2f}")
838
 
839
- # 7. [NEW] GraphRAG Context Retrieval
840
- graph_context = ""
841
- similar_uris = []
842
- if self.graph_rag and 'source_analysis' in rule_results:
843
- domain = rule_results['source_analysis'].get('domain', '')
844
- # Pass keywords for text search if domain is empty or generic
845
- keywords = []
846
- if not domain and cleaned_text:
847
- keywords = cleaned_text.split()[:5] # Simple keyword extraction
848
-
849
- context = self.graph_rag.get_context(domain, keywords=keywords)
850
- graph_context = context.get('full_text', '')
851
- similar_uris = context.get('similar_uris', [])
852
-
853
- if "Graph Memory" in graph_context:
854
- print(f"[SysCRED] GraphRAG Context Found: {graph_context.splitlines()[1]}")
855
-
856
  # 8. Generate report (Updated to include context)
857
  report = self.generate_report(
858
  input_data, cleaned_text, rule_results,
 
464
  adjustments += w_ent * boost
465
  total_weight_used += w_ent
466
 
467
+ # 6. Text Coherence (12%) (Vocabulary Diversity)
468
+ w_coh = self.weights.get('coherence', 0.12)
469
  coherence = nlp_results.get('coherence_score')
470
  if coherence is not None:
471
  # Coherence is usually 0.0 to 1.0
472
  # Center around 0.5: >0.5 improves, <0.5 penalizes
473
  adjustments += (coherence - 0.5) * w_coh
474
  total_weight_used += w_coh
475
+
476
+ # 7. [NEW] GraphRAG Context Score (15%)
477
+ # This uses historical knowledge from the knowledge graph
478
+ w_graph = self.weights.get('graph_context', 0.15)
479
+ graph_context_data = rule_results.get('graph_context_data', {})
480
+ if graph_context_data and graph_context_data.get('confidence', 0) > 0:
481
+ # Use combined score from GraphRAG
482
+ graph_score = graph_context_data.get('combined_score', 0.5)
483
+ confidence = graph_context_data.get('confidence', 0)
484
+
485
+ # Scale adjustment by confidence (0 confidence = no effect)
486
+ adjustment_factor = (graph_score - 0.5) * w_graph * confidence
487
+ adjustments += adjustment_factor
488
+ total_weight_used += w_graph * confidence # Partial weight based on confidence
489
 
490
  # Final calculation
491
  # Base 0.5 + sum of weighted adjustments
 
662
  'coherence_score': nlp_results.get('coherence_score'),
663
  'sentiment_explanation_preview': (nlp_results.get('sentiment_explanation') or [])[:3]
664
  },
665
+ # [NEW] GraphRAG section
666
+ 'graphRAG': {
667
+ 'context_text': graph_context,
668
+ 'context_score': rule_results.get('graph_context_data', {}).get('combined_score'),
669
+ 'confidence': rule_results.get('graph_context_data', {}).get('confidence', 0),
670
+ 'has_history': rule_results.get('graph_context_data', {}).get('has_history', False),
671
+ 'history_count': rule_results.get('graph_context_data', {}).get('history_count', 0),
672
+ 'similar_claims_count': rule_results.get('graph_context_data', {}).get('similar_count', 0)
673
+ },
674
  # [NEW] TREC Evidence section
675
  'evidences': evidences or [],
676
  'metadonnees': {}
 
781
  'weight': f"{int(self.weights.get('sentiment_neutrality',0)*100)}%",
782
  'impact': '-' if sent.get('score', 0) > 0.9 else '0'
783
  })
784
+
785
+ # 5. GraphRAG Context (NEW)
786
+ graph_data = rule_results.get('graph_context_data', {})
787
+ if graph_data.get('confidence', 0) > 0:
788
+ graph_score = graph_data.get('combined_score', 0.5)
789
+ impact = '+' if graph_score > 0.6 else ('-' if graph_score < 0.4 else '0')
790
+ factors.append({
791
+ 'factor': 'Graph Context (History)',
792
+ 'value': f"Score: {graph_score:.2f}, Confidence: {graph_data.get('confidence', 0):.0%}",
793
+ 'weight': f"{int(self.weights.get('graph_context',0)*100)}%",
794
+ 'impact': impact,
795
+ 'history_count': graph_data.get('history_count', 0),
796
+ 'similar_count': graph_data.get('similar_count', 0)
797
+ })
798
 
799
  return factors
800
 
 
865
  print("[SysCRED] Running rule-based analysis...")
866
  rule_results = self.rule_based_analysis(cleaned_text, external_data)
867
 
868
+ # 5. [MOVED] GraphRAG Context Retrieval (Before NLP for context)
869
+ graph_context = ""
870
+ similar_uris = []
871
+ graph_context_data = {}
872
+
873
+ if self.graph_rag and 'source_analysis' in rule_results:
874
+ domain = rule_results['source_analysis'].get('domain', '')
875
+ # Pass keywords for text search if domain is empty or generic
876
+ keywords = []
877
+ if cleaned_text:
878
+ # Extract meaningful keywords (filter out short words)
879
+ keywords = [w for w in cleaned_text.split()[:10] if len(w) > 4]
880
+
881
+ # Get text context for display
882
+ context = self.graph_rag.get_context(domain, keywords=keywords)
883
+ graph_context = context.get('full_text', '')
884
+ similar_uris = context.get('similar_uris', [])
885
+
886
+ # Get numerical score for integration into scoring
887
+ graph_context_data = self.graph_rag.compute_context_score(domain, keywords=keywords)
888
+
889
+ # Add to rule_results for use in calculate_overall_score
890
+ rule_results['graph_context_data'] = graph_context_data
891
+
892
+ if graph_context_data.get('has_history'):
893
+ print(f"[SysCRED] GraphRAG: Domain has {graph_context_data['history_count']} prior evaluations, "
894
+ f"avg score: {graph_context_data['history_score']:.2f}")
895
+ if graph_context_data.get('similar_count', 0) > 0:
896
+ print(f"[SysCRED] GraphRAG: Found {graph_context_data['similar_count']} similar claims")
897
+
898
+ # 6. NLP analysis
899
  print("[SysCRED] Running NLP analysis...")
900
  nlp_results = self.nlp_analysis(cleaned_text)
901
 
902
+ # 7. Calculate score (Now includes GraphRAG context)
903
  overall_score = self.calculate_overall_score(rule_results, nlp_results)
904
  print(f"[SysCRED] ✓ Credibility score: {overall_score:.2f}")
905
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
906
  # 8. Generate report (Updated to include context)
907
  report = self.generate_report(
908
  input_data, cleaned_text, rule_results,