DominiqueLoyer committed on
Commit ·
8e97fc5
1
Parent(s): 6059862
Deploy SysCRED v2.3.1 - GraphRAG + LIAR benchmark + TREC integration
Browse files- syscred/.env.example +22 -0
- syscred/__init__.py +12 -2
- syscred/backend_app.py +258 -5
- syscred/config.py +8 -6
- syscred/datasets/liar/README +41 -0
- syscred/datasets/liar/test.tsv +0 -0
- syscred/datasets/liar/train.tsv +0 -0
- syscred/datasets/liar/valid.tsv +0 -0
- syscred/eval_metrics.py +9 -0
- syscred/graph_rag.py +130 -5
- syscred/liar_benchmark_test.csv +101 -0
- syscred/liar_benchmark_test.json +105 -0
- syscred/liar_dataset.py +359 -0
- syscred/run_liar_benchmark.py +434 -0
- syscred/run_liar_benchmark_remote.py +373 -0
- syscred/static/index.html +101 -2
- syscred/verification_system.py +71 -21
syscred/.env.example
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SysCRED Environment Configuration
|
| 2 |
+
# ==================================
|
| 3 |
+
# Copy this file to .env and fill in your values
|
| 4 |
+
|
| 5 |
+
# Supabase Database (PostgreSQL)
|
| 6 |
+
# Get this from: Supabase Dashboard > Settings > Database > Connection string
|
| 7 |
+
DATABASE_URL=postgresql://postgres:YOUR_PASSWORD@db.YOUR_PROJECT.supabase.co:5432/postgres
|
| 8 |
+
|
| 9 |
+
# Google Fact Check API Key
|
| 10 |
+
# Get this from: https://console.cloud.google.com/apis/credentials
|
| 11 |
+
GOOGLE_FACT_CHECK_API_KEY=your_google_api_key_here
|
| 12 |
+
|
| 13 |
+
# HuggingFace Space URL (for remote processing)
|
| 14 |
+
HF_SPACE_URL=https://domloyer-syscred.hf.space
|
| 15 |
+
|
| 16 |
+
# Flask Configuration
|
| 17 |
+
FLASK_DEBUG=false
|
| 18 |
+
FLASK_HOST=0.0.0.0
|
| 19 |
+
FLASK_PORT=5001
|
| 20 |
+
|
| 21 |
+
# ML Model Loading (set to false for lightweight mode)
|
| 22 |
+
LOAD_ML_MODELS=false
|
syscred/__init__.py
CHANGED
|
@@ -11,14 +11,15 @@ Modules:
|
|
| 11 |
- ir_engine: BM25, QLD, TF-IDF, PRF (from TREC)
|
| 12 |
- trec_retriever: Evidence retrieval for fact-checking (NEW v2.3)
|
| 13 |
- trec_dataset: TREC AP88-90 data loader (NEW v2.3)
|
|
|
|
| 14 |
- seo_analyzer: SEO analysis, PageRank estimation
|
| 15 |
- eval_metrics: MAP, NDCG, P@K, Recall, MRR
|
| 16 |
- ontology_manager: RDFLib integration
|
| 17 |
- verification_system: Main credibility pipeline
|
| 18 |
-
- graph_rag: GraphRAG for contextual memory
|
| 19 |
"""
|
| 20 |
|
| 21 |
-
__version__ = "2.3.0"
|
| 22 |
__author__ = "Dominique S. Loyer"
|
| 23 |
__citation__ = "loyerModelingHybridSystem2025"
|
| 24 |
|
|
@@ -29,11 +30,15 @@ from syscred.ontology_manager import OntologyManager
|
|
| 29 |
from syscred.seo_analyzer import SEOAnalyzer
|
| 30 |
from syscred.ir_engine import IREngine
|
| 31 |
from syscred.eval_metrics import EvaluationMetrics
|
|
|
|
| 32 |
|
| 33 |
# TREC Integration (NEW - Feb 2026)
|
| 34 |
from syscred.trec_retriever import TRECRetriever, Evidence, RetrievalResult
|
| 35 |
from syscred.trec_dataset import TRECDataset, TRECTopic
|
| 36 |
|
|
|
|
|
|
|
|
|
|
| 37 |
# Convenience alias
|
| 38 |
SysCRED = CredibilityVerificationSystem
|
| 39 |
|
|
@@ -46,10 +51,15 @@ __all__ = [
|
|
| 46 |
'SEOAnalyzer',
|
| 47 |
'IREngine',
|
| 48 |
'EvaluationMetrics',
|
|
|
|
| 49 |
# TREC (NEW)
|
| 50 |
'TRECRetriever',
|
| 51 |
'TRECDataset',
|
| 52 |
'TRECTopic',
|
| 53 |
'Evidence',
|
| 54 |
'RetrievalResult',
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
]
|
|
|
|
| 11 |
- ir_engine: BM25, QLD, TF-IDF, PRF (from TREC)
|
| 12 |
- trec_retriever: Evidence retrieval for fact-checking (NEW v2.3)
|
| 13 |
- trec_dataset: TREC AP88-90 data loader (NEW v2.3)
|
| 14 |
+
- liar_dataset: LIAR benchmark dataset loader (NEW v2.3)
|
| 15 |
- seo_analyzer: SEO analysis, PageRank estimation
|
| 16 |
- eval_metrics: MAP, NDCG, P@K, Recall, MRR
|
| 17 |
- ontology_manager: RDFLib integration
|
| 18 |
- verification_system: Main credibility pipeline
|
| 19 |
+
- graph_rag: GraphRAG for contextual memory (enhanced v2.3)
|
| 20 |
"""
|
| 21 |
|
| 22 |
+
__version__ = "2.3.1"
|
| 23 |
__author__ = "Dominique S. Loyer"
|
| 24 |
__citation__ = "loyerModelingHybridSystem2025"
|
| 25 |
|
|
|
|
| 30 |
from syscred.seo_analyzer import SEOAnalyzer
|
| 31 |
from syscred.ir_engine import IREngine
|
| 32 |
from syscred.eval_metrics import EvaluationMetrics
|
| 33 |
+
from syscred.graph_rag import GraphRAG
|
| 34 |
|
| 35 |
# TREC Integration (NEW - Feb 2026)
|
| 36 |
from syscred.trec_retriever import TRECRetriever, Evidence, RetrievalResult
|
| 37 |
from syscred.trec_dataset import TRECDataset, TRECTopic
|
| 38 |
|
| 39 |
+
# LIAR Benchmark (NEW - Feb 2026)
|
| 40 |
+
from syscred.liar_dataset import LIARDataset, LiarStatement, LiarLabel
|
| 41 |
+
|
| 42 |
# Convenience alias
|
| 43 |
SysCRED = CredibilityVerificationSystem
|
| 44 |
|
|
|
|
| 51 |
'SEOAnalyzer',
|
| 52 |
'IREngine',
|
| 53 |
'EvaluationMetrics',
|
| 54 |
+
'GraphRAG',
|
| 55 |
# TREC (NEW)
|
| 56 |
'TRECRetriever',
|
| 57 |
'TRECDataset',
|
| 58 |
'TRECTopic',
|
| 59 |
'Evidence',
|
| 60 |
'RetrievalResult',
|
| 61 |
+
# LIAR Benchmark (NEW)
|
| 62 |
+
'LIARDataset',
|
| 63 |
+
'LiarStatement',
|
| 64 |
+
'LiarLabel',
|
| 65 |
]
|
syscred/backend_app.py
CHANGED
|
@@ -17,6 +17,20 @@ Endpoints:
|
|
| 17 |
import sys
|
| 18 |
import os
|
| 19 |
import traceback
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
from flask import Flask, request, jsonify, send_from_directory
|
| 21 |
from flask_cors import CORS
|
| 22 |
|
|
@@ -31,10 +45,15 @@ try:
|
|
| 31 |
from syscred.ontology_manager import OntologyManager
|
| 32 |
from syscred.config import config, Config
|
| 33 |
from syscred.database import init_db, db, AnalysisResult
|
|
|
|
|
|
|
|
|
|
| 34 |
SYSCRED_AVAILABLE = True
|
| 35 |
-
|
|
|
|
| 36 |
except ImportError as e:
|
| 37 |
SYSCRED_AVAILABLE = False
|
|
|
|
| 38 |
print(f"[SysCRED Backend] Warning: Could not import modules: {e}")
|
| 39 |
# Define dummy init_db to prevent crash
|
| 40 |
def init_db(app): pass
|
|
@@ -63,6 +82,40 @@ except Exception as e:
|
|
| 63 |
# --- Initialize SysCRED System ---
|
| 64 |
credibility_system = None
|
| 65 |
seo_analyzer = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
def initialize_system():
|
| 68 |
"""Initialize the credibility system (lazy loading)."""
|
|
@@ -339,6 +392,201 @@ def ontology_stats():
|
|
| 339 |
}), 200
|
| 340 |
|
| 341 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
# --- Main ---
|
| 343 |
if __name__ == '__main__':
|
| 344 |
print("=" * 60)
|
|
@@ -354,10 +602,15 @@ if __name__ == '__main__':
|
|
| 354 |
print()
|
| 355 |
print("[SysCRED Backend] Starting Flask server...")
|
| 356 |
print("[SysCRED Backend] Endpoints:")
|
| 357 |
-
print(" - POST /api/verify
|
| 358 |
-
print(" - POST /api/seo
|
| 359 |
-
print(" - GET /api/ontology/stats
|
| 360 |
-
print(" - GET /api/health
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
print()
|
| 362 |
|
| 363 |
app.run(host='0.0.0.0', port=5001, debug=True)
|
|
|
|
| 17 |
import sys
|
| 18 |
import os
|
| 19 |
import traceback
|
| 20 |
+
|
| 21 |
+
# Load environment variables from .env file
|
| 22 |
+
from pathlib import Path
|
| 23 |
+
try:
|
| 24 |
+
from dotenv import load_dotenv
|
| 25 |
+
env_path = Path(__file__).parent / '.env'
|
| 26 |
+
if env_path.exists():
|
| 27 |
+
load_dotenv(env_path)
|
| 28 |
+
print(f"[SysCRED Backend] Loaded .env from {env_path}")
|
| 29 |
+
else:
|
| 30 |
+
print(f"[SysCRED Backend] No .env file found at {env_path}")
|
| 31 |
+
except ImportError:
|
| 32 |
+
print("[SysCRED Backend] python-dotenv not installed, using system env vars")
|
| 33 |
+
|
| 34 |
from flask import Flask, request, jsonify, send_from_directory
|
| 35 |
from flask_cors import CORS
|
| 36 |
|
|
|
|
| 45 |
from syscred.ontology_manager import OntologyManager
|
| 46 |
from syscred.config import config, Config
|
| 47 |
from syscred.database import init_db, db, AnalysisResult
|
| 48 |
+
# TREC modules
|
| 49 |
+
from syscred.trec_retriever import TRECRetriever, Evidence, RetrievalResult
|
| 50 |
+
from syscred.eval_metrics import EvaluationMetrics
|
| 51 |
SYSCRED_AVAILABLE = True
|
| 52 |
+
TREC_AVAILABLE = True
|
| 53 |
+
print("[SysCRED Backend] Modules imported successfully (including TREC)")
|
| 54 |
except ImportError as e:
|
| 55 |
SYSCRED_AVAILABLE = False
|
| 56 |
+
TREC_AVAILABLE = False
|
| 57 |
print(f"[SysCRED Backend] Warning: Could not import modules: {e}")
|
| 58 |
# Define dummy init_db to prevent crash
|
| 59 |
def init_db(app): pass
|
|
|
|
| 82 |
# --- Initialize SysCRED System ---
|
| 83 |
credibility_system = None
|
| 84 |
seo_analyzer = None
|
| 85 |
+
trec_retriever = None
|
| 86 |
+
eval_metrics = None
|
| 87 |
+
|
| 88 |
+
# Demo corpus for TREC (AP88-90 style documents)
|
| 89 |
+
TREC_DEMO_CORPUS = {
|
| 90 |
+
"AP880101-0001": {
|
| 91 |
+
"text": "Climate change is primarily caused by human activities, particularly the burning of fossil fuels which release greenhouse gases into the atmosphere.",
|
| 92 |
+
"title": "Climate Science Report"
|
| 93 |
+
},
|
| 94 |
+
"AP880101-0002": {
|
| 95 |
+
"text": "The Earth's temperature has risen significantly over the past century due to greenhouse gas emissions from industrial activities and deforestation.",
|
| 96 |
+
"title": "Global Warming Study"
|
| 97 |
+
},
|
| 98 |
+
"AP880102-0001": {
|
| 99 |
+
"text": "Scientists warn that sea levels could rise dramatically if current warming trends continue, threatening coastal cities worldwide.",
|
| 100 |
+
"title": "Sea Level Warning"
|
| 101 |
+
},
|
| 102 |
+
"AP890215-0001": {
|
| 103 |
+
"text": "The presidential election campaign focused on economic policies, healthcare reform, and national security issues.",
|
| 104 |
+
"title": "Election Coverage"
|
| 105 |
+
},
|
| 106 |
+
"AP890216-0001": {
|
| 107 |
+
"text": "Stock markets rose sharply after positive economic indicators were released by the Federal Reserve, signaling economic recovery.",
|
| 108 |
+
"title": "Financial News"
|
| 109 |
+
},
|
| 110 |
+
"AP880201-0001": {
|
| 111 |
+
"text": "Renewable energy sources like solar and wind power are becoming more cost-effective alternatives to fossil fuels.",
|
| 112 |
+
"title": "Green Energy Report"
|
| 113 |
+
},
|
| 114 |
+
"AP890301-0001": {
|
| 115 |
+
"text": "The technology industry continues to grow rapidly, with artificial intelligence and machine learning driving innovation.",
|
| 116 |
+
"title": "Tech Industry Update"
|
| 117 |
+
},
|
| 118 |
+
}
|
| 119 |
|
| 120 |
def initialize_system():
|
| 121 |
"""Initialize the credibility system (lazy loading)."""
|
|
|
|
| 392 |
}), 200
|
| 393 |
|
| 394 |
|
| 395 |
+
# --- TREC Endpoints ---
|
| 396 |
+
|
| 397 |
+
@app.route('/api/trec/search', methods=['POST'])
|
| 398 |
+
def trec_search():
|
| 399 |
+
"""
|
| 400 |
+
Search for evidence using TREC retrieval methods.
|
| 401 |
+
|
| 402 |
+
Request JSON:
|
| 403 |
+
{
|
| 404 |
+
"query": "Claim or query to search for",
|
| 405 |
+
"k": 10, # Number of results (optional, default 10)
|
| 406 |
+
"model": "bm25" # Retrieval model: bm25, tfidf, qld (optional)
|
| 407 |
+
}
|
| 408 |
+
|
| 409 |
+
Response:
|
| 410 |
+
{
|
| 411 |
+
"query": "original query",
|
| 412 |
+
"results": [
|
| 413 |
+
{"doc_id": "AP880101-0001", "score": 6.27, "rank": 1, "text": "...", "title": "..."},
|
| 414 |
+
...
|
| 415 |
+
],
|
| 416 |
+
"total": 3,
|
| 417 |
+
"model": "bm25",
|
| 418 |
+
"search_time_ms": 12.5
|
| 419 |
+
}
|
| 420 |
+
"""
|
| 421 |
+
global trec_retriever, eval_metrics
|
| 422 |
+
|
| 423 |
+
# Initialize TREC components if needed
|
| 424 |
+
if trec_retriever is None:
|
| 425 |
+
try:
|
| 426 |
+
trec_retriever = TRECRetriever(use_stemming=True, enable_prf=False)
|
| 427 |
+
trec_retriever.corpus = TREC_DEMO_CORPUS
|
| 428 |
+
eval_metrics = EvaluationMetrics()
|
| 429 |
+
print("[SysCRED Backend] TREC Retriever initialized with demo corpus")
|
| 430 |
+
except Exception as e:
|
| 431 |
+
return jsonify({'error': f'TREC initialization failed: {str(e)}'}), 503
|
| 432 |
+
|
| 433 |
+
if not request.is_json:
|
| 434 |
+
return jsonify({'error': 'Request must be JSON'}), 400
|
| 435 |
+
|
| 436 |
+
data = request.get_json()
|
| 437 |
+
query = data.get('query', '').strip()
|
| 438 |
+
|
| 439 |
+
if not query:
|
| 440 |
+
return jsonify({'error': "'query' is required"}), 400
|
| 441 |
+
|
| 442 |
+
k = data.get('k', 10)
|
| 443 |
+
model = data.get('model', 'bm25')
|
| 444 |
+
|
| 445 |
+
try:
|
| 446 |
+
import time
|
| 447 |
+
start_time = time.time()
|
| 448 |
+
|
| 449 |
+
# Retrieve evidence
|
| 450 |
+
result = trec_retriever.retrieve_evidence(query, k=k, model=model)
|
| 451 |
+
search_time_ms = (time.time() - start_time) * 1000
|
| 452 |
+
|
| 453 |
+
# Format results
|
| 454 |
+
results = []
|
| 455 |
+
for ev in result.evidences:
|
| 456 |
+
doc_info = trec_retriever.corpus.get(ev.doc_id, {})
|
| 457 |
+
results.append({
|
| 458 |
+
'doc_id': ev.doc_id,
|
| 459 |
+
'score': round(ev.score, 4),
|
| 460 |
+
'rank': ev.rank,
|
| 461 |
+
'text': ev.text,
|
| 462 |
+
'title': doc_info.get('title', ''),
|
| 463 |
+
'model': ev.retrieval_model
|
| 464 |
+
})
|
| 465 |
+
|
| 466 |
+
return jsonify({
|
| 467 |
+
'query': query,
|
| 468 |
+
'results': results,
|
| 469 |
+
'total': len(results),
|
| 470 |
+
'model': model,
|
| 471 |
+
'search_time_ms': round(search_time_ms, 2)
|
| 472 |
+
}), 200
|
| 473 |
+
|
| 474 |
+
except Exception as e:
|
| 475 |
+
print(f"[SysCRED Backend] TREC search error: {e}")
|
| 476 |
+
traceback.print_exc()
|
| 477 |
+
return jsonify({'error': str(e)}), 500
|
| 478 |
+
|
| 479 |
+
|
| 480 |
+
@app.route('/api/trec/corpus', methods=['GET'])
|
| 481 |
+
def trec_corpus():
|
| 482 |
+
"""
|
| 483 |
+
Get the TREC demo corpus information.
|
| 484 |
+
|
| 485 |
+
Response:
|
| 486 |
+
{
|
| 487 |
+
"corpus_size": 7,
|
| 488 |
+
"corpus_type": "AP88-90 Demo",
|
| 489 |
+
"documents": [
|
| 490 |
+
{"doc_id": "AP880101-0001", "title": "...", "text_preview": "..."},
|
| 491 |
+
...
|
| 492 |
+
]
|
| 493 |
+
}
|
| 494 |
+
"""
|
| 495 |
+
docs = []
|
| 496 |
+
for doc_id, doc in TREC_DEMO_CORPUS.items():
|
| 497 |
+
docs.append({
|
| 498 |
+
'doc_id': doc_id,
|
| 499 |
+
'title': doc.get('title', ''),
|
| 500 |
+
'text_preview': doc['text'][:150] + '...' if len(doc['text']) > 150 else doc['text']
|
| 501 |
+
})
|
| 502 |
+
|
| 503 |
+
return jsonify({
|
| 504 |
+
'corpus_size': len(TREC_DEMO_CORPUS),
|
| 505 |
+
'corpus_type': 'AP88-90 Demo',
|
| 506 |
+
'documents': docs
|
| 507 |
+
}), 200
|
| 508 |
+
|
| 509 |
+
|
| 510 |
+
@app.route('/api/trec/metrics', methods=['POST'])
|
| 511 |
+
def trec_metrics():
|
| 512 |
+
"""
|
| 513 |
+
Calculate IR evaluation metrics for a retrieval result.
|
| 514 |
+
|
| 515 |
+
Request JSON:
|
| 516 |
+
{
|
| 517 |
+
"retrieved": ["AP880101-0001", "AP890215-0001", "AP880101-0002"],
|
| 518 |
+
"relevant": ["AP880101-0001", "AP880101-0002", "AP880102-0001"]
|
| 519 |
+
}
|
| 520 |
+
|
| 521 |
+
Response:
|
| 522 |
+
{
|
| 523 |
+
"precision_at_3": 0.67,
|
| 524 |
+
"recall_at_3": 0.67,
|
| 525 |
+
"average_precision": 0.81,
|
| 526 |
+
"mrr": 1.0,
|
| 527 |
+
"ndcg_at_3": 0.88
|
| 528 |
+
}
|
| 529 |
+
"""
|
| 530 |
+
global eval_metrics
|
| 531 |
+
|
| 532 |
+
if eval_metrics is None:
|
| 533 |
+
eval_metrics = EvaluationMetrics()
|
| 534 |
+
|
| 535 |
+
if not request.is_json:
|
| 536 |
+
return jsonify({'error': 'Request must be JSON'}), 400
|
| 537 |
+
|
| 538 |
+
data = request.get_json()
|
| 539 |
+
retrieved = data.get('retrieved', [])
|
| 540 |
+
relevant = set(data.get('relevant', []))
|
| 541 |
+
|
| 542 |
+
if not retrieved:
|
| 543 |
+
return jsonify({'error': "'retrieved' list is required"}), 400
|
| 544 |
+
|
| 545 |
+
k = len(retrieved)
|
| 546 |
+
|
| 547 |
+
try:
|
| 548 |
+
# Calculate metrics
|
| 549 |
+
p_at_k = eval_metrics.precision_at_k(retrieved, relevant, k)
|
| 550 |
+
r_at_k = eval_metrics.recall_at_k(retrieved, relevant, k)
|
| 551 |
+
ap = eval_metrics.average_precision(retrieved, relevant)
|
| 552 |
+
mrr = eval_metrics.mrr(retrieved, relevant)
|
| 553 |
+
|
| 554 |
+
# For NDCG, create relevance dict (binary: 1 if relevant, 0 otherwise)
|
| 555 |
+
relevance_dict = {doc: 1 for doc in relevant}
|
| 556 |
+
ndcg = eval_metrics.ndcg_at_k(retrieved, relevance_dict, k)
|
| 557 |
+
|
| 558 |
+
return jsonify({
|
| 559 |
+
f'precision_at_{k}': round(p_at_k, 4),
|
| 560 |
+
f'recall_at_{k}': round(r_at_k, 4),
|
| 561 |
+
'average_precision': round(ap, 4),
|
| 562 |
+
'mrr': round(mrr, 4),
|
| 563 |
+
f'ndcg_at_{k}': round(ndcg, 4),
|
| 564 |
+
'metrics_explanation': {
|
| 565 |
+
'P@K': 'Proportion de documents pertinents parmi les K premiers récupérés',
|
| 566 |
+
'R@K': 'Proportion de documents pertinents récupérés parmi tous les pertinents',
|
| 567 |
+
'AP': 'Moyenne des précisions à chaque document pertinent trouvé',
|
| 568 |
+
'MRR': 'Rang réciproque du premier document pertinent',
|
| 569 |
+
'NDCG': 'Gain cumulatif normalisé avec décroissance logarithmique'
|
| 570 |
+
}
|
| 571 |
+
}), 200
|
| 572 |
+
|
| 573 |
+
except Exception as e:
|
| 574 |
+
print(f"[SysCRED Backend] TREC metrics error: {e}")
|
| 575 |
+
return jsonify({'error': str(e)}), 500
|
| 576 |
+
|
| 577 |
+
|
| 578 |
+
@app.route('/api/trec/health', methods=['GET'])
|
| 579 |
+
def trec_health():
|
| 580 |
+
"""Health check for TREC module."""
|
| 581 |
+
return jsonify({
|
| 582 |
+
'status': 'healthy',
|
| 583 |
+
'trec_available': TREC_AVAILABLE if 'TREC_AVAILABLE' in dir() else True,
|
| 584 |
+
'retriever_initialized': trec_retriever is not None,
|
| 585 |
+
'corpus_size': len(TREC_DEMO_CORPUS),
|
| 586 |
+
'models_available': ['bm25', 'tfidf', 'qld']
|
| 587 |
+
}), 200
|
| 588 |
+
|
| 589 |
+
|
| 590 |
# --- Main ---
|
| 591 |
if __name__ == '__main__':
|
| 592 |
print("=" * 60)
|
|
|
|
| 602 |
print()
|
| 603 |
print("[SysCRED Backend] Starting Flask server...")
|
| 604 |
print("[SysCRED Backend] Endpoints:")
|
| 605 |
+
print(" - POST /api/verify - Full credibility verification")
|
| 606 |
+
print(" - POST /api/seo - SEO analysis only (faster)")
|
| 607 |
+
print(" - GET /api/ontology/stats - Ontology statistics")
|
| 608 |
+
print(" - GET /api/health - Health check")
|
| 609 |
+
print(" --- TREC Endpoints ---")
|
| 610 |
+
print(" - POST /api/trec/search - Evidence retrieval (BM25/TF-IDF/QLD)")
|
| 611 |
+
print(" - POST /api/trec/metrics - Calculate IR metrics (MAP, P@K, NDCG)")
|
| 612 |
+
print(" - GET /api/trec/corpus - Demo corpus info")
|
| 613 |
+
print(" - GET /api/trec/health - TREC module health")
|
| 614 |
print()
|
| 615 |
|
| 616 |
app.run(host='0.0.0.0', port=5001, debug=True)
|
syscred/config.py
CHANGED
|
@@ -88,13 +88,15 @@ class Config:
|
|
| 88 |
PRF_EXPANSION_TERMS = int(os.getenv("SYSCRED_PRF_TERMS", "10"))
|
| 89 |
|
| 90 |
# === Pondération des scores ===
|
|
|
|
| 91 |
SCORE_WEIGHTS = {
|
| 92 |
-
'source_reputation': 0.25,
|
| 93 |
-
'domain_age': 0.10
|
| 94 |
-
'sentiment_neutrality': 0.15
|
| 95 |
-
'entity_presence': 0.15
|
| 96 |
-
'coherence': 0.15
|
| 97 |
-
'fact_check': 0.20
|
|
|
|
| 98 |
}
|
| 99 |
|
| 100 |
# === Seuils de crédibilité ===
|
|
|
|
| 88 |
PRF_EXPANSION_TERMS = int(os.getenv("SYSCRED_PRF_TERMS", "10"))
|
| 89 |
|
| 90 |
# === Pondération des scores ===
|
| 91 |
+
# Note: Weights should sum to 1.0 for proper normalization
|
| 92 |
SCORE_WEIGHTS = {
|
| 93 |
+
'source_reputation': 0.22, # Was 0.25, reduced for graph_context
|
| 94 |
+
'domain_age': 0.08, # Was 0.10
|
| 95 |
+
'sentiment_neutrality': 0.13, # Was 0.15
|
| 96 |
+
'entity_presence': 0.13, # Was 0.15
|
| 97 |
+
'coherence': 0.12, # Was 0.15
|
| 98 |
+
'fact_check': 0.17, # Was 0.20
|
| 99 |
+
'graph_context': 0.15 # NEW - Historical knowledge from GraphRAG
|
| 100 |
}
|
| 101 |
|
| 102 |
# === Seuils de crédibilité ===
|
syscred/datasets/liar/README
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
LIAR: A BENCHMARK DATASET FOR FAKE NEWS DETECTION
|
| 2 |
+
|
| 3 |
+
William Yang Wang, "Liar, Liar Pants on Fire": A New Benchmark Dataset for Fake News Detection, to appear in Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (ACL 2017), short paper, Vancouver, BC, Canada, July 30-August 4, ACL.
|
| 4 |
+
=====================================================================
|
| 5 |
+
Description of the TSV format:
|
| 6 |
+
|
| 7 |
+
Column 1: the ID of the statement ([ID].json).
|
| 8 |
+
Column 2: the label.
|
| 9 |
+
Column 3: the statement.
|
| 10 |
+
Column 4: the subject(s).
|
| 11 |
+
Column 5: the speaker.
|
| 12 |
+
Column 6: the speaker's job title.
|
| 13 |
+
Column 7: the state info.
|
| 14 |
+
Column 8: the party affiliation.
|
| 15 |
+
Column 9-13: the total credit history count, including the current statement.
|
| 16 |
+
9: barely true counts.
|
| 17 |
+
10: false counts.
|
| 18 |
+
11: half true counts.
|
| 19 |
+
12: mostly true counts.
|
| 20 |
+
13: pants on fire counts.
|
| 21 |
+
Column 14: the context (venue / location of the speech or statement).
|
| 22 |
+
|
| 23 |
+
Note that we do not provide the full-text verdict report in this current version of the dataset,
|
| 24 |
+
but you can use the following command to access the full verdict report and links to the source documents:
|
| 25 |
+
wget http://www.politifact.com//api/v/2/statement/[ID]/?format=json
|
| 26 |
+
|
| 27 |
+
======================================================================
|
| 28 |
+
The original sources retain the copyright of the data.
|
| 29 |
+
|
| 30 |
+
Note that there are absolutely no guarantees with this data,
|
| 31 |
+
and we provide this dataset "as is",
|
| 32 |
+
but you are welcome to report the issues of the preliminary version
|
| 33 |
+
of this data.
|
| 34 |
+
|
| 35 |
+
You are allowed to use this dataset for research purposes only.
|
| 36 |
+
|
| 37 |
+
For more questions about the dataset, please contact:
|
| 38 |
+
William Wang, william@cs.ucsb.edu
|
| 39 |
+
|
| 40 |
+
v1.0 04/23/2017
|
| 41 |
+
|
syscred/datasets/liar/test.tsv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
syscred/datasets/liar/train.tsv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
syscred/datasets/liar/valid.tsv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
syscred/eval_metrics.py
CHANGED
|
@@ -133,6 +133,15 @@ class EvaluationMetrics:
|
|
| 133 |
return 1.0 / (i + 1)
|
| 134 |
return 0.0
|
| 135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
# --- TREC-Style Evaluation ---
|
| 137 |
|
| 138 |
def evaluate_run(
|
|
|
|
| 133 |
return 1.0 / (i + 1)
|
| 134 |
return 0.0
|
| 135 |
|
| 136 |
+
@staticmethod
|
| 137 |
+
def mrr(retrieved: List[str], relevant: set) -> float:
|
| 138 |
+
"""
|
| 139 |
+
Alias for reciprocal_rank (Mean Reciprocal Rank for single query).
|
| 140 |
+
|
| 141 |
+
MRR = 1 / rank of first relevant document
|
| 142 |
+
"""
|
| 143 |
+
return EvaluationMetrics.reciprocal_rank(retrieved, relevant)
|
| 144 |
+
|
| 145 |
# --- TREC-Style Evaluation ---
|
| 146 |
|
| 147 |
def evaluate_run(
|
syscred/graph_rag.py
CHANGED
|
@@ -117,13 +117,13 @@ class GraphRAG:
|
|
| 117 |
Returns dict with 'text' (for LLM) and 'uris' (for Graph linking).
|
| 118 |
"""
|
| 119 |
if not keywords:
|
| 120 |
-
return {"text": "", "uris": []}
|
| 121 |
|
| 122 |
# Build REGEX filter for keywords (OR logic)
|
| 123 |
# e.g., (fake|hoax|conspiracy)
|
| 124 |
clean_kws = [k for k in keywords if len(k) > 3] # Skip short words
|
| 125 |
if not clean_kws:
|
| 126 |
-
return {"text": "", "uris": []}
|
| 127 |
|
| 128 |
regex_pattern = "|".join(clean_kws)
|
| 129 |
|
|
@@ -156,10 +156,10 @@ class GraphRAG:
|
|
| 156 |
})
|
| 157 |
except Exception as e:
|
| 158 |
print(f"[GraphRAG] Similar claims error: {e}")
|
| 159 |
-
return {"text": "", "uris": []}
|
| 160 |
|
| 161 |
if not results:
|
| 162 |
-
return {"text": "", "uris": []}
|
| 163 |
|
| 164 |
lines = [f"Found {len(results)} similar claims in history:"]
|
| 165 |
for r in results:
|
|
@@ -167,5 +167,130 @@ class GraphRAG:
|
|
| 167 |
|
| 168 |
return {
|
| 169 |
"text": "\n".join(lines),
|
| 170 |
-
"uris": [r['uri'] for r in results]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
}
|
|
|
|
| 117 |
Returns dict with 'text' (for LLM) and 'uris' (for Graph linking).
|
| 118 |
"""
|
| 119 |
if not keywords:
|
| 120 |
+
return {"text": "", "uris": [], "scores": []}
|
| 121 |
|
| 122 |
# Build REGEX filter for keywords (OR logic)
|
| 123 |
# e.g., (fake|hoax|conspiracy)
|
| 124 |
clean_kws = [k for k in keywords if len(k) > 3] # Skip short words
|
| 125 |
if not clean_kws:
|
| 126 |
+
return {"text": "", "uris": [], "scores": []}
|
| 127 |
|
| 128 |
regex_pattern = "|".join(clean_kws)
|
| 129 |
|
|
|
|
| 156 |
})
|
| 157 |
except Exception as e:
|
| 158 |
print(f"[GraphRAG] Similar claims error: {e}")
|
| 159 |
+
return {"text": "", "uris": [], "scores": []}
|
| 160 |
|
| 161 |
if not results:
|
| 162 |
+
return {"text": "", "uris": [], "scores": []}
|
| 163 |
|
| 164 |
lines = [f"Found {len(results)} similar claims in history:"]
|
| 165 |
for r in results:
|
|
|
|
| 167 |
|
| 168 |
return {
|
| 169 |
"text": "\n".join(lines),
|
| 170 |
+
"uris": [r['uri'] for r in results],
|
| 171 |
+
"scores": [r['score'] for r in results]
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
def compute_context_score(self, domain: str, keywords: List[str] = []) -> Dict[str, float]:
|
| 175 |
+
"""
|
| 176 |
+
Compute numerical context scores for integration into credibility scoring.
|
| 177 |
+
|
| 178 |
+
This transforms the GraphRAG context into actionable numerical scores
|
| 179 |
+
that can be directly used in the calculate_overall_score() function.
|
| 180 |
+
|
| 181 |
+
Args:
|
| 182 |
+
domain: The domain being analyzed (e.g., 'lemonde.fr')
|
| 183 |
+
keywords: List of keywords from the claim
|
| 184 |
+
|
| 185 |
+
Returns:
|
| 186 |
+
Dictionary with:
|
| 187 |
+
- 'history_score': 0.0-1.0 based on past evaluations of this domain
|
| 188 |
+
- 'pattern_score': 0.0-1.0 based on similar claims in the graph
|
| 189 |
+
- 'combined_score': Weighted average (0.7 * history + 0.3 * pattern)
|
| 190 |
+
- 'confidence': How confident we are (based on amount of data)
|
| 191 |
+
- 'has_history': Boolean if domain has prior evaluations
|
| 192 |
+
"""
|
| 193 |
+
result = {
|
| 194 |
+
'history_score': 0.5, # Neutral default
|
| 195 |
+
'pattern_score': 0.5,
|
| 196 |
+
'combined_score': 0.5,
|
| 197 |
+
'confidence': 0.0,
|
| 198 |
+
'has_history': False,
|
| 199 |
+
'history_count': 0,
|
| 200 |
+
'similar_count': 0
|
| 201 |
+
}
|
| 202 |
+
|
| 203 |
+
if not self.om:
|
| 204 |
+
return result
|
| 205 |
+
|
| 206 |
+
# 1. Get source history score
|
| 207 |
+
history_data = self._get_source_history_data(domain)
|
| 208 |
+
if history_data['count'] > 0:
|
| 209 |
+
result['history_score'] = history_data['avg_score']
|
| 210 |
+
result['has_history'] = True
|
| 211 |
+
result['history_count'] = history_data['count']
|
| 212 |
+
# Confidence increases with more data points (max at 5)
|
| 213 |
+
history_confidence = min(1.0, history_data['count'] / 5)
|
| 214 |
+
else:
|
| 215 |
+
history_confidence = 0.0
|
| 216 |
+
|
| 217 |
+
# 2. Get pattern score from similar claims
|
| 218 |
+
if keywords:
|
| 219 |
+
similar_result = self._find_similar_claims(keywords)
|
| 220 |
+
scores = similar_result.get('scores', [])
|
| 221 |
+
if scores:
|
| 222 |
+
result['pattern_score'] = sum(scores) / len(scores)
|
| 223 |
+
result['similar_count'] = len(scores)
|
| 224 |
+
pattern_confidence = min(1.0, len(scores) / 3)
|
| 225 |
+
else:
|
| 226 |
+
pattern_confidence = 0.0
|
| 227 |
+
else:
|
| 228 |
+
pattern_confidence = 0.0
|
| 229 |
+
|
| 230 |
+
# 3. Calculate combined score
|
| 231 |
+
# Weight history more heavily than pattern matching
|
| 232 |
+
if result['has_history'] and result['similar_count'] > 0:
|
| 233 |
+
result['combined_score'] = 0.7 * result['history_score'] + 0.3 * result['pattern_score']
|
| 234 |
+
result['confidence'] = 0.6 * history_confidence + 0.4 * pattern_confidence
|
| 235 |
+
elif result['has_history']:
|
| 236 |
+
result['combined_score'] = result['history_score']
|
| 237 |
+
result['confidence'] = history_confidence * 0.8 # Reduce confidence without pattern
|
| 238 |
+
elif result['similar_count'] > 0:
|
| 239 |
+
result['combined_score'] = result['pattern_score']
|
| 240 |
+
result['confidence'] = pattern_confidence * 0.5 # Lower confidence with only patterns
|
| 241 |
+
else:
|
| 242 |
+
# No data available - return neutral
|
| 243 |
+
result['combined_score'] = 0.5
|
| 244 |
+
result['confidence'] = 0.0
|
| 245 |
+
|
| 246 |
+
return result
|
| 247 |
+
|
| 248 |
+
def _get_source_history_data(self, domain: str) -> Dict[str, Any]:
|
| 249 |
+
"""
|
| 250 |
+
Query the graph for evaluation statistics of this domain.
|
| 251 |
+
|
| 252 |
+
Returns:
|
| 253 |
+
Dictionary with 'count', 'avg_score', 'last_verdict', 'scores'
|
| 254 |
+
"""
|
| 255 |
+
if not domain:
|
| 256 |
+
return {'count': 0, 'avg_score': 0.5, 'scores': []}
|
| 257 |
+
|
| 258 |
+
query = """
|
| 259 |
+
PREFIX cred: <https://github.com/DominiqueLoyer/systemFactChecking#>
|
| 260 |
+
|
| 261 |
+
SELECT ?score ?level ?timestamp
|
| 262 |
+
WHERE {
|
| 263 |
+
?info cred:informationURL ?url .
|
| 264 |
+
?request cred:concernsInformation ?info .
|
| 265 |
+
?report cred:isReportOf ?request .
|
| 266 |
+
?report cred:credibilityScoreValue ?score .
|
| 267 |
+
?report cred:assignsCredibilityLevel ?level .
|
| 268 |
+
?report cred:completionTimestamp ?timestamp .
|
| 269 |
+
FILTER(CONTAINS(STR(?url), "%s"))
|
| 270 |
+
}
|
| 271 |
+
ORDER BY DESC(?timestamp)
|
| 272 |
+
LIMIT 10
|
| 273 |
+
""" % domain
|
| 274 |
+
|
| 275 |
+
scores = []
|
| 276 |
+
last_verdict = None
|
| 277 |
+
|
| 278 |
+
try:
|
| 279 |
+
combined = self.om.base_graph + self.om.data_graph
|
| 280 |
+
for i, row in enumerate(combined.query(query)):
|
| 281 |
+
scores.append(float(row.score))
|
| 282 |
+
if i == 0:
|
| 283 |
+
last_verdict = str(row.level).split('#')[-1]
|
| 284 |
+
except Exception as e:
|
| 285 |
+
print(f"[GraphRAG] History data query error: {e}")
|
| 286 |
+
return {'count': 0, 'avg_score': 0.5, 'scores': []}
|
| 287 |
+
|
| 288 |
+
if not scores:
|
| 289 |
+
return {'count': 0, 'avg_score': 0.5, 'scores': []}
|
| 290 |
+
|
| 291 |
+
return {
|
| 292 |
+
'count': len(scores),
|
| 293 |
+
'avg_score': sum(scores) / len(scores),
|
| 294 |
+
'last_verdict': last_verdict,
|
| 295 |
+
'scores': scores
|
| 296 |
}
|
syscred/liar_benchmark_test.csv
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id,statement,ground_truth_6way,ground_truth_binary,ground_truth_ternary,speaker,party,syscred_score,predicted_binary,predicted_ternary,binary_correct,ternary_correct,processing_time,error,sentiment,bias
|
| 2 |
+
4134.json,"Your tax dollars are not being used to sue you, the people.",FALSE,Fake,False,alan-hays,republican,0.52,Real,Mixed,False,False,3.3536229133605957,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 3 |
+
5710.json,General Motors is the largest corporation in the world again.,FALSE,Fake,False,joe-biden,democrat,0.52,Real,Mixed,False,False,0.7072207927703857,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 4 |
+
7356.json,Three in four low-income workers dont have any paid sick days available.,TRUE,Real,True,sherrod-brown,democrat,0.52,Real,Mixed,True,False,0.7895858287811279,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 5 |
+
11200.json,"Says 57 percent of federal spending goes to the military and just 1 percent goes to food and agriculture, including food stamps.",FALSE,Fake,False,facebook-posts,none,0.35,Fake,Mixed,True,False,0.8112809658050537,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 6 |
+
447.json,I have never said that I don't wear flag pins or refuse to wear flag pins.,FALSE,Fake,False,barack-obama,democrat,0.52,Real,Mixed,False,False,0.7085680961608887,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 7 |
+
2852.json,More than 10 years of free trade has brought a $2 billion per day trade deficit.,MOSTLY_TRUE,Real,True,sherrod-brown,democrat,0.52,Real,Mixed,True,False,1.0006098747253418,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 8 |
+
13487.json,"Says that when San Francisco banned plastic grocery bags, you saw the number of instances of people going to the ER with things like salmonella and other related illnesses spike.",BARELY_TRUE,Fake,Mixed,james-quintero,none,0.52,Real,Mixed,False,True,1.4090971946716309,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 9 |
+
1084.json,"Now, there was a time when someone like Scalia and Ginsburg got 95-plus votes.",TRUE,Real,True,lindsey-graham,republican,0.52,Real,Mixed,True,False,0.8899619579315186,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 10 |
+
13203.json,Toomey and Trump will ban abortion and punish women who have them.,FALSE,Fake,False,naral-pro-choice,organization,0.35,Fake,Mixed,True,False,0.9519002437591553,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 11 |
+
5993.json,Says Larry Taylor gave in-state tuition to illegal immigrants.,PANTS_FIRE,Fake,False,conservative-voters-texas-pac,none,0.52,Real,Mixed,False,False,1.0933752059936523,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 12 |
+
4116.json,"POTUS economists: Stimulus Has Cost $278,000 per job.",FALSE,Fake,False,john-boehner,republican,0.52,Real,Mixed,False,False,0.6499719619750977,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 13 |
+
12599.json,"If you are a member of union, your median weekly income is roughly $200 more than if you are a nonunion member, and that doesnt include benefits.",TRUE,Real,True,thomas-perez,democrat,0.52,Real,Mixed,True,False,0.7228951454162598,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 14 |
+
7819.json,"18 percent of our land in our state right now is either federally, state or county owned for conservation purposes.",TRUE,Real,True,alberta-darling,republican,0.52,Real,Mixed,True,False,0.9661009311676025,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 15 |
+
2374.json,"Like me, Scott Walker opposed the 1998 transportation bill and the $9 billion of wasteful spending.",FALSE,Fake,False,jim-sensenbrenner,republican,0.52,Real,Mixed,False,False,0.9515271186828613,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 16 |
+
3941.json,The Democrat-led Senate has failed to pass a budget for 750 days ... Senate Democrats have neglected one of their most basic responsibilities.,HALF_TRUE,Real,Mixed,rob-portman,republican,0.54,Real,Mixed,True,True,0.6562759876251221,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 17 |
+
1280.json,"Here in Florida, Ive slashed government by 10 percent. That's $7 billion.",BARELY_TRUE,Fake,Mixed,charlie-crist,democrat,0.52,Real,Mixed,False,True,0.9024300575256348,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 18 |
+
11661.json,If Trump had just put his fathers money in a mutual fund ... hed have $8 billion.,FALSE,Fake,False,occupy-democrats,organization,0.36,Fake,Mixed,True,False,0.8690879344940186,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 19 |
+
6.json,Barack Obama and Hillary Clinton have changed their positions (on the Iraq war withdrawal) to follow Chris Dodd.,HALF_TRUE,Real,Mixed,chris-dodd,democrat,0.53,Real,Mixed,True,True,1.3197407722473145,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 20 |
+
3869.json,"When the union says I want to eliminate tenure, thats not true.",HALF_TRUE,Real,Mixed,chris-christie,republican,0.52,Real,Mixed,True,True,0.9137670993804932,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 21 |
+
11229.json,"Last year, we had zero percent growth in GDP in Virginia ...The only states that did worse than us were Alaska and Mississippi.",TRUE,Real,True,glen-sturtevant,republican,0.53,Real,Mixed,True,False,0.8765408992767334,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 22 |
+
12703.json,"In South Sudan, more teenage girls die in childbirth than finish high school.",MOSTLY_TRUE,Real,True,unicef,none,0.52,Real,Mixed,True,False,1.0562257766723633,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 23 |
+
6320.json,Gov. Romneys plan would cut taxes for the folks at the very top.,MOSTLY_TRUE,Real,True,barack-obama,democrat,0.52,Real,Mixed,True,False,0.991412878036499,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 24 |
+
7136.json,Says the state is required pick up 70 percent of the cost of K-12 education.,BARELY_TRUE,Fake,Mixed,john-kitzhaber,democrat,0.53,Real,Mixed,False,True,0.9649970531463623,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 25 |
+
7552.json,Georgia loses millions of dollars of revenue because people go out of state to buy fireworks.,MOSTLY_TRUE,Real,True,jeff-mullis,republican,0.53,Real,Mixed,True,False,0.7320408821105957,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 26 |
+
11118.json,"With the exception of baby formula, the federal government does not require any food to carry an expiration date, and state laws vary widely.",TRUE,Real,True,john-oliver,none,0.52,Real,Mixed,True,False,1.1864137649536133,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 27 |
+
7244.json,The amount of money that we put into running our own state legislature is nearly as much as we put into the University of Rhode Island.,FALSE,Fake,False,dawson-hodgson,republican,0.52,Real,Mixed,False,False,1.2284369468688965,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 28 |
+
3215.json,Four state Assembly Democrats scored a death blow to northeast Wisconsins economy by killing hundreds of jobs at a potential Bass Pro Shops near Green Bay.,PANTS_FIRE,Fake,False,scott-suder,republican,0.53,Real,Mixed,False,False,1.3722262382507324,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 29 |
+
6858.json,"Says opponent U.S. Rep. Gwen Moore is Wisconsins most absent member of Congress, missing nearly 17 percent of the House votes in the second quarter of 2012.",MOSTLY_TRUE,Real,True,dan-sebring,republican,0.52,Real,Mixed,True,False,1.0067930221557617,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 30 |
+
4161.json,Active duty males in the military are twice as likely to develop prostate cancer than their civilian counterparts.,MOSTLY_TRUE,Real,True,cliff-stearns,republican,0.52,Real,Mixed,True,False,0.817112922668457,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 31 |
+
4568.json,"Says Vince Polistina is collecting nearly $70,000 in taxpayer-funded salaries -- plus a government pension.",BARELY_TRUE,Fake,Mixed,new-jersey-democratic-state-committee,democrat,0.53,Real,Mixed,False,True,0.802901029586792,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 32 |
+
11256.json,"Bernie Sanders opposesthe Trans-Pacific Partnership, and Hillary Clinton supportsit.",MOSTLY_TRUE,Real,True,viral-image,none,0.52,Real,Mixed,True,False,0.9945018291473389,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 33 |
+
10450.json,"When you look at the earned income tax credit, it has about a 25 percent fraud rate. Were looking at $20 billion to $30 billion.",HALF_TRUE,Real,Mixed,rand-paul,republican,0.54,Real,Mixed,True,True,1.004997968673706,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 34 |
+
8419.json,Virtually every person across this country has seen premiums going up and up and up due to Obamacare.,FALSE,Fake,False,ted-cruz,republican,0.52,Real,Mixed,False,False,0.9335529804229736,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 35 |
+
1015.json,Democrats in Congress had control since January of 2007. They haven't passed a law making waterboarding illegal. They haven't gone into any of these things and changed law.,BARELY_TRUE,Fake,Mixed,newt-gingrich,republican,0.54,Real,Mixed,False,True,0.8649272918701172,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 36 |
+
8557.json,"The United States has never been richer, if you look at per capita GDP.",TRUE,Real,True,jan-schakowsky,democrat,0.53,Real,Mixed,True,False,1.4557609558105469,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 37 |
+
3103.json,"George Allen voted for budgets that increased the national debt by $16,400 for every second he served in the U.S. Senate.",TRUE,Real,True,jamie-radtke,republican,0.52,Real,Mixed,True,False,0.8361649513244629,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 38 |
+
1576.json,Health insurance companies' costs are only 4 percent of all health care spending.,TRUE,Real,True,americas-health-insurance-plans,none,0.53,Real,Mixed,True,False,1.2063438892364502,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 39 |
+
4131.json,"The State Election Board has issued nearly $275,000 in fines to violators of absentee ballot laws.",TRUE,Real,True,brian-kemp,republican,0.53,Real,Mixed,True,False,1.1224148273468018,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 40 |
+
7677.json,"The sex-offender registry has been around for a long time, and the research thats out there says that it has no positive impact on the public safety.",MOSTLY_TRUE,Real,True,ohio-public-defenders-office,none,0.52,Real,Mixed,True,False,1.0617139339447021,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 41 |
+
4323.json,"When lenders foreclose on homes, they typically suffer losses that exceed 30 percent of the value of the home.",MOSTLY_TRUE,Real,True,thaddeus-mccotter,republican,0.52,Real,Mixed,True,False,0.8316628932952881,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 42 |
+
1955.json,"U.S. Rep. Jim Langevin didn't want a border fence to block illegal immigration ""because he is afraid that someone will get hurt trying to go around the fence.""",PANTS_FIRE,Fake,False,michael-j-gardiner,republican,0.52,Real,Mixed,False,False,0.8672749996185303,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 43 |
+
4017.json,"On the day of the New Hampshire primary in 1980, the top 13 people of Ronald Reagans staff quit.",MOSTLY_TRUE,Real,True,newt-gingrich,republican,0.52,Real,Mixed,True,False,1.1178889274597168,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 44 |
+
769.json,Barack Obama ... 96 percent of his votes have been solely along party line.,TRUE,Real,True,sarah-palin,republican,0.53,Real,Mixed,True,False,0.8946268558502197,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 45 |
+
171.json,''The leading Democratic candidate once said that the unfettered free market is the most destructive force in modern America.'',FALSE,Fake,False,rudy-giuliani,republican,0.52,Real,Mixed,False,False,0.7632577419281006,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 46 |
+
9944.json,The U.S. Constitution is the oldest written constitution still in use today among nations.,TRUE,Real,True,bob-goodlatte,republican,0.52,Real,Mixed,True,False,0.8168671131134033,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 47 |
+
7448.json,Felony crimes in the city of Atlanta are the lowest they have been since 1969.,TRUE,Real,True,kasim-reed,democrat,0.52,Real,Mixed,True,False,0.9511759281158447,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 48 |
+
2760.json,Robert Hurt supported a bill that helped the uranium industry after taking contributions from the industry and because his father had a stake in it.,BARELY_TRUE,Fake,Mixed,sierra-club,none,0.52,Real,Mixed,False,True,1.0464277267456055,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 49 |
+
10283.json,"In 1990, the U.S. led the world in the percentage of 25-34 year olds with college degrees. Today we are in 12th place.",MOSTLY_TRUE,Real,True,bernie-s,independent,0.54,Real,Mixed,True,False,0.966181755065918,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 50 |
+
10271.json,"Atlanta Mayor Kasim Reed became a city resident in the last annexation, opening the door to his candidacy.",MOSTLY_TRUE,Real,True,keisha-lance-bottoms,democrat,0.52,Real,Mixed,True,False,0.9616649150848389,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 51 |
+
9788.json,Ive been here almost every day.,BARELY_TRUE,Fake,Mixed,jay-nixon,democrat,0.53,Real,Mixed,False,True,1.5777778625488281,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 52 |
+
8434.json,"Mark Herring voted to allow sex offenders, including rapists, to serve shorter sentences.",BARELY_TRUE,Fake,Mixed,mark-obenshain,republican,0.52,Real,Mixed,False,True,0.902681827545166,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 53 |
+
4189.json,"Says U.S. Rep. Ron Kinds stimulus cost taxpayers $278,000 per job.",FALSE,Fake,False,national-republican-congressional-committee,republican,0.52,Real,Mixed,False,False,0.9022071361541748,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 54 |
+
11867.json,Says CNN reported Ben Carson was taking a break from campaigning and the Cruz campaign forwarded that news to our volunteers.,FALSE,Fake,False,ted-cruz,republican,0.52,Real,Mixed,False,False,1.0397439002990723,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 55 |
+
7285.json,"Says Chris Christies plan to kick-start our economy is to propose an income tax cut that disproportionately benefits the wealthy, and...hes still proposing it.",PANTS_FIRE,Fake,False,barbara-buono,democrat,0.54,Real,Mixed,False,False,1.4161858558654785,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 56 |
+
1444.json,"Obama came into office ""with a $1.3 trillion deficit before I had passed any law. ... We came in with $8 trillion worth of debt over the next decade.""",MOSTLY_TRUE,Real,True,barack-obama,democrat,0.54,Real,Mixed,True,False,0.9389801025390625,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 57 |
+
7537.json,"The United States spends $2.2 billion on free cell phones, $27 million on Moroccan pottery classes and pays for the travel expenses for the watermelon queen in Alabama.",HALF_TRUE,Real,Mixed,sean-duffy,republican,0.54,Real,Mixed,True,True,1.04280686378479,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 58 |
+
2796.json,Half of children struck by cars near schools are hit by parents driving children to school.,FALSE,Fake,False,safe-routes-schools,none,0.52,Real,Mixed,False,False,1.2210850715637207,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 59 |
+
11150.json,Caution: Kissing and cuddling chickens can be hazardous to your health.,TRUE,Real,True,centers-disease-control,none,0.52,Real,Mixed,True,False,0.8850150108337402,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 60 |
+
12650.json,You know what (Saddam Hussein) did well? He killed terrorists.,BARELY_TRUE,Fake,Mixed,donald-trump,republican,0.52,Real,Mixed,False,True,0.8362798690795898,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 61 |
+
3.json,"New Mexico was 46th in teacher pay (when he was elected), now we're 29th.",MOSTLY_TRUE,Real,True,bill-richardson,democrat,0.52,Real,Mixed,True,False,0.9594571590423584,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 62 |
+
1512.json,Fifty-seven of Rubios 100 ideas ultimately became law.,HALF_TRUE,Real,Mixed,marco-rubio,republican,0.52,Real,Mixed,True,True,1.4569880962371826,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 63 |
+
3235.json,"Under the White Houses budget proposal, we will not be adding more to the national debt by the middle of the decade.",FALSE,Fake,False,barack-obama,democrat,0.36,Fake,Mixed,True,False,1.4381189346313477,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 64 |
+
7909.json,"Says that when the Rolling Stones performed in an Austin park, they paid $25,000 to the nearby city of Rollingwood for one night of inconvenience.",TRUE,Real,True,barry-bone,none,0.52,Real,Mixed,True,False,1.264172077178955,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 65 |
+
6812.json,"The Obama administrations green stimulus program funneled close to $2 billion dollars to overseas firms, creating thousands of jobs in China.",BARELY_TRUE,Fake,Mixed,americans-tax-reform,none,0.53,Real,Mixed,False,True,1.306333303451538,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 66 |
+
7401.json,"Says, Statistics indicate that one in eight children, and one in 18 adults in Oregon suffers from mental illness.",MOSTLY_TRUE,Real,True,peter-courtney,democrat,0.52,Real,Mixed,True,False,1.0519630908966064,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 67 |
+
4391.json,Says Barack Obamas favorability rating in Israel once clocked in at 4 percent.,BARELY_TRUE,Fake,Mixed,kinky-friedman,democrat,0.53,Real,Mixed,False,True,0.9937312602996826,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 68 |
+
9472.json,Oregon school districts are directing 24 percent of their food budgets to purchase local foods. Thats the highest percentage in the country.,HALF_TRUE,Real,Mixed,oregon-department-agriculture,government-body,0.54,Real,Mixed,True,True,1.0283551216125488,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 69 |
+
12834.json,On whether hes had a relationship with Vladimir Putin.,FALSE,Fake,False,donald-trump,republican,0.35,Fake,Mixed,True,False,1.1149241924285889,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 70 |
+
5363.json,Obamacare ... will kill jobs across America.,FALSE,Fake,False,us-chamber-commerce,none,0.53,Real,Mixed,False,False,1.557218074798584,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 71 |
+
9080.json,You can buy lobster with food stamps.,TRUE,Real,True,greg-morris,republican,0.69,Real,True,True,True,1.4478328227996826,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 72 |
+
12648.json,"Many Nevadans relied on Uber for work, but after accepting $70,000 from taxi companies, Catherine Cortez Masto went after Uber ... (driving) them out of town.",BARELY_TRUE,Fake,Mixed,freedom-partners-action-fund,organization,0.54,Real,Mixed,False,True,1.1827037334442139,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 73 |
+
3330.json,Says Tri-Rail shows the potential problems with investing in high-speed rail.,BARELY_TRUE,Fake,Mixed,rick-scott,republican,0.52,Real,Mixed,False,True,1.007385015487671,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 74 |
+
6045.json,Says Hawaii Gov. Neil Abercrombie made a late-night visit to Kinkos to forge President Barack Obamas birth certificate two days before Obama unveiled it to the media.,PANTS_FIRE,Fake,False,chain-email,none,0.52,Real,Mixed,False,False,1.081031322479248,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 75 |
+
8606.json,Republicans have proposed dozens of (health care) solutions designed to help control costs and improve quality.,HALF_TRUE,Real,Mixed,ron-johnson,republican,0.53,Real,Mixed,True,True,1.0788249969482422,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 76 |
+
13484.json,"Harambe received 15,000 votes in the presidential election.",PANTS_FIRE,Fake,False,blog-posting,none,0.36,Fake,Mixed,True,False,0.9677271842956543,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 77 |
+
12168.json,Expanding the sale of full-strength beer and wine to grocery stores isgood for everybody.,HALF_TRUE,Real,Mixed,your-choice-colorado,organization,0.52,Real,Mixed,True,True,0.8326687812805176,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 78 |
+
2252.json,Republicans tried to block the deficit commission.,HALF_TRUE,Real,Mixed,tim-kaine,democrat,0.53,Real,Mixed,True,True,1.0242087841033936,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 79 |
+
7696.json,Not one dime gets added to the deficit because of Social Security.,BARELY_TRUE,Fake,Mixed,mark-pocan,democrat,0.53,Real,Mixed,False,True,1.0535731315612793,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 80 |
+
13186.json,"Says Donald Trump proposes to deport 16 million people, 11 million who are here without documents, and both Donald Trump and Mike Pence want to get rid of birthright citizenship.",HALF_TRUE,Real,Mixed,tim-kaine,democrat,0.52,Real,Mixed,True,True,1.6496412754058838,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 81 |
+
6678.json,"The Senate has not passed a budget in more than three years, not a good budget, not a bad budget, no budget.",TRUE,Real,True,leonard-lance,republican,0.52,Real,Mixed,True,False,1.0171582698822021,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 82 |
+
2830.json,A proposal to reduce the deficit involved large tax cuts for the top bracket.,HALF_TRUE,Real,Mixed,paul-krugman,none,0.52,Real,Mixed,True,True,1.0472040176391602,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 83 |
+
9112.json,22 Countries Invaded by the U.S. in 20 Years.,FALSE,Fake,False,facebook-posts,none,0.53,Real,Mixed,False,False,1.4772350788116455,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 84 |
+
2774.json,On toll roads.,HALF_TRUE,Real,Mixed,scott-walker,republican,0.52,Real,Mixed,True,True,1.2251579761505127,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 85 |
+
4698.json,"Says state Senate President Stephen Sweeney gave us the nations highest income tax rates, driving out jobs.",FALSE,Fake,False,americans-prosperity-new-jersey,none,0.52,Real,Mixed,False,False,1.2662239074707031,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 86 |
+
1066.json,ACORN will be a paid partner with the Census Bureau and they will be in charge of going door-to-door and collecting data from the American public.,PANTS_FIRE,Fake,False,michele-bachmann,republican,0.52,Real,Mixed,False,False,0.9788448810577393,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 87 |
+
13155.json,Says Ron Johnson helped companies ship jobs overseas.,FALSE,Fake,False,russ-feingold,democrat,0.36,Fake,Mixed,True,False,1.1062400341033936,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 88 |
+
13047.json,Barack Obama has doubled our national debt. Doubled it. Its going to be close to $20 trillion when he leaves.,HALF_TRUE,Real,Mixed,donald-trump,republican,0.54,Real,Mixed,True,True,1.301180124282837,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 89 |
+
13270.json,We know that more than half of Hillary Clintons meetings while she was secretary of state were given to major contributors to the Clinton Foundation.,BARELY_TRUE,Fake,Mixed,mike-pence,republican,0.53,Real,Mixed,False,True,1.4784371852874756,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 90 |
+
8798.json,And weve constantly been the lowest unemployed county in the state.,FALSE,Fake,False,bob-terry,none,0.53,Real,Mixed,False,False,1.0450530052185059,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 91 |
+
1652.json,On recess appointments.,HALF_TRUE,Real,Mixed,barack-obama,democrat,0.52,Real,Mixed,True,True,1.153883934020996,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 92 |
+
5824.json,We didnt go out asking people to join the stand your ground task force.,FALSE,Fake,False,jennifer-carroll,republican,0.53,Real,Mixed,False,False,1.2436339855194092,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 93 |
+
12511.json,There is no system to vet refugees from the Middle East.,FALSE,Fake,False,donald-trump,republican,0.35,Fake,Mixed,True,False,1.1522879600524902,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 94 |
+
2652.json,Says Rick Perry recently said he wanted another (Texas) business tax.,PANTS_FIRE,Fake,False,kathie-glass,libertarian,0.52,Real,Mixed,False,False,1.1576693058013916,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 95 |
+
6141.json,Federal health care reforms amounted to the government takeover of health care.,PANTS_FIRE,Fake,False,tommy-thompson,republican,0.53,Real,Mixed,False,False,1.3367388248443604,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 96 |
+
1005.json,Obama used $20 million in federal money to emmigrate (sic) Hamas Refugees to the USA.,PANTS_FIRE,Fake,False,chain-email,none,0.53,Real,Mixed,False,False,1.0034828186035156,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 97 |
+
1760.json,"Says in 2003 Texas cut $10 billion out of the entire budget, yet we put $1.8 billion more into public education. We put $800 million more into health and human services.",BARELY_TRUE,Fake,Mixed,rick-perry,republican,0.54,Real,Mixed,False,True,1.3652348518371582,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 98 |
+
13486.json,Alexander Hamilton was an immigration hawk.,HALF_TRUE,Real,Mixed,rush-limbaugh,none,0.52,Real,Mixed,True,True,1.230626106262207,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 99 |
+
7790.json,Boston Marathon bombing suspect Tamerlan Tsarnaev is buried not far from President Kennedys grave.,PANTS_FIRE,Fake,False,john-depetro,talk-show-host,0.52,Real,Mixed,False,False,1.2205047607421875,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 100 |
+
10512.json,The minimum wage has risen $2.35 in the last two years. Thats 31 percent.,FALSE,Fake,False,leonidas-raptakis,democrat,0.54,Real,Mixed,False,False,1.1163349151611328,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
| 101 |
+
4337.json,"Before Medicare, only 51 percent of Americans 65 and older had health care coverage and nearly 30 percent lived below the poverty line. Today, thanks to Medicare ... nearly all seniors have coverage a",HALF_TRUE,Real,Mixed,ron-kind,democrat,0.54,Real,Mixed,True,True,1.205260992050171,,,"{'score': 0.0, 'label': 'Neutral', 'method': 'Heuristic'}"
|
syscred/liar_benchmark_test.json
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"timestamp": "2026-02-03T11:06:54.379689",
|
| 3 |
+
"dataset": "LIAR",
|
| 4 |
+
"metrics": {
|
| 5 |
+
"total_statements": 100,
|
| 6 |
+
"successful_evaluations": 100,
|
| 7 |
+
"error_count": 0,
|
| 8 |
+
"error_rate": 0.0,
|
| 9 |
+
"binary": {
|
| 10 |
+
"accuracy": 0.57,
|
| 11 |
+
"precision": 1.0,
|
| 12 |
+
"recall": 0.1568627450980392,
|
| 13 |
+
"f1": 0.2711864406779661,
|
| 14 |
+
"confusion_matrix": [
|
| 15 |
+
[
|
| 16 |
+
8,
|
| 17 |
+
43
|
| 18 |
+
],
|
| 19 |
+
[
|
| 20 |
+
0,
|
| 21 |
+
49
|
| 22 |
+
]
|
| 23 |
+
]
|
| 24 |
+
},
|
| 25 |
+
"ternary": {
|
| 26 |
+
"accuracy": 0.34,
|
| 27 |
+
"macro_f1": 0.18686868686868685,
|
| 28 |
+
"confusion_matrix": [
|
| 29 |
+
[
|
| 30 |
+
0,
|
| 31 |
+
35,
|
| 32 |
+
0
|
| 33 |
+
],
|
| 34 |
+
[
|
| 35 |
+
0,
|
| 36 |
+
33,
|
| 37 |
+
0
|
| 38 |
+
],
|
| 39 |
+
[
|
| 40 |
+
0,
|
| 41 |
+
31,
|
| 42 |
+
1
|
| 43 |
+
]
|
| 44 |
+
]
|
| 45 |
+
},
|
| 46 |
+
"classification_report": {
|
| 47 |
+
"Fake": {
|
| 48 |
+
"precision": 1.0,
|
| 49 |
+
"recall": 0.1568627450980392,
|
| 50 |
+
"f1-score": 0.2711864406779661,
|
| 51 |
+
"support": 51.0
|
| 52 |
+
},
|
| 53 |
+
"Real": {
|
| 54 |
+
"precision": 0.532608695652174,
|
| 55 |
+
"recall": 1.0,
|
| 56 |
+
"f1-score": 0.6950354609929078,
|
| 57 |
+
"support": 49.0
|
| 58 |
+
},
|
| 59 |
+
"accuracy": 0.57,
|
| 60 |
+
"macro avg": {
|
| 61 |
+
"precision": 0.7663043478260869,
|
| 62 |
+
"recall": 0.5784313725490196,
|
| 63 |
+
"f1-score": 0.483110950835437,
|
| 64 |
+
"support": 100.0
|
| 65 |
+
},
|
| 66 |
+
"weighted avg": {
|
| 67 |
+
"precision": 0.7709782608695653,
|
| 68 |
+
"recall": 0.57,
|
| 69 |
+
"f1-score": 0.4788724606322875,
|
| 70 |
+
"support": 100.0
|
| 71 |
+
}
|
| 72 |
+
},
|
| 73 |
+
"score_distribution": {
|
| 74 |
+
"mean": 0.5134000000000001,
|
| 75 |
+
"min": 0.35,
|
| 76 |
+
"max": 0.69,
|
| 77 |
+
"median": 0.52
|
| 78 |
+
},
|
| 79 |
+
"per_party": {
|
| 80 |
+
"republican": {
|
| 81 |
+
"count": 43,
|
| 82 |
+
"accuracy": 0.5581395348837209
|
| 83 |
+
},
|
| 84 |
+
"democrat": {
|
| 85 |
+
"count": 27,
|
| 86 |
+
"accuracy": 0.6296296296296297
|
| 87 |
+
}
|
| 88 |
+
},
|
| 89 |
+
"elapsed_time": 108.7882571220398,
|
| 90 |
+
"statements_per_second": 0.9192168589282478
|
| 91 |
+
},
|
| 92 |
+
"config": {
|
| 93 |
+
"threshold": 0.5,
|
| 94 |
+
"use_graphrag": true,
|
| 95 |
+
"weights": {
|
| 96 |
+
"source_reputation": 0.22,
|
| 97 |
+
"domain_age": 0.08,
|
| 98 |
+
"sentiment_neutrality": 0.13,
|
| 99 |
+
"entity_presence": 0.13,
|
| 100 |
+
"coherence": 0.12,
|
| 101 |
+
"fact_check": 0.17,
|
| 102 |
+
"graph_context": 0.15
|
| 103 |
+
}
|
| 104 |
+
}
|
| 105 |
+
}
|
syscred/liar_dataset.py
ADDED
|
@@ -0,0 +1,359 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
LIAR Dataset Module - SysCRED
|
| 4 |
+
==============================
|
| 5 |
+
Loader for the LIAR benchmark dataset (Wang, 2017).
|
| 6 |
+
Standard benchmark for fake news detection with 12,800+ political statements.
|
| 7 |
+
|
| 8 |
+
Dataset: https://www.cs.ucsb.edu/~william/data/liar_dataset.zip
|
| 9 |
+
Paper: "Liar, Liar Pants on Fire" (ACL 2017)
|
| 10 |
+
|
| 11 |
+
(c) Dominique S. Loyer - PhD Thesis Prototype
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import os
|
| 15 |
+
import csv
|
| 16 |
+
from pathlib import Path
|
| 17 |
+
from typing import List, Dict, Optional, Tuple
|
| 18 |
+
from dataclasses import dataclass, field
|
| 19 |
+
from enum import Enum
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class LiarLabel(Enum):
    """Six-way PolitiFact truthfulness ratings, ordered least to most true."""
    PANTS_FIRE = 0
    FALSE = 1
    BARELY_TRUE = 2
    HALF_TRUE = 3
    MOSTLY_TRUE = 4
    TRUE = 5

    @classmethod
    def from_string(cls, label: str) -> 'LiarLabel':
        """Map a raw PolitiFact label string to its enum member.

        Unrecognized strings fall back to HALF_TRUE (the middle rating).
        """
        lookup = {
            'pants-fire': cls.PANTS_FIRE,
            'false': cls.FALSE,
            'barely-true': cls.BARELY_TRUE,
            'half-true': cls.HALF_TRUE,
            'mostly-true': cls.MOSTLY_TRUE,
            'true': cls.TRUE
        }
        key = label.lower().strip()
        if key in lookup:
            return lookup[key]
        return cls.HALF_TRUE

    def to_binary(self) -> str:
        """Collapse to two classes: ratings up to barely-true count as "Fake"."""
        return "Fake" if self.value < 3 else "Real"

    def to_ternary(self) -> str:
        """Collapse to three classes: False / Mixed / True."""
        if self.value >= 4:   # mostly-true, true
            return "True"
        if self.value >= 2:   # barely-true, half-true
            return "Mixed"
        return "False"        # pants-fire, false
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
@dataclass
class LiarStatement:
    """One labeled claim from the LIAR corpus (one TSV row)."""
    id: str
    label: LiarLabel
    statement: str
    subject: str = ""
    speaker: str = ""
    job_title: str = ""
    state: str = ""
    party: str = ""
    barely_true_count: int = 0
    false_count: int = 0
    half_true_count: int = 0
    mostly_true_count: int = 0
    pants_fire_count: int = 0
    context: str = ""

    @property
    def binary_label(self) -> str:
        """Two-way collapse of the PolitiFact rating ("Fake"/"Real")."""
        return self.label.to_binary()

    @property
    def ternary_label(self) -> str:
        """Three-way collapse of the PolitiFact rating ("False"/"Mixed"/"True")."""
        return self.label.to_ternary()

    @property
    def numeric_label(self) -> int:
        """Raw ordinal rating, 0 (pants-fire) .. 5 (true)."""
        return self.label.value

    @property
    def speaker_credit_history(self) -> Dict[str, int]:
        """Speaker's historical PolitiFact tallies keyed by rating name."""
        history = {
            'barely_true': self.barely_true_count,
            'false': self.false_count,
            'half_true': self.half_true_count,
            'mostly_true': self.mostly_true_count,
            'pants_fire': self.pants_fire_count
        }
        return history

    def to_dict(self) -> Dict:
        """JSON-serializable view of this statement (key order preserved)."""
        payload = {
            'id': self.id,
            'label': self.label.name,
            'binary_label': self.binary_label,
            'ternary_label': self.ternary_label,
            'statement': self.statement,
            'subject': self.subject,
            'speaker': self.speaker,
            'job_title': self.job_title,
            'state': self.state,
            'party': self.party,
            'context': self.context,
            'speaker_credit_history': self.speaker_credit_history
        }
        return payload
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
class LIARDataset:
    """
    Loader for the LIAR benchmark dataset.

    The LIAR dataset (Wang, ACL 2017) contains 12,836 short statements
    labeled with six fine-grained truthfulness ratings from PolitiFact.

    Files expected in the data directory:
        - train.tsv (10,269 statements)
        - valid.tsv (1,284 statements)
        - test.tsv  (1,283 statements)

    Usage:
        dataset = LIARDataset("/path/to/liar_dataset")
        train_data = dataset.load_split("train")

        for statement in train_data:
            print(f"{statement.statement} -> {statement.label.name}")
    """

    # TSV column indices (fixed layout of the published LIAR files)
    COL_ID = 0
    COL_LABEL = 1
    COL_STATEMENT = 2
    COL_SUBJECT = 3
    COL_SPEAKER = 4
    COL_JOB = 5
    COL_STATE = 6
    COL_PARTY = 7
    COL_BARELY_TRUE = 8
    COL_FALSE = 9
    COL_HALF_TRUE = 10
    COL_MOSTLY_TRUE = 11
    COL_PANTS_FIRE = 12
    COL_CONTEXT = 13

    # Number of columns in a well-formed row; short rows are padded to this.
    NUM_COLS = 14

    def __init__(self, data_dir: Optional[str] = None):
        """
        Initialize LIAR dataset loader.

        Args:
            data_dir: Path to directory containing train.tsv, valid.tsv, test.tsv.
                      If None, uses the default location: syscred/datasets/liar/
        """
        if data_dir:
            self.data_dir = Path(data_dir)
        else:
            # Default: relative to this file
            self.data_dir = Path(__file__).parent / "datasets" / "liar"

        # Per-split cache so repeated load_split() calls do not re-read disk.
        self._cache: Dict[str, List[LiarStatement]] = {}

        print(f"[LIAR] Dataset directory: {self.data_dir}")

    def _parse_int_safe(self, value: str) -> int:
        """Safely parse an int, returning 0 on any failure (blank, garbage, None)."""
        try:
            return int(value.strip())
        except (ValueError, AttributeError):
            return 0

    def _parse_row(self, row: List[str]) -> Optional[LiarStatement]:
        """Parse a single TSV row into a LiarStatement.

        Returns None for rows too short to carry id/label/statement or rows
        that raise during parsing (the error is logged, not propagated).
        """
        try:
            # Require at least id, label and statement text.
            if len(row) < 3:
                return None

            # Pad to full width once; every column index below is then valid,
            # so no per-field length guards are needed.
            if len(row) < self.NUM_COLS:
                row = row + [""] * (self.NUM_COLS - len(row))

            return LiarStatement(
                id=row[self.COL_ID].strip(),
                label=LiarLabel.from_string(row[self.COL_LABEL]),
                statement=row[self.COL_STATEMENT].strip(),
                subject=row[self.COL_SUBJECT].strip(),
                speaker=row[self.COL_SPEAKER].strip(),
                job_title=row[self.COL_JOB].strip(),
                state=row[self.COL_STATE].strip(),
                party=row[self.COL_PARTY].strip(),
                barely_true_count=self._parse_int_safe(row[self.COL_BARELY_TRUE]),
                false_count=self._parse_int_safe(row[self.COL_FALSE]),
                half_true_count=self._parse_int_safe(row[self.COL_HALF_TRUE]),
                mostly_true_count=self._parse_int_safe(row[self.COL_MOSTLY_TRUE]),
                pants_fire_count=self._parse_int_safe(row[self.COL_PANTS_FIRE]),
                context=row[self.COL_CONTEXT].strip()
            )
        except Exception as e:
            print(f"[LIAR] Parse error: {e}")
            return None

    def load_split(self, split: str = "test") -> List[LiarStatement]:
        """
        Load a dataset split (cached after the first read).

        Args:
            split: One of 'train', 'valid', 'test'

        Returns:
            List of LiarStatement objects

        Raises:
            FileNotFoundError: if <split>.tsv is missing from the data directory.
        """
        if split in self._cache:
            return self._cache[split]

        file_path = self.data_dir / f"{split}.tsv"

        if not file_path.exists():
            raise FileNotFoundError(
                f"LIAR dataset file not found: {file_path}\n"
                f"Download from: https://www.cs.ucsb.edu/~william/data/liar_dataset.zip"
            )

        statements = []

        with open(file_path, 'r', encoding='utf-8') as f:
            reader = csv.reader(f, delimiter='\t')
            for row in reader:
                stmt = self._parse_row(row)
                if stmt:
                    statements.append(stmt)

        self._cache[split] = statements
        print(f"[LIAR] Loaded {len(statements)} statements from {split}.tsv")

        return statements

    def get_statements(self, split: str = "test") -> List[str]:
        """Get just the statement texts for a split."""
        return [s.statement for s in self.load_split(split)]

    def get_labels(self, split: str = "test", label_type: str = "binary") -> List:
        """
        Get labels for a split.

        Args:
            split: Dataset split
            label_type: 'binary' (Fake/Real), 'ternary' (False/Mixed/True),
                        'six' (original 6-way names), 'numeric' (ints 0-5)

        Returns:
            List of str labels, or list of int when label_type == 'numeric'
            (return annotation is deliberately the bare List for that reason).
        """
        statements = self.load_split(split)

        if label_type == "binary":
            return [s.binary_label for s in statements]
        elif label_type == "ternary":
            return [s.ternary_label for s in statements]
        elif label_type == "numeric":
            return [s.numeric_label for s in statements]
        else:  # six / original
            return [s.label.name for s in statements]

    def get_label_distribution(self, split: str = "test") -> Dict[str, int]:
        """Get count of each 6-way label name in a split."""
        statements = self.load_split(split)
        distribution = {}

        for stmt in statements:
            label = stmt.label.name
            distribution[label] = distribution.get(label, 0) + 1

        return distribution

    def get_sample(self, split: str = "test", n: int = 10) -> List[LiarStatement]:
        """Get a random sample of up to n statements (no fixed seed)."""
        import random
        statements = self.load_split(split)
        return random.sample(statements, min(n, len(statements)))

    def get_by_party(self, split: str, party: str) -> List[LiarStatement]:
        """Filter statements by political party (case-insensitive exact match)."""
        statements = self.load_split(split)
        return [s for s in statements if s.party.lower() == party.lower()]

    def get_by_speaker(self, split: str, speaker: str) -> List[LiarStatement]:
        """Filter statements whose speaker name contains the query (case-insensitive)."""
        statements = self.load_split(split)
        return [s for s in statements if speaker.lower() in s.speaker.lower()]

    def iter_batches(self, split: str, batch_size: int = 32):
        """Yield statements in consecutive batches of at most batch_size."""
        statements = self.load_split(split)

        for i in range(0, len(statements), batch_size):
            yield statements[i:i + batch_size]

    def stats(self) -> Dict[str, Dict]:
        """Get per-split dataset statistics.

        Note: annotated Dict[str, Dict] rather than Dict[str, Any] because
        `Any` is not imported at module level; the old annotation raised
        NameError at class-definition time.
        """
        stats = {}

        for split in ['train', 'valid', 'test']:
            try:
                statements = self.load_split(split)
                stats[split] = {
                    'count': len(statements),
                    'label_distribution': self.get_label_distribution(split),
                    'unique_speakers': len(set(s.speaker for s in statements)),
                    'unique_parties': list(set(s.party for s in statements if s.party))
                }
            except FileNotFoundError:
                stats[split] = {'error': 'File not found'}

        return stats
|
| 325 |
+
|
| 326 |
+
|
| 327 |
+
# Convenience function
def load_liar(split: str = "test", data_dir: Optional[str] = None) -> List[LiarStatement]:
    """Convenience wrapper: construct a LIARDataset and load one split."""
    return LIARDataset(data_dir).load_split(split)
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
if __name__ == "__main__":
    # Smoke test: print per-split statistics using the default data directory.
    print("=" * 60)
    print("LIAR Dataset Loader - Test")
    print("=" * 60)

    # Test with default path
    try:
        dataset = LIARDataset()

        # stats() returns a dict per split; splits without a .tsv file on
        # disk carry an 'error' key instead of counts.
        print("\n📊 Dataset Statistics:")
        stats = dataset.stats()
        for split, info in stats.items():
            print(f"\n{split.upper()}:")
            if 'error' in info:
                print(f"  ❌ {info['error']}")
            else:
                print(f"  Total: {info['count']}")
                print(f"  Speakers: {info['unique_speakers']}")
                print(f"  Parties: {info['unique_parties']}")
                print(f"  Labels: {info['label_distribution']}")

    except Exception as e:
        # Most likely FileNotFoundError from a missing dataset; print
        # download instructions rather than a traceback.
        print(f"\n❌ Error: {e}")
        print("\nTo use this module, download the LIAR dataset:")
        print("  wget https://www.cs.ucsb.edu/~william/data/liar_dataset.zip")
        print("  unzip liar_dataset.zip -d 02_Code/syscred/datasets/liar/")
|
syscred/run_liar_benchmark.py
ADDED
|
@@ -0,0 +1,434 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
"""
|
| 4 |
+
LIAR Benchmark Runner - SysCRED
|
| 5 |
+
================================
|
| 6 |
+
Scientific evaluation of SysCRED on the LIAR benchmark dataset.
|
| 7 |
+
|
| 8 |
+
Usage:
|
| 9 |
+
python run_liar_benchmark.py --split test
|
| 10 |
+
python run_liar_benchmark.py --sample 100 --verbose
|
| 11 |
+
python run_liar_benchmark.py --split test --output results/liar_benchmark.csv
|
| 12 |
+
|
| 13 |
+
(c) Dominique S. Loyer - PhD Thesis Prototype
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import argparse
|
| 17 |
+
import json
|
| 18 |
+
import time
|
| 19 |
+
import sys
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
from datetime import datetime
|
| 22 |
+
from typing import Dict, List, Any, Optional
|
| 23 |
+
from collections import Counter
|
| 24 |
+
|
| 25 |
+
# Add parent to path for imports
|
| 26 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 27 |
+
|
| 28 |
+
try:
|
| 29 |
+
import pandas as pd
|
| 30 |
+
HAS_PANDAS = True
|
| 31 |
+
except ImportError:
|
| 32 |
+
HAS_PANDAS = False
|
| 33 |
+
print("[Warning] pandas not installed. CSV export will be limited.")
|
| 34 |
+
|
| 35 |
+
try:
|
| 36 |
+
from sklearn.metrics import (
|
| 37 |
+
accuracy_score, precision_score, recall_score, f1_score,
|
| 38 |
+
confusion_matrix, classification_report
|
| 39 |
+
)
|
| 40 |
+
HAS_SKLEARN = True
|
| 41 |
+
except ImportError:
|
| 42 |
+
HAS_SKLEARN = False
|
| 43 |
+
print("[Warning] sklearn not installed. Using basic metrics.")
|
| 44 |
+
|
| 45 |
+
from syscred.liar_dataset import LIARDataset, LiarStatement
|
| 46 |
+
from syscred.verification_system import CredibilityVerificationSystem
|
| 47 |
+
from syscred import config
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class LIARBenchmark:
|
| 51 |
+
"""
|
| 52 |
+
Benchmark runner for evaluating SysCRED on LIAR dataset.
|
| 53 |
+
"""
|
| 54 |
+
|
| 55 |
+
# Map SysCRED score to binary label
|
| 56 |
+
SYSCRED_THRESHOLD = 0.5 # Below = Fake, Above = Real
|
| 57 |
+
|
| 58 |
+
def __init__(
|
| 59 |
+
self,
|
| 60 |
+
data_dir: Optional[str] = None,
|
| 61 |
+
load_ml: bool = True,
|
| 62 |
+
use_graphrag: bool = True
|
| 63 |
+
):
|
| 64 |
+
"""
|
| 65 |
+
Initialize benchmark.
|
| 66 |
+
|
| 67 |
+
Args:
|
| 68 |
+
data_dir: Path to LIAR dataset directory
|
| 69 |
+
load_ml: Whether to load ML models
|
| 70 |
+
use_graphrag: Whether to use GraphRAG context
|
| 71 |
+
"""
|
| 72 |
+
print("=" * 60)
|
| 73 |
+
print("SysCRED LIAR Benchmark Runner")
|
| 74 |
+
print("=" * 60)
|
| 75 |
+
|
| 76 |
+
# Load dataset
|
| 77 |
+
self.dataset = LIARDataset(data_dir)
|
| 78 |
+
|
| 79 |
+
# Initialize SysCRED
|
| 80 |
+
print("\n[Benchmark] Initializing SysCRED...")
|
| 81 |
+
self.system = CredibilityVerificationSystem(
|
| 82 |
+
ontology_base_path=str(config.Config.ONTOLOGY_BASE_PATH),
|
| 83 |
+
ontology_data_path=str(config.Config.ONTOLOGY_DATA_PATH),
|
| 84 |
+
load_ml_models=load_ml,
|
| 85 |
+
google_api_key=config.Config.GOOGLE_FACT_CHECK_API_KEY
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
+
self.use_graphrag = use_graphrag
|
| 89 |
+
self.results: List[Dict] = []
|
| 90 |
+
|
| 91 |
+
print("[Benchmark] System ready.\n")
|
| 92 |
+
|
| 93 |
+
def _syscred_to_binary(self, score: float) -> str:
|
| 94 |
+
"""Convert SysCRED score to binary label."""
|
| 95 |
+
return "Real" if score >= self.SYSCRED_THRESHOLD else "Fake"
|
| 96 |
+
|
| 97 |
+
def _syscred_to_ternary(self, score: float) -> str:
|
| 98 |
+
"""Convert SysCRED score to ternary label."""
|
| 99 |
+
if score >= 0.65:
|
| 100 |
+
return "True"
|
| 101 |
+
elif score >= 0.35:
|
| 102 |
+
return "Mixed"
|
| 103 |
+
else:
|
| 104 |
+
return "False"
|
| 105 |
+
|
| 106 |
+
def evaluate_statement(self, statement: LiarStatement) -> Dict[str, Any]:
|
| 107 |
+
"""
|
| 108 |
+
Evaluate a single statement.
|
| 109 |
+
|
| 110 |
+
Args:
|
| 111 |
+
statement: LiarStatement to evaluate
|
| 112 |
+
|
| 113 |
+
Returns:
|
| 114 |
+
Result dictionary with prediction and ground truth
|
| 115 |
+
"""
|
| 116 |
+
start_time = time.time()
|
| 117 |
+
|
| 118 |
+
result = {
|
| 119 |
+
'id': statement.id,
|
| 120 |
+
'statement': statement.statement[:200],
|
| 121 |
+
'ground_truth_6way': statement.label.name,
|
| 122 |
+
'ground_truth_binary': statement.binary_label,
|
| 123 |
+
'ground_truth_ternary': statement.ternary_label,
|
| 124 |
+
'speaker': statement.speaker,
|
| 125 |
+
'party': statement.party,
|
| 126 |
+
'syscred_score': 0.5,
|
| 127 |
+
'predicted_binary': 'Unknown',
|
| 128 |
+
'predicted_ternary': 'Unknown',
|
| 129 |
+
'binary_correct': False,
|
| 130 |
+
'ternary_correct': False,
|
| 131 |
+
'processing_time': 0,
|
| 132 |
+
'error': None
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
try:
|
| 136 |
+
# Run SysCRED analysis on the statement text
|
| 137 |
+
# Note: LIAR statements are short claims, not URLs
|
| 138 |
+
report = self.system.verify_information(statement.statement)
|
| 139 |
+
|
| 140 |
+
if 'error' not in report:
|
| 141 |
+
score = report.get('scoreCredibilite', 0.5)
|
| 142 |
+
result['syscred_score'] = score
|
| 143 |
+
result['predicted_binary'] = self._syscred_to_binary(score)
|
| 144 |
+
result['predicted_ternary'] = self._syscred_to_ternary(score)
|
| 145 |
+
|
| 146 |
+
# Check correctness
|
| 147 |
+
result['binary_correct'] = (result['predicted_binary'] == result['ground_truth_binary'])
|
| 148 |
+
result['ternary_correct'] = (result['predicted_ternary'] == result['ground_truth_ternary'])
|
| 149 |
+
|
| 150 |
+
# Add extra details if available
|
| 151 |
+
if 'analyseNLP' in report:
|
| 152 |
+
result['sentiment'] = report['analyseNLP'].get('sentiment', {})
|
| 153 |
+
result['bias'] = report['analyseNLP'].get('bias_analysis', {})
|
| 154 |
+
else:
|
| 155 |
+
result['error'] = report['error']
|
| 156 |
+
|
| 157 |
+
except Exception as e:
|
| 158 |
+
result['error'] = str(e)
|
| 159 |
+
|
| 160 |
+
result['processing_time'] = time.time() - start_time
|
| 161 |
+
|
| 162 |
+
return result
|
| 163 |
+
|
| 164 |
+
def run_benchmark(
|
| 165 |
+
self,
|
| 166 |
+
split: str = "test",
|
| 167 |
+
sample_size: Optional[int] = None,
|
| 168 |
+
verbose: bool = False
|
| 169 |
+
) -> Dict[str, Any]:
|
| 170 |
+
"""
|
| 171 |
+
Run full benchmark on a dataset split.
|
| 172 |
+
|
| 173 |
+
Args:
|
| 174 |
+
split: 'train', 'valid', or 'test'
|
| 175 |
+
sample_size: If set, only evaluate this many statements
|
| 176 |
+
verbose: Print progress for each statement
|
| 177 |
+
|
| 178 |
+
Returns:
|
| 179 |
+
Dictionary with metrics and detailed results
|
| 180 |
+
"""
|
| 181 |
+
print(f"\n[Benchmark] Running on {split} split...")
|
| 182 |
+
|
| 183 |
+
# Load dataset
|
| 184 |
+
statements = self.dataset.load_split(split)
|
| 185 |
+
|
| 186 |
+
if sample_size:
|
| 187 |
+
import random
|
| 188 |
+
statements = random.sample(statements, min(sample_size, len(statements)))
|
| 189 |
+
print(f"[Benchmark] Using sample of {len(statements)} statements")
|
| 190 |
+
|
| 191 |
+
total = len(statements)
|
| 192 |
+
self.results = []
|
| 193 |
+
|
| 194 |
+
# Progress tracking
|
| 195 |
+
start_time = time.time()
|
| 196 |
+
|
| 197 |
+
for i, stmt in enumerate(statements):
|
| 198 |
+
if verbose or (i + 1) % 50 == 0:
|
| 199 |
+
print(f"[{i+1}/{total}] Processing: {stmt.statement[:50]}...")
|
| 200 |
+
|
| 201 |
+
result = self.evaluate_statement(stmt)
|
| 202 |
+
self.results.append(result)
|
| 203 |
+
|
| 204 |
+
if verbose:
|
| 205 |
+
symbol = "✅" if result['binary_correct'] else "❌"
|
| 206 |
+
print(f" -> Score: {result['syscred_score']:.2f} | "
|
| 207 |
+
f"Pred: {result['predicted_binary']} | "
|
| 208 |
+
f"True: {result['ground_truth_binary']} {symbol}")
|
| 209 |
+
|
| 210 |
+
elapsed = time.time() - start_time
|
| 211 |
+
|
| 212 |
+
# Calculate metrics
|
| 213 |
+
metrics = self._calculate_metrics()
|
| 214 |
+
metrics['elapsed_time'] = elapsed
|
| 215 |
+
metrics['statements_per_second'] = total / elapsed if elapsed > 0 else 0
|
| 216 |
+
|
| 217 |
+
return metrics
|
| 218 |
+
|
| 219 |
+
def _calculate_metrics(self) -> Dict[str, Any]:
|
| 220 |
+
"""Calculate evaluation metrics from results."""
|
| 221 |
+
|
| 222 |
+
if not self.results:
|
| 223 |
+
return {'error': 'No results to evaluate'}
|
| 224 |
+
|
| 225 |
+
# Filter successful evaluations
|
| 226 |
+
valid_results = [r for r in self.results if r['error'] is None]
|
| 227 |
+
error_count = len(self.results) - len(valid_results)
|
| 228 |
+
|
| 229 |
+
if not valid_results:
|
| 230 |
+
return {'error': 'All evaluations failed'}
|
| 231 |
+
|
| 232 |
+
metrics = {
|
| 233 |
+
'total_statements': len(self.results),
|
| 234 |
+
'successful_evaluations': len(valid_results),
|
| 235 |
+
'error_count': error_count,
|
| 236 |
+
'error_rate': error_count / len(self.results)
|
| 237 |
+
}
|
| 238 |
+
|
| 239 |
+
# Extract labels
|
| 240 |
+
y_true_binary = [r['ground_truth_binary'] for r in valid_results]
|
| 241 |
+
y_pred_binary = [r['predicted_binary'] for r in valid_results]
|
| 242 |
+
|
| 243 |
+
y_true_ternary = [r['ground_truth_ternary'] for r in valid_results]
|
| 244 |
+
y_pred_ternary = [r['predicted_ternary'] for r in valid_results]
|
| 245 |
+
|
| 246 |
+
# Binary metrics
|
| 247 |
+
if HAS_SKLEARN:
|
| 248 |
+
metrics['binary'] = {
|
| 249 |
+
'accuracy': accuracy_score(y_true_binary, y_pred_binary),
|
| 250 |
+
'precision': precision_score(y_true_binary, y_pred_binary, pos_label='Fake', zero_division=0),
|
| 251 |
+
'recall': recall_score(y_true_binary, y_pred_binary, pos_label='Fake', zero_division=0),
|
| 252 |
+
'f1': f1_score(y_true_binary, y_pred_binary, pos_label='Fake', zero_division=0),
|
| 253 |
+
'confusion_matrix': confusion_matrix(y_true_binary, y_pred_binary, labels=['Fake', 'Real']).tolist()
|
| 254 |
+
}
|
| 255 |
+
|
| 256 |
+
metrics['ternary'] = {
|
| 257 |
+
'accuracy': accuracy_score(y_true_ternary, y_pred_ternary),
|
| 258 |
+
'macro_f1': f1_score(y_true_ternary, y_pred_ternary, average='macro', zero_division=0),
|
| 259 |
+
'confusion_matrix': confusion_matrix(y_true_ternary, y_pred_ternary,
|
| 260 |
+
labels=['False', 'Mixed', 'True']).tolist()
|
| 261 |
+
}
|
| 262 |
+
|
| 263 |
+
# Detailed classification report
|
| 264 |
+
metrics['classification_report'] = classification_report(
|
| 265 |
+
y_true_binary, y_pred_binary,
|
| 266 |
+
target_names=['Fake', 'Real'],
|
| 267 |
+
output_dict=True
|
| 268 |
+
)
|
| 269 |
+
else:
|
| 270 |
+
# Basic metrics without sklearn
|
| 271 |
+
correct_binary = sum(1 for r in valid_results if r['binary_correct'])
|
| 272 |
+
correct_ternary = sum(1 for r in valid_results if r['ternary_correct'])
|
| 273 |
+
|
| 274 |
+
metrics['binary'] = {
|
| 275 |
+
'accuracy': correct_binary / len(valid_results),
|
| 276 |
+
'correct': correct_binary,
|
| 277 |
+
'incorrect': len(valid_results) - correct_binary
|
| 278 |
+
}
|
| 279 |
+
|
| 280 |
+
metrics['ternary'] = {
|
| 281 |
+
'accuracy': correct_ternary / len(valid_results),
|
| 282 |
+
'correct': correct_ternary,
|
| 283 |
+
'incorrect': len(valid_results) - correct_ternary
|
| 284 |
+
}
|
| 285 |
+
|
| 286 |
+
# Score distribution
|
| 287 |
+
scores = [r['syscred_score'] for r in valid_results]
|
| 288 |
+
metrics['score_distribution'] = {
|
| 289 |
+
'mean': sum(scores) / len(scores),
|
| 290 |
+
'min': min(scores),
|
| 291 |
+
'max': max(scores),
|
| 292 |
+
'median': sorted(scores)[len(scores) // 2]
|
| 293 |
+
}
|
| 294 |
+
|
| 295 |
+
# Per-party analysis
|
| 296 |
+
party_results = {}
|
| 297 |
+
for party in ['republican', 'democrat']:
|
| 298 |
+
party_items = [r for r in valid_results if r['party'].lower() == party]
|
| 299 |
+
if party_items:
|
| 300 |
+
party_correct = sum(1 for r in party_items if r['binary_correct'])
|
| 301 |
+
party_results[party] = {
|
| 302 |
+
'count': len(party_items),
|
| 303 |
+
'accuracy': party_correct / len(party_items)
|
| 304 |
+
}
|
| 305 |
+
metrics['per_party'] = party_results
|
| 306 |
+
|
| 307 |
+
return metrics
|
| 308 |
+
|
| 309 |
+
def print_results(self, metrics: Dict[str, Any]) -> None:
|
| 310 |
+
"""Pretty-print benchmark results."""
|
| 311 |
+
print("\n" + "=" * 60)
|
| 312 |
+
print("LIAR BENCHMARK RESULTS")
|
| 313 |
+
print("=" * 60)
|
| 314 |
+
|
| 315 |
+
print(f"\n📊 Overview:")
|
| 316 |
+
print(f" Total Statements: {metrics.get('total_statements', 0)}")
|
| 317 |
+
print(f" Successful: {metrics.get('successful_evaluations', 0)}")
|
| 318 |
+
print(f" Errors: {metrics.get('error_count', 0)} ({metrics.get('error_rate', 0):.1%})")
|
| 319 |
+
print(f" Processing Time: {metrics.get('elapsed_time', 0):.1f}s")
|
| 320 |
+
print(f" Speed: {metrics.get('statements_per_second', 0):.2f} stmt/sec")
|
| 321 |
+
|
| 322 |
+
if 'binary' in metrics:
|
| 323 |
+
print(f"\n📈 Binary Classification (Fake vs Real):")
|
| 324 |
+
b = metrics['binary']
|
| 325 |
+
print(f" Accuracy: {b.get('accuracy', 0):.2%}")
|
| 326 |
+
print(f" Precision: {b.get('precision', 0):.2%}")
|
| 327 |
+
print(f" Recall: {b.get('recall', 0):.2%}")
|
| 328 |
+
print(f" F1-Score: {b.get('f1', 0):.2f}")
|
| 329 |
+
|
| 330 |
+
if 'confusion_matrix' in b:
|
| 331 |
+
cm = b['confusion_matrix']
|
| 332 |
+
print(f"\n Confusion Matrix:")
|
| 333 |
+
print(f" Pred Fake Pred Real")
|
| 334 |
+
print(f" True Fake {cm[0][0]:5d} {cm[0][1]:5d}")
|
| 335 |
+
print(f" True Real {cm[1][0]:5d} {cm[1][1]:5d}")
|
| 336 |
+
|
| 337 |
+
if 'ternary' in metrics:
|
| 338 |
+
print(f"\n📊 Ternary Classification (False/Mixed/True):")
|
| 339 |
+
t = metrics['ternary']
|
| 340 |
+
print(f" Accuracy: {t.get('accuracy', 0):.2%}")
|
| 341 |
+
print(f" Macro F1: {t.get('macro_f1', 0):.2f}")
|
| 342 |
+
|
| 343 |
+
if 'per_party' in metrics:
|
| 344 |
+
print(f"\n🏛️ Per-Party Analysis:")
|
| 345 |
+
for party, data in metrics['per_party'].items():
|
| 346 |
+
print(f" {party.capitalize()}: {data['accuracy']:.2%} accuracy ({data['count']} samples)")
|
| 347 |
+
|
| 348 |
+
if 'score_distribution' in metrics:
|
| 349 |
+
print(f"\n📉 Score Distribution:")
|
| 350 |
+
sd = metrics['score_distribution']
|
| 351 |
+
print(f" Mean: {sd['mean']:.3f}")
|
| 352 |
+
print(f" Median: {sd['median']:.3f}")
|
| 353 |
+
print(f" Range: [{sd['min']:.3f}, {sd['max']:.3f}]")
|
| 354 |
+
|
| 355 |
+
print("\n" + "=" * 60)
|
| 356 |
+
|
| 357 |
+
def save_results(self, output_path: str, metrics: Dict[str, Any]) -> None:
    """Persist benchmark artifacts derived from *output_path*.

    Two sibling files are produced:
      - ``<output>.csv`` — one row per evaluated statement (only when pandas
        is available and at least one result exists),
      - ``<output>.json`` — aggregate metrics plus the run configuration.
    """
    target = Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)

    # Per-statement rows go to CSV when pandas is installed.
    if HAS_PANDAS and self.results:
        csv_path = target.with_suffix('.csv')
        pd.DataFrame(self.results).to_csv(csv_path, index=False)
        print(f"[Benchmark] Results saved to: {csv_path}")

    # Aggregate metrics + configuration are always written as JSON.
    # default=str keeps non-serializable values (e.g. numpy scalars) printable.
    payload = {
        'timestamp': datetime.now().isoformat(),
        'dataset': 'LIAR',
        'metrics': metrics,
        'config': {
            'threshold': self.SYSCRED_THRESHOLD,
            'use_graphrag': self.use_graphrag,
            'weights': dict(self.system.weights)
        }
    }
    json_path = target.with_suffix('.json')
    with open(json_path, 'w') as f:
        json.dump(payload, f, indent=2, default=str)
    print(f"[Benchmark] Metrics saved to: {json_path}")
|
| 383 |
+
|
| 384 |
+
|
| 385 |
+
def _build_local_cli() -> argparse.Namespace:
    """Define and evaluate the command-line interface for the local benchmark."""
    parser = argparse.ArgumentParser(description='Run LIAR benchmark on SysCRED')
    parser.add_argument('--split', type=str, default='test',
                        choices=['train', 'valid', 'test'],
                        help='Dataset split to evaluate')
    parser.add_argument('--sample', type=int, default=None,
                        help='Number of statements to sample (for quick testing)')
    parser.add_argument('--data-dir', type=str, default=None,
                        help='Path to LIAR dataset directory')
    parser.add_argument('--output', type=str, default=None,
                        help='Output path for results (CSV/JSON)')
    parser.add_argument('--no-ml', action='store_true',
                        help='Disable ML models for faster testing')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Print details for each statement')
    return parser.parse_args()


def main():
    """CLI entry point: run the local LIAR benchmark and save results.

    Exits with status 1 when the LIAR dataset files are missing, after
    printing download instructions.
    """
    args = _build_local_cli()

    # Run benchmark
    benchmark = LIARBenchmark(
        data_dir=args.data_dir,
        load_ml=not args.no_ml
    )

    try:
        metrics = benchmark.run_benchmark(
            split=args.split,
            sample_size=args.sample,
            verbose=args.verbose
        )

        benchmark.print_results(metrics)

        # Fall back to a split-named file next to this script when no
        # explicit output path was given.
        if args.output:
            benchmark.save_results(args.output, metrics)
        else:
            default_output = Path(__file__).parent / f"liar_benchmark_{args.split}.csv"
            benchmark.save_results(str(default_output), metrics)

    except FileNotFoundError as e:
        print(f"\n❌ Error: {e}")
        print("\nTo download the LIAR dataset:")
        print(" 1. wget https://www.cs.ucsb.edu/~william/data/liar_dataset.zip")
        print(" 2. unzip liar_dataset.zip -d 02_Code/syscred/datasets/liar/")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
syscred/run_liar_benchmark_remote.py
ADDED
|
@@ -0,0 +1,373 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
"""
|
| 4 |
+
LIAR Benchmark via Hugging Face Space API
|
| 5 |
+
==========================================
|
| 6 |
+
Runs the LIAR benchmark against the remote SysCRED instance on HF Space.
|
| 7 |
+
This uses the full ML pipeline (PyTorch, Transformers) running in the cloud.
|
| 8 |
+
|
| 9 |
+
Usage:
|
| 10 |
+
python run_liar_benchmark_remote.py --sample 100
|
| 11 |
+
python run_liar_benchmark_remote.py --split test --url https://your-space.hf.space
|
| 12 |
+
|
| 13 |
+
(c) Dominique S. Loyer - PhD Thesis Prototype
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import argparse
|
| 17 |
+
import json
|
| 18 |
+
import time
|
| 19 |
+
import sys
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
from datetime import datetime
|
| 22 |
+
from typing import Dict, List, Any, Optional
|
| 23 |
+
import requests
|
| 24 |
+
|
| 25 |
+
try:
|
| 26 |
+
import pandas as pd
|
| 27 |
+
HAS_PANDAS = True
|
| 28 |
+
except ImportError:
|
| 29 |
+
HAS_PANDAS = False
|
| 30 |
+
|
| 31 |
+
try:
|
| 32 |
+
from sklearn.metrics import (
|
| 33 |
+
accuracy_score, precision_score, recall_score, f1_score,
|
| 34 |
+
confusion_matrix, classification_report
|
| 35 |
+
)
|
| 36 |
+
HAS_SKLEARN = True
|
| 37 |
+
except ImportError:
|
| 38 |
+
HAS_SKLEARN = False
|
| 39 |
+
|
| 40 |
+
# Add parent to path for imports
|
| 41 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 42 |
+
from syscred.liar_dataset import LIARDataset, LiarStatement
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
class RemoteLIARBenchmark:
|
| 46 |
+
"""
|
| 47 |
+
Benchmark runner using remote HF Space API.
|
| 48 |
+
"""
|
| 49 |
+
|
| 50 |
+
# Default HF Space URL
|
| 51 |
+
DEFAULT_API_URL = "https://domloyer-syscred.hf.space"
|
| 52 |
+
|
| 53 |
+
SYSCRED_THRESHOLD = 0.5 # Below = Fake, Above = Real
|
| 54 |
+
|
| 55 |
+
def __init__(
|
| 56 |
+
self,
|
| 57 |
+
api_url: Optional[str] = None,
|
| 58 |
+
data_dir: Optional[str] = None,
|
| 59 |
+
timeout: int = 60
|
| 60 |
+
):
|
| 61 |
+
"""
|
| 62 |
+
Initialize remote benchmark.
|
| 63 |
+
|
| 64 |
+
Args:
|
| 65 |
+
api_url: HF Space API URL
|
| 66 |
+
data_dir: Path to LIAR dataset
|
| 67 |
+
timeout: Request timeout in seconds
|
| 68 |
+
"""
|
| 69 |
+
print("=" * 60)
|
| 70 |
+
print("SysCRED LIAR Benchmark (Remote HF Space)")
|
| 71 |
+
print("=" * 60)
|
| 72 |
+
|
| 73 |
+
self.api_url = (api_url or self.DEFAULT_API_URL).rstrip('/')
|
| 74 |
+
self.timeout = timeout
|
| 75 |
+
|
| 76 |
+
# Test connection
|
| 77 |
+
print(f"\n[Remote] API URL: {self.api_url}")
|
| 78 |
+
self._test_connection()
|
| 79 |
+
|
| 80 |
+
# Load dataset
|
| 81 |
+
self.dataset = LIARDataset(data_dir)
|
| 82 |
+
self.results: List[Dict] = []
|
| 83 |
+
|
| 84 |
+
print("[Remote] Ready.\n")
|
| 85 |
+
|
| 86 |
+
def _test_connection(self):
|
| 87 |
+
"""Test API connectivity."""
|
| 88 |
+
try:
|
| 89 |
+
response = requests.get(f"{self.api_url}/api/health", timeout=10)
|
| 90 |
+
if response.status_code == 200:
|
| 91 |
+
print("[Remote] ✓ API connection successful")
|
| 92 |
+
else:
|
| 93 |
+
print(f"[Remote] ⚠ API returned status {response.status_code}")
|
| 94 |
+
except requests.exceptions.ConnectionError:
|
| 95 |
+
print("[Remote] ⚠ Could not connect to API (may be sleeping)")
|
| 96 |
+
print("[Remote] The first request will wake it up...")
|
| 97 |
+
except Exception as e:
|
| 98 |
+
print(f"[Remote] ⚠ Connection test failed: {e}")
|
| 99 |
+
|
| 100 |
+
def _call_api(self, text: str) -> Dict[str, Any]:
|
| 101 |
+
"""Call the SysCRED API."""
|
| 102 |
+
try:
|
| 103 |
+
response = requests.post(
|
| 104 |
+
f"{self.api_url}/api/verify",
|
| 105 |
+
json={"input": text},
|
| 106 |
+
timeout=self.timeout,
|
| 107 |
+
headers={"Content-Type": "application/json"}
|
| 108 |
+
)
|
| 109 |
+
|
| 110 |
+
if response.status_code == 200:
|
| 111 |
+
return response.json()
|
| 112 |
+
else:
|
| 113 |
+
return {"error": f"HTTP {response.status_code}: {response.text[:100]}"}
|
| 114 |
+
|
| 115 |
+
except requests.exceptions.Timeout:
|
| 116 |
+
return {"error": "Request timeout"}
|
| 117 |
+
except requests.exceptions.ConnectionError:
|
| 118 |
+
return {"error": "Connection error"}
|
| 119 |
+
except Exception as e:
|
| 120 |
+
return {"error": str(e)}
|
| 121 |
+
|
| 122 |
+
def _syscred_to_binary(self, score: float) -> str:
|
| 123 |
+
"""Convert SysCRED score to binary label."""
|
| 124 |
+
return "Real" if score >= self.SYSCRED_THRESHOLD else "Fake"
|
| 125 |
+
|
| 126 |
+
def _syscred_to_ternary(self, score: float) -> str:
|
| 127 |
+
"""Convert SysCRED score to ternary label."""
|
| 128 |
+
if score >= 0.65:
|
| 129 |
+
return "True"
|
| 130 |
+
elif score >= 0.35:
|
| 131 |
+
return "Mixed"
|
| 132 |
+
else:
|
| 133 |
+
return "False"
|
| 134 |
+
|
| 135 |
+
def evaluate_statement(self, statement: LiarStatement) -> Dict[str, Any]:
|
| 136 |
+
"""Evaluate a single statement via API."""
|
| 137 |
+
start_time = time.time()
|
| 138 |
+
|
| 139 |
+
result = {
|
| 140 |
+
'id': statement.id,
|
| 141 |
+
'statement': statement.statement[:200],
|
| 142 |
+
'ground_truth_6way': statement.label.name,
|
| 143 |
+
'ground_truth_binary': statement.binary_label,
|
| 144 |
+
'ground_truth_ternary': statement.ternary_label,
|
| 145 |
+
'speaker': statement.speaker,
|
| 146 |
+
'party': statement.party,
|
| 147 |
+
'syscred_score': 0.5,
|
| 148 |
+
'predicted_binary': 'Unknown',
|
| 149 |
+
'predicted_ternary': 'Unknown',
|
| 150 |
+
'binary_correct': False,
|
| 151 |
+
'ternary_correct': False,
|
| 152 |
+
'processing_time': 0,
|
| 153 |
+
'error': None,
|
| 154 |
+
'ml_used': False
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
# Call remote API
|
| 158 |
+
api_result = self._call_api(statement.statement)
|
| 159 |
+
|
| 160 |
+
if 'error' not in api_result:
|
| 161 |
+
score = api_result.get('scoreCredibilite', 0.5)
|
| 162 |
+
result['syscred_score'] = score
|
| 163 |
+
result['predicted_binary'] = self._syscred_to_binary(score)
|
| 164 |
+
result['predicted_ternary'] = self._syscred_to_ternary(score)
|
| 165 |
+
|
| 166 |
+
result['binary_correct'] = (result['predicted_binary'] == result['ground_truth_binary'])
|
| 167 |
+
result['ternary_correct'] = (result['predicted_ternary'] == result['ground_truth_ternary'])
|
| 168 |
+
|
| 169 |
+
# Check if ML was used
|
| 170 |
+
nlp = api_result.get('analyseNLP', {})
|
| 171 |
+
result['ml_used'] = nlp.get('sentiment') is not None
|
| 172 |
+
|
| 173 |
+
# GraphRAG info
|
| 174 |
+
graphrag = api_result.get('graphRAG', {})
|
| 175 |
+
result['graph_context_score'] = graphrag.get('context_score')
|
| 176 |
+
result['graph_has_history'] = graphrag.get('has_history', False)
|
| 177 |
+
else:
|
| 178 |
+
result['error'] = api_result['error']
|
| 179 |
+
|
| 180 |
+
result['processing_time'] = time.time() - start_time
|
| 181 |
+
|
| 182 |
+
return result
|
| 183 |
+
|
| 184 |
+
def run_benchmark(
|
| 185 |
+
self,
|
| 186 |
+
split: str = "test",
|
| 187 |
+
sample_size: Optional[int] = None,
|
| 188 |
+
verbose: bool = False
|
| 189 |
+
) -> Dict[str, Any]:
|
| 190 |
+
"""Run benchmark against remote API."""
|
| 191 |
+
print(f"\n[Remote] Running on {split} split via HF Space API...")
|
| 192 |
+
|
| 193 |
+
statements = self.dataset.load_split(split)
|
| 194 |
+
|
| 195 |
+
if sample_size:
|
| 196 |
+
import random
|
| 197 |
+
statements = random.sample(statements, min(sample_size, len(statements)))
|
| 198 |
+
print(f"[Remote] Using sample of {len(statements)} statements")
|
| 199 |
+
|
| 200 |
+
total = len(statements)
|
| 201 |
+
self.results = []
|
| 202 |
+
ml_used_count = 0
|
| 203 |
+
|
| 204 |
+
start_time = time.time()
|
| 205 |
+
|
| 206 |
+
for i, stmt in enumerate(statements):
|
| 207 |
+
if verbose or (i + 1) % 10 == 0:
|
| 208 |
+
print(f"[{i+1}/{total}] Processing: {stmt.statement[:50]}...")
|
| 209 |
+
|
| 210 |
+
result = self.evaluate_statement(stmt)
|
| 211 |
+
self.results.append(result)
|
| 212 |
+
|
| 213 |
+
if result.get('ml_used'):
|
| 214 |
+
ml_used_count += 1
|
| 215 |
+
|
| 216 |
+
if verbose and not result.get('error'):
|
| 217 |
+
symbol = "✅" if result['binary_correct'] else "❌"
|
| 218 |
+
ml = "🧠" if result['ml_used'] else "📊"
|
| 219 |
+
print(f" -> Score: {result['syscred_score']:.2f} {ml} | "
|
| 220 |
+
f"Pred: {result['predicted_binary']} | "
|
| 221 |
+
f"True: {result['ground_truth_binary']} {symbol}")
|
| 222 |
+
|
| 223 |
+
# Rate limiting - be nice to the API
|
| 224 |
+
if i < total - 1:
|
| 225 |
+
time.sleep(0.5)
|
| 226 |
+
|
| 227 |
+
elapsed = time.time() - start_time
|
| 228 |
+
|
| 229 |
+
metrics = self._calculate_metrics()
|
| 230 |
+
metrics['elapsed_time'] = elapsed
|
| 231 |
+
metrics['statements_per_second'] = total / elapsed if elapsed > 0 else 0
|
| 232 |
+
metrics['ml_used_percentage'] = (ml_used_count / total * 100) if total > 0 else 0
|
| 233 |
+
metrics['api_url'] = self.api_url
|
| 234 |
+
|
| 235 |
+
return metrics
|
| 236 |
+
|
| 237 |
+
def _calculate_metrics(self) -> Dict[str, Any]:
|
| 238 |
+
"""Calculate evaluation metrics."""
|
| 239 |
+
if not self.results:
|
| 240 |
+
return {'error': 'No results'}
|
| 241 |
+
|
| 242 |
+
valid_results = [r for r in self.results if r['error'] is None]
|
| 243 |
+
error_count = len(self.results) - len(valid_results)
|
| 244 |
+
|
| 245 |
+
if not valid_results:
|
| 246 |
+
return {'error': 'All evaluations failed'}
|
| 247 |
+
|
| 248 |
+
metrics = {
|
| 249 |
+
'total_statements': len(self.results),
|
| 250 |
+
'successful_evaluations': len(valid_results),
|
| 251 |
+
'error_count': error_count,
|
| 252 |
+
'error_rate': error_count / len(self.results)
|
| 253 |
+
}
|
| 254 |
+
|
| 255 |
+
y_true_binary = [r['ground_truth_binary'] for r in valid_results]
|
| 256 |
+
y_pred_binary = [r['predicted_binary'] for r in valid_results]
|
| 257 |
+
|
| 258 |
+
y_true_ternary = [r['ground_truth_ternary'] for r in valid_results]
|
| 259 |
+
y_pred_ternary = [r['predicted_ternary'] for r in valid_results]
|
| 260 |
+
|
| 261 |
+
if HAS_SKLEARN:
|
| 262 |
+
metrics['binary'] = {
|
| 263 |
+
'accuracy': accuracy_score(y_true_binary, y_pred_binary),
|
| 264 |
+
'precision': precision_score(y_true_binary, y_pred_binary, pos_label='Fake', zero_division=0),
|
| 265 |
+
'recall': recall_score(y_true_binary, y_pred_binary, pos_label='Fake', zero_division=0),
|
| 266 |
+
'f1': f1_score(y_true_binary, y_pred_binary, pos_label='Fake', zero_division=0),
|
| 267 |
+
'confusion_matrix': confusion_matrix(y_true_binary, y_pred_binary, labels=['Fake', 'Real']).tolist()
|
| 268 |
+
}
|
| 269 |
+
|
| 270 |
+
metrics['ternary'] = {
|
| 271 |
+
'accuracy': accuracy_score(y_true_ternary, y_pred_ternary),
|
| 272 |
+
'macro_f1': f1_score(y_true_ternary, y_pred_ternary, average='macro', zero_division=0),
|
| 273 |
+
}
|
| 274 |
+
else:
|
| 275 |
+
correct_binary = sum(1 for r in valid_results if r['binary_correct'])
|
| 276 |
+
metrics['binary'] = {'accuracy': correct_binary / len(valid_results)}
|
| 277 |
+
|
| 278 |
+
scores = [r['syscred_score'] for r in valid_results]
|
| 279 |
+
metrics['score_distribution'] = {
|
| 280 |
+
'mean': sum(scores) / len(scores),
|
| 281 |
+
'min': min(scores),
|
| 282 |
+
'max': max(scores),
|
| 283 |
+
}
|
| 284 |
+
|
| 285 |
+
return metrics
|
| 286 |
+
|
| 287 |
+
def print_results(self, metrics: Dict[str, Any]) -> None:
|
| 288 |
+
"""Print benchmark results."""
|
| 289 |
+
print("\n" + "=" * 60)
|
| 290 |
+
print("LIAR BENCHMARK RESULTS (Remote HF Space)")
|
| 291 |
+
print("=" * 60)
|
| 292 |
+
|
| 293 |
+
print(f"\n🌐 API: {metrics.get('api_url', 'N/A')}")
|
| 294 |
+
print(f"🧠 ML Models Used: {metrics.get('ml_used_percentage', 0):.1f}%")
|
| 295 |
+
|
| 296 |
+
print(f"\n📊 Overview:")
|
| 297 |
+
print(f" Total: {metrics.get('total_statements', 0)}")
|
| 298 |
+
print(f" Success: {metrics.get('successful_evaluations', 0)}")
|
| 299 |
+
print(f" Errors: {metrics.get('error_count', 0)}")
|
| 300 |
+
print(f" Time: {metrics.get('elapsed_time', 0):.1f}s")
|
| 301 |
+
|
| 302 |
+
if 'binary' in metrics:
|
| 303 |
+
print(f"\n📈 Binary Classification:")
|
| 304 |
+
b = metrics['binary']
|
| 305 |
+
print(f" Accuracy: {b.get('accuracy', 0):.2%}")
|
| 306 |
+
print(f" Precision: {b.get('precision', 0):.2%}")
|
| 307 |
+
print(f" Recall: {b.get('recall', 0):.2%}")
|
| 308 |
+
print(f" F1-Score: {b.get('f1', 0):.2f}")
|
| 309 |
+
|
| 310 |
+
print("\n" + "=" * 60)
|
| 311 |
+
|
| 312 |
+
def save_results(self, output_path: str, metrics: Dict[str, Any]) -> None:
|
| 313 |
+
"""Save results."""
|
| 314 |
+
output = Path(output_path)
|
| 315 |
+
output.parent.mkdir(parents=True, exist_ok=True)
|
| 316 |
+
|
| 317 |
+
if HAS_PANDAS and self.results:
|
| 318 |
+
df = pd.DataFrame(self.results)
|
| 319 |
+
csv_path = output.with_suffix('.csv')
|
| 320 |
+
df.to_csv(csv_path, index=False)
|
| 321 |
+
print(f"[Remote] Results: {csv_path}")
|
| 322 |
+
|
| 323 |
+
json_path = output.with_suffix('.json')
|
| 324 |
+
with open(json_path, 'w') as f:
|
| 325 |
+
json.dump({
|
| 326 |
+
'timestamp': datetime.now().isoformat(),
|
| 327 |
+
'dataset': 'LIAR',
|
| 328 |
+
'mode': 'remote',
|
| 329 |
+
'metrics': metrics
|
| 330 |
+
}, f, indent=2, default=str)
|
| 331 |
+
print(f"[Remote] Metrics: {json_path}")
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
def _build_remote_cli() -> argparse.Namespace:
    """Define and evaluate the command-line interface for the remote benchmark."""
    parser = argparse.ArgumentParser(description='LIAR benchmark via HF Space API')
    parser.add_argument('--url', type=str, default=None,
                        help='HF Space API URL')
    parser.add_argument('--split', type=str, default='test',
                        choices=['train', 'valid', 'test'])
    parser.add_argument('--sample', type=int, default=None,
                        help='Number of statements to sample')
    parser.add_argument('--data-dir', type=str, default=None)
    parser.add_argument('--output', type=str, default=None)
    parser.add_argument('--verbose', '-v', action='store_true')
    parser.add_argument('--timeout', type=int, default=60)
    return parser.parse_args()


def main():
    """CLI entry point: run the LIAR benchmark against the remote HF Space.

    Exits with status 1 when the local LIAR dataset files are missing.
    """
    args = _build_remote_cli()

    benchmark = RemoteLIARBenchmark(
        api_url=args.url,
        data_dir=args.data_dir,
        timeout=args.timeout
    )

    try:
        metrics = benchmark.run_benchmark(
            split=args.split,
            sample_size=args.sample,
            verbose=args.verbose
        )

        benchmark.print_results(metrics)

        # Default to a split-named CSV in the working directory.
        output = args.output or f"liar_benchmark_remote_{args.split}.csv"
        benchmark.save_results(output, metrics)

    except FileNotFoundError as e:
        print(f"\n❌ Error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
syscred/static/index.html
CHANGED
|
@@ -333,6 +333,69 @@
|
|
| 333 |
transform: none;
|
| 334 |
box-shadow: none;
|
| 335 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
</style>
|
| 337 |
</head>
|
| 338 |
|
|
@@ -345,12 +408,23 @@
|
|
| 345 |
|
| 346 |
<div class="search-box">
|
| 347 |
<div class="input-group">
|
| 348 |
-
<input type="text" id="urlInput" placeholder="Entrez une URL à analyser
|
| 349 |
autofocus>
|
| 350 |
<button id="analyzeBtn" onclick="analyzeUrl()">
|
| 351 |
🔍 Analyser
|
| 352 |
</button>
|
| 353 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
</div>
|
| 355 |
|
| 356 |
<div class="loading" id="loading">
|
|
@@ -393,7 +467,32 @@
|
|
| 393 |
</div>
|
| 394 |
|
| 395 |
<script>
|
| 396 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
|
| 398 |
async function analyzeUrl() {
|
| 399 |
const urlInput = document.getElementById('urlInput');
|
|
|
|
| 333 |
transform: none;
|
| 334 |
box-shadow: none;
|
| 335 |
}
|
| 336 |
+
/* Backend Toggle Switch */
|
| 337 |
+
.backend-toggle {
|
| 338 |
+
display: flex;
|
| 339 |
+
align-items: center;
|
| 340 |
+
justify-content: center;
|
| 341 |
+
gap: 0.75rem;
|
| 342 |
+
margin-top: 1rem;
|
| 343 |
+
padding: 0.75rem;
|
| 344 |
+
background: rgba(0,0,0,0.2);
|
| 345 |
+
border-radius: 10px;
|
| 346 |
+
}
|
| 347 |
+
.backend-toggle label {
|
| 348 |
+
font-size: 0.85rem;
|
| 349 |
+
color: #8b8ba7;
|
| 350 |
+
cursor: pointer;
|
| 351 |
+
}
|
| 352 |
+
.backend-toggle .active {
|
| 353 |
+
color: #a855f7;
|
| 354 |
+
font-weight: 600;
|
| 355 |
+
}
|
| 356 |
+
.toggle-switch {
|
| 357 |
+
position: relative;
|
| 358 |
+
width: 50px;
|
| 359 |
+
height: 26px;
|
| 360 |
+
}
|
| 361 |
+
.toggle-switch input {
|
| 362 |
+
opacity: 0;
|
| 363 |
+
width: 0;
|
| 364 |
+
height: 0;
|
| 365 |
+
}
|
| 366 |
+
.toggle-slider {
|
| 367 |
+
position: absolute;
|
| 368 |
+
cursor: pointer;
|
| 369 |
+
top: 0; left: 0; right: 0; bottom: 0;
|
| 370 |
+
background: linear-gradient(135deg, #22c55e, #16a34a);
|
| 371 |
+
border-radius: 26px;
|
| 372 |
+
transition: 0.3s;
|
| 373 |
+
}
|
| 374 |
+
.toggle-slider:before {
|
| 375 |
+
position: absolute;
|
| 376 |
+
content: '';
|
| 377 |
+
height: 20px;
|
| 378 |
+
width: 20px;
|
| 379 |
+
left: 3px;
|
| 380 |
+
bottom: 3px;
|
| 381 |
+
background: white;
|
| 382 |
+
border-radius: 50%;
|
| 383 |
+
transition: 0.3s;
|
| 384 |
+
}
|
| 385 |
+
.toggle-switch input:checked + .toggle-slider {
|
| 386 |
+
background: linear-gradient(135deg, #7c3aed, #a855f7);
|
| 387 |
+
}
|
| 388 |
+
.toggle-switch input:checked + .toggle-slider:before {
|
| 389 |
+
transform: translateX(24px);
|
| 390 |
+
}
|
| 391 |
+
.backend-status {
|
| 392 |
+
font-size: 0.75rem;
|
| 393 |
+
color: #6b6b8a;
|
| 394 |
+
text-align: center;
|
| 395 |
+
margin-top: 0.5rem;
|
| 396 |
+
}
|
| 397 |
+
.backend-status.local { color: #22c55e; }
|
| 398 |
+
.backend-status.remote { color: #a855f7; }
|
| 399 |
</style>
|
| 400 |
</head>
|
| 401 |
|
|
|
|
| 408 |
|
| 409 |
<div class="search-box">
|
| 410 |
<div class="input-group">
|
| 411 |
+
<input type="text" id="urlInput" placeholder="Entrez une URL ou du texte à analyser"
|
| 412 |
autofocus>
|
| 413 |
<button id="analyzeBtn" onclick="analyzeUrl()">
|
| 414 |
🔍 Analyser
|
| 415 |
</button>
|
| 416 |
</div>
|
| 417 |
+
|
| 418 |
+
<!-- Backend Toggle -->
|
| 419 |
+
<div class="backend-toggle">
|
| 420 |
+
<label id="labelLocal" class="active">🖥️ Local</label>
|
| 421 |
+
<div class="toggle-switch">
|
| 422 |
+
<input type="checkbox" id="backendToggle" onchange="toggleBackend()">
|
| 423 |
+
<span class="toggle-slider"></span>
|
| 424 |
+
</div>
|
| 425 |
+
<label id="labelRemote">☁️ HF Space</label>
|
| 426 |
+
</div>
|
| 427 |
+
<div class="backend-status local" id="backendStatus">Backend: localhost:5001 (léger, sans ML)</div>
|
| 428 |
</div>
|
| 429 |
|
| 430 |
<div class="loading" id="loading">
|
|
|
|
| 467 |
</div>
|
| 468 |
|
| 469 |
<script>
|
| 470 |
+
// Backend URLs
|
| 471 |
+
const LOCAL_API_URL = 'http://localhost:5001';
|
| 472 |
+
const REMOTE_API_URL = 'https://domloyer-syscred.hf.space';
|
| 473 |
+
let API_URL = LOCAL_API_URL;
|
| 474 |
+
|
| 475 |
+
// Switch the analysis backend between the local server and the HF Space,
// keeping the status line and the two toggle labels in sync.
function toggleBackend() {
    const useRemote = document.getElementById('backendToggle').checked;
    const status = document.getElementById('backendStatus');
    const labelLocal = document.getElementById('labelLocal');
    const labelRemote = document.getElementById('labelRemote');

    API_URL = useRemote ? REMOTE_API_URL : LOCAL_API_URL;
    status.textContent = useRemote
        ? 'Backend: HF Space (ML complet, plus lent)'
        : 'Backend: localhost:5001 (léger, sans ML)';
    status.className = useRemote ? 'backend-status remote' : 'backend-status local';

    // Exactly one label carries the highlight at any time.
    labelLocal.classList.toggle('active', !useRemote);
    labelRemote.classList.toggle('active', useRemote);

    console.log('[SysCRED] Backend switched to:', API_URL);
}
|
| 496 |
|
| 497 |
async function analyzeUrl() {
|
| 498 |
const urlInput = document.getElementById('urlInput');
|
syscred/verification_system.py
CHANGED
|
@@ -464,14 +464,28 @@ class CredibilityVerificationSystem:
|
|
| 464 |
adjustments += w_ent * boost
|
| 465 |
total_weight_used += w_ent
|
| 466 |
|
| 467 |
-
# 6. Text Coherence (
|
| 468 |
-
w_coh = self.weights.get('coherence', 0.
|
| 469 |
coherence = nlp_results.get('coherence_score')
|
| 470 |
if coherence is not None:
|
| 471 |
# Coherence is usually 0.0 to 1.0
|
| 472 |
# Center around 0.5: >0.5 improves, <0.5 penalizes
|
| 473 |
adjustments += (coherence - 0.5) * w_coh
|
| 474 |
total_weight_used += w_coh
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 475 |
|
| 476 |
# Final calculation
|
| 477 |
# Base 0.5 + sum of weighted adjustments
|
|
@@ -648,6 +662,15 @@ class CredibilityVerificationSystem:
|
|
| 648 |
'coherence_score': nlp_results.get('coherence_score'),
|
| 649 |
'sentiment_explanation_preview': (nlp_results.get('sentiment_explanation') or [])[:3]
|
| 650 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 651 |
# [NEW] TREC Evidence section
|
| 652 |
'evidences': evidences or [],
|
| 653 |
'metadonnees': {}
|
|
@@ -758,6 +781,20 @@ class CredibilityVerificationSystem:
|
|
| 758 |
'weight': f"{int(self.weights.get('sentiment_neutrality',0)*100)}%",
|
| 759 |
'impact': '-' if sent.get('score', 0) > 0.9 else '0'
|
| 760 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 761 |
|
| 762 |
return factors
|
| 763 |
|
|
@@ -828,31 +865,44 @@ class CredibilityVerificationSystem:
|
|
| 828 |
print("[SysCRED] Running rule-based analysis...")
|
| 829 |
rule_results = self.rule_based_analysis(cleaned_text, external_data)
|
| 830 |
|
| 831 |
-
# 5. NLP
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 832 |
print("[SysCRED] Running NLP analysis...")
|
| 833 |
nlp_results = self.nlp_analysis(cleaned_text)
|
| 834 |
|
| 835 |
-
#
|
| 836 |
overall_score = self.calculate_overall_score(rule_results, nlp_results)
|
| 837 |
print(f"[SysCRED] ✓ Credibility score: {overall_score:.2f}")
|
| 838 |
|
| 839 |
-
# 7. [NEW] GraphRAG Context Retrieval
|
| 840 |
-
graph_context = ""
|
| 841 |
-
similar_uris = []
|
| 842 |
-
if self.graph_rag and 'source_analysis' in rule_results:
|
| 843 |
-
domain = rule_results['source_analysis'].get('domain', '')
|
| 844 |
-
# Pass keywords for text search if domain is empty or generic
|
| 845 |
-
keywords = []
|
| 846 |
-
if not domain and cleaned_text:
|
| 847 |
-
keywords = cleaned_text.split()[:5] # Simple keyword extraction
|
| 848 |
-
|
| 849 |
-
context = self.graph_rag.get_context(domain, keywords=keywords)
|
| 850 |
-
graph_context = context.get('full_text', '')
|
| 851 |
-
similar_uris = context.get('similar_uris', [])
|
| 852 |
-
|
| 853 |
-
if "Graph Memory" in graph_context:
|
| 854 |
-
print(f"[SysCRED] GraphRAG Context Found: {graph_context.splitlines()[1]}")
|
| 855 |
-
|
| 856 |
# 8. Generate report (Updated to include context)
|
| 857 |
report = self.generate_report(
|
| 858 |
input_data, cleaned_text, rule_results,
|
|
|
|
| 464 |
adjustments += w_ent * boost
|
| 465 |
total_weight_used += w_ent
|
| 466 |
|
| 467 |
+
# 6. Text Coherence (12%) (Vocabulary Diversity)
|
| 468 |
+
w_coh = self.weights.get('coherence', 0.12)
|
| 469 |
coherence = nlp_results.get('coherence_score')
|
| 470 |
if coherence is not None:
|
| 471 |
# Coherence is usually 0.0 to 1.0
|
| 472 |
# Center around 0.5: >0.5 improves, <0.5 penalizes
|
| 473 |
adjustments += (coherence - 0.5) * w_coh
|
| 474 |
total_weight_used += w_coh
|
| 475 |
+
|
| 476 |
+
# 7. [NEW] GraphRAG Context Score (15%)
|
| 477 |
+
# This uses historical knowledge from the knowledge graph
|
| 478 |
+
w_graph = self.weights.get('graph_context', 0.15)
|
| 479 |
+
graph_context_data = rule_results.get('graph_context_data', {})
|
| 480 |
+
if graph_context_data and graph_context_data.get('confidence', 0) > 0:
|
| 481 |
+
# Use combined score from GraphRAG
|
| 482 |
+
graph_score = graph_context_data.get('combined_score', 0.5)
|
| 483 |
+
confidence = graph_context_data.get('confidence', 0)
|
| 484 |
+
|
| 485 |
+
# Scale adjustment by confidence (0 confidence = no effect)
|
| 486 |
+
adjustment_factor = (graph_score - 0.5) * w_graph * confidence
|
| 487 |
+
adjustments += adjustment_factor
|
| 488 |
+
total_weight_used += w_graph * confidence # Partial weight based on confidence
|
| 489 |
|
| 490 |
# Final calculation
|
| 491 |
# Base 0.5 + sum of weighted adjustments
|
|
|
|
| 662 |
'coherence_score': nlp_results.get('coherence_score'),
|
| 663 |
'sentiment_explanation_preview': (nlp_results.get('sentiment_explanation') or [])[:3]
|
| 664 |
},
|
| 665 |
+
# [NEW] GraphRAG section
|
| 666 |
+
'graphRAG': {
|
| 667 |
+
'context_text': graph_context,
|
| 668 |
+
'context_score': rule_results.get('graph_context_data', {}).get('combined_score'),
|
| 669 |
+
'confidence': rule_results.get('graph_context_data', {}).get('confidence', 0),
|
| 670 |
+
'has_history': rule_results.get('graph_context_data', {}).get('has_history', False),
|
| 671 |
+
'history_count': rule_results.get('graph_context_data', {}).get('history_count', 0),
|
| 672 |
+
'similar_claims_count': rule_results.get('graph_context_data', {}).get('similar_count', 0)
|
| 673 |
+
},
|
| 674 |
# [NEW] TREC Evidence section
|
| 675 |
'evidences': evidences or [],
|
| 676 |
'metadonnees': {}
|
|
|
|
| 781 |
'weight': f"{int(self.weights.get('sentiment_neutrality',0)*100)}%",
|
| 782 |
'impact': '-' if sent.get('score', 0) > 0.9 else '0'
|
| 783 |
})
|
| 784 |
+
|
| 785 |
+
# 5. GraphRAG Context (NEW)
|
| 786 |
+
graph_data = rule_results.get('graph_context_data', {})
|
| 787 |
+
if graph_data.get('confidence', 0) > 0:
|
| 788 |
+
graph_score = graph_data.get('combined_score', 0.5)
|
| 789 |
+
impact = '+' if graph_score > 0.6 else ('-' if graph_score < 0.4 else '0')
|
| 790 |
+
factors.append({
|
| 791 |
+
'factor': 'Graph Context (History)',
|
| 792 |
+
'value': f"Score: {graph_score:.2f}, Confidence: {graph_data.get('confidence', 0):.0%}",
|
| 793 |
+
'weight': f"{int(self.weights.get('graph_context',0)*100)}%",
|
| 794 |
+
'impact': impact,
|
| 795 |
+
'history_count': graph_data.get('history_count', 0),
|
| 796 |
+
'similar_count': graph_data.get('similar_count', 0)
|
| 797 |
+
})
|
| 798 |
|
| 799 |
return factors
|
| 800 |
|
|
|
|
| 865 |
print("[SysCRED] Running rule-based analysis...")
|
| 866 |
rule_results = self.rule_based_analysis(cleaned_text, external_data)
|
| 867 |
|
| 868 |
+
# 5. [MOVED] GraphRAG Context Retrieval (Before NLP for context)
|
| 869 |
+
graph_context = ""
|
| 870 |
+
similar_uris = []
|
| 871 |
+
graph_context_data = {}
|
| 872 |
+
|
| 873 |
+
if self.graph_rag and 'source_analysis' in rule_results:
|
| 874 |
+
domain = rule_results['source_analysis'].get('domain', '')
|
| 875 |
+
# Pass keywords for text search if domain is empty or generic
|
| 876 |
+
keywords = []
|
| 877 |
+
if cleaned_text:
|
| 878 |
+
# Extract meaningful keywords (filter out short words)
|
| 879 |
+
keywords = [w for w in cleaned_text.split()[:10] if len(w) > 4]
|
| 880 |
+
|
| 881 |
+
# Get text context for display
|
| 882 |
+
context = self.graph_rag.get_context(domain, keywords=keywords)
|
| 883 |
+
graph_context = context.get('full_text', '')
|
| 884 |
+
similar_uris = context.get('similar_uris', [])
|
| 885 |
+
|
| 886 |
+
# Get numerical score for integration into scoring
|
| 887 |
+
graph_context_data = self.graph_rag.compute_context_score(domain, keywords=keywords)
|
| 888 |
+
|
| 889 |
+
# Add to rule_results for use in calculate_overall_score
|
| 890 |
+
rule_results['graph_context_data'] = graph_context_data
|
| 891 |
+
|
| 892 |
+
if graph_context_data.get('has_history'):
|
| 893 |
+
print(f"[SysCRED] GraphRAG: Domain has {graph_context_data['history_count']} prior evaluations, "
|
| 894 |
+
f"avg score: {graph_context_data['history_score']:.2f}")
|
| 895 |
+
if graph_context_data.get('similar_count', 0) > 0:
|
| 896 |
+
print(f"[SysCRED] GraphRAG: Found {graph_context_data['similar_count']} similar claims")
|
| 897 |
+
|
| 898 |
+
# 6. NLP analysis
|
| 899 |
print("[SysCRED] Running NLP analysis...")
|
| 900 |
nlp_results = self.nlp_analysis(cleaned_text)
|
| 901 |
|
| 902 |
+
# 7. Calculate score (Now includes GraphRAG context)
|
| 903 |
overall_score = self.calculate_overall_score(rule_results, nlp_results)
|
| 904 |
print(f"[SysCRED] ✓ Credibility score: {overall_score:.2f}")
|
| 905 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 906 |
# 8. Generate report (Updated to include context)
|
| 907 |
report = self.generate_report(
|
| 908 |
input_data, cleaned_text, rule_results,
|