DominiqueLoyer committed
Commit 231c7ce · Parent: 6699583

🔄 Sync with stable GitHub main (v2.3.1-stable-feb08 + cherry-picked blue glow, Google Fact Check)


- Synced from systemFactChecking Production repo
- Added missing requirements-distilled.txt
- Includes NER, E-E-A-T, TREC metrics, GraphRAG
- CPU-only distilled models for HF Space

CITATION.cff DELETED
@@ -1,63 +0,0 @@
- cff-version: 1.2.0
- message: "If you use SysCRED, please cite it as below."
- type: software
-
- authors:
-   - family-names: Loyer
-     given-names: Dominique S.
-     orcid: "https://orcid.org/0009-0003-9713-7109"
-     affiliation: "Université du Québec à Montréal (UQAM),
-       Institut des Sciences Cognitives"
-
- title: "SysCRED: Système Hybride d'Évaluation de la Crédibilité
-   de l'Information"
- date-released: "2026-02-28"
- year: 2026
-
- url: "https://huggingface.co/spaces/DomLoyer/syscred"
- repository-code: "https://huggingface.co/spaces/DomLoyer/syscred"
-
- license: "CC-BY-4.0"
-
- keywords:
-   - credibility evaluation
-   - fact-checking
-   - neuro-symbolic AI
-   - ontology
-   - knowledge graph
-   - information retrieval
-   - E-E-A-T
-   - hybrid system
-   - NLP
-   - transformers
-   - RDFLib
-   - BM25
-   - TREC
-
- references:
-   - type: article
-     authors:
-       - family-names: Loyer
-         given-names: Dominique S.
-     title: "Modeling a Hybrid System for Verifying Information Credibility"
-     year: 2025
-     doi: "10.13140/RG.2.2.36348.24961"
-   - type: article
-     authors:
-       - family-names: Loyer
-         given-names: Dominique S.
-     title: "Hybrid System Ontology for Information Source Verification"
-     year: 2025
-     doi: "10.13140/RG.2.2.22926.47680"
-
- preferred-citation:
-   type: software
-   authors:
-     - family-names: Loyer
-       given-names: Dominique S.
-       orcid: "https://orcid.org/0009-0003-9713-7109"
-   title: "SysCRED: Système Hybride d'Évaluation de la Crédibilité
-     de l'Information"
-   year: 2026
-   url: "https://huggingface.co/spaces/DomLoyer/syscred"
-   publisher: "Hugging Face"

Dockerfile CHANGED
@@ -1,5 +1,5 @@
  # SysCRED Docker Configuration for Hugging Face Spaces
- # Full version with PyTorch and Transformers
+ # OPTIMIZED version with Distilled Models for faster startup
  FROM python:3.10-slim

  WORKDIR /app
@@ -7,27 +7,48 @@ WORKDIR /app
  ENV PYTHONDONTWRITEBYTECODE=1
  ENV PYTHONUNBUFFERED=1
  ENV PYTHONPATH=/app
+
+ # ============================================
+ # KEY OPTIMIZATION: Use distilled models
+ # ============================================
  ENV SYSCRED_LOAD_ML_MODELS=true
- ENV SYSCRED_ENV=production
+ ENV SYSCRED_USE_DISTILLED=true
+ ENV TRANSFORMERS_CACHE=/app/.cache/huggingface
+ ENV HF_HOME=/app/.cache/huggingface

  # Install system dependencies
- RUN apt-get update && apt-get install -y --no-install-recommends \
+ RUN apt-get update && apt-get install -y \
      build-essential \
      && rm -rf /var/lib/apt/lists/*

- # Copy requirements (full version with ML)
- COPY requirements.txt /app/requirements.txt
+ # Copy optimized requirements (distilled models, CPU-only torch)
+ COPY syscred/requirements-distilled.txt /app/requirements.txt

- # Install dependencies (includes PyTorch, Transformers)
+ # Install dependencies
  RUN pip install --no-cache-dir -r requirements.txt

- # Download spaCy models for NER
- RUN python -m spacy download en_core_web_md || true
- RUN python -m spacy download fr_core_news_md || true
+ # ============================================
+ # PRE-DOWNLOAD DISTILLED MODELS (Build Time)
+ # This avoids timeout during first request
+ # ============================================
+ RUN python -c "from transformers import pipeline; \
+     pipeline('sentiment-analysis', model='distilbert-base-uncased-finetuned-sst-2-english'); \
+     pipeline('ner', model='dslim/bert-base-NER'); \
+     print('✓ Distilled models pre-downloaded')"
+
+ # Download small spaCy models
+ RUN pip install spacy && \
+     python -m spacy download en_core_web_sm && \
+     python -m spacy download fr_core_news_sm && \
+     echo '✓ spaCy models downloaded'
+
+ # Pre-download sentence transformer (small version)
+ RUN python -c "from sentence_transformers import SentenceTransformer; \
+     SentenceTransformer('all-MiniLM-L6-v2'); \
+     print('✓ Sentence transformer pre-downloaded')"

  # Copy application code
  COPY syscred/ /app/syscred/
- COPY ontology/ /app/ontology/

  # Create user for HF Spaces (required)
  RUN useradd -m -u 1000 user
@@ -40,4 +61,5 @@ WORKDIR /app
  EXPOSE 7860

  # Run with HF Spaces port (7860)
- CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "2", "--timeout", "300", "syscred.backend_app:app"]
+ # One worker with 4 threads for better concurrency, timeout 600s
+ CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "1", "--threads", "4", "--timeout", "600", "syscred.backend_app:app"]
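The new Dockerfile gates model size on the `SYSCRED_USE_DISTILLED` environment variable, but the application-side check is not part of this commit. A minimal sketch of how such a flag might select checkpoints at startup; the distilled names match the models pre-downloaded above, while the helper itself and the full-size alternatives are assumptions for illustration:

```python
import os

# Hypothetical helper (not in this commit): choose model checkpoints
# based on the SYSCRED_USE_DISTILLED flag set in the Dockerfile.
def select_models() -> dict:
    distilled = os.getenv("SYSCRED_USE_DISTILLED", "false").lower() == "true"
    if distilled:
        # These names match the models pre-downloaded at build time.
        return {
            "sentiment": "distilbert-base-uncased-finetuned-sst-2-english",
            "ner": "dslim/bert-base-NER",
            "embeddings": "all-MiniLM-L6-v2",
            "spacy_en": "en_core_web_sm",
        }
    # Full-size alternatives are illustrative assumptions.
    return {
        "sentiment": "bert-base-uncased",
        "ner": "dslim/bert-base-NER",
        "embeddings": "all-mpnet-base-v2",
        "spacy_en": "en_core_web_md",
    }

os.environ["SYSCRED_USE_DISTILLED"] = "true"
print(select_models()["embeddings"])  # all-MiniLM-L6-v2
```

Resolving the flag once at startup keeps the distilled/full decision in one place instead of scattering `os.getenv` calls through the loading code.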
README.md CHANGED
@@ -4,98 +4,24 @@ emoji: 🔍
  colorFrom: purple
  colorTo: blue
  sdk: docker
+ pinned: false
+ license: mit
  app_port: 7860
- pinned: true
- license: cc-by-4.0
- tags:
-   - credibility
-   - fact-checking
-   - neuro-symbolic
-   - information-retrieval
-   - E-E-A-T
-   - ontology
-   - knowledge-graph
-   - NLP
-   - hybrid-system
-   - RDFLib
-   - transformers
-   - docker
- datasets:
-   - DomLoyer/trec-ap-88-90
-   - ucsbnlp/liar
  ---

- # SysCRED: Système Hybride d'Évaluation de la Crédibilité
-
- [![DOI](https://img.shields.io/badge/DOI-10.13140%2FRG.2.2.22926.47680-blue)](https://doi.org/10.13140/RG.2.2.22926.47680)
- [![ORCID](https://img.shields.io/badge/ORCID-0009--0003--9713--7109-green)](https://orcid.org/0009-0003-9713-7109)
- [![License: CC BY 4.0](https://img.shields.io/badge/License-CC%20BY%204.0-lightgrey.svg)](https://creativecommons.org/licenses/by/4.0/)
- [![Space](https://img.shields.io/badge/🤗%20Space-SysCRED-yellow)](https://huggingface.co/spaces/DomLoyer/syscred)
-
- SysCRED is a **hybrid neuro-symbolic system** for evaluating information credibility, developed as part of a doctoral thesis in cognitive computing at UQAM. It combines **ontological predicate rules** (OWL/RDFLib) with **neural NLP models** (Transformers) to produce a multidimensional credibility score.
-
- 🔗 **Live demo**: [domloyer-syscred.hf.space](https://domloyer-syscred.hf.space)
-
- ## Description
-
- SysCRED evaluates the credibility of sources and informational content using a hybrid approach inspired by Google's **E-E-A-T** criteria (Experience, Expertise, Authoritativeness, Trustworthiness) and formal information-retrieval metrics (Precision, Recall, F-measure, NDCG).
-
- ## Features
-
- - 🔍 **URL and text analysis**: credibility score for web and textual content
- - 🧠 **NLP analysis**: semantic coherence via Transformer models
- - 📊 **SEO and reputation score**: quantitative evaluation of the source
- - 🌐 **Knowledge Graph visualization**: interactive D3.js knowledge graph
- - 🔗 **Ontological reasoning**: formal predicate rules via RDFLib/OWL
- - 📈 **IR metrics**: BM25, TF-IDF, NDCG, Precision@k, Recall@k
-
- ## Architecture
-
- ```bash
- SysCRED/
- ├── Symbolic layer/      → OWL ontology + SPARQL/SWRL rules (RDFLib)
- ├── Neural layer/        → Transformer NLP models (classification, NER)
- ├── Aggregation layer/   → Weighted hybrid credibility score
- └── Visualization layer/ → D3.js Knowledge Graph + Flask dashboard
-
- # extended layout
-
- SysCRED/
- ├── ontology/              → Symbolic layer
- │   ├── syscred.owl        # Main OWL ontology
- │   ├── rules.sparql       # SPARQL rules
- │   └── swrl_rules.py      # SWRL rules via RDFLib
- ├── nlp/                   → Neural layer
- │   ├── classifier.py      # Transformer classification
- │   └── ner.py             # Named Entity Recognition
- ├── scoring/               → Aggregation layer
- │   ├── hybrid_score.py    # Weighted hybrid credibility score
- │   ├── eeat_metrics.py    # E-E-A-T criteria
- │   └── ir_metrics.py      # BM25, TF-IDF, NDCG
- ├── visualization/         → Visualization layer
- │   ├── knowledge_graph.js # D3.js Knowledge Graph
- │   └── dashboard/         # Flask dashboard
- ├── data/
- │   ├── trec-ap-88-90/     # TREC AP 88-90 dataset
- │   └── liar/              # LIAR dataset
- ├── app.py                 # Flask entry point
- ├── Dockerfile             # HF Spaces deployment
- ├── requirements.txt
- ├── README.md
- └── CITATION.cff
- ```
+ # SysCRED - Credibility Verification System
+
+ A hybrid neuro-symbolic system for credibility verification and fact-checking.
+
+ ## Features
+ - 🔍 URL and text credibility analysis
+ - 🧠 NLP-based coherence analysis with Transformers
+ - 📊 SEO and source reputation scoring
+ - 🌐 Knowledge graph visualization with D3.js
+ - 🔗 Ontology-based reasoning with RDFLib
+
+ ## Author
+ **Dominique S. Loyer** - UQAM
+
+ ## Usage
+ Enter a URL or paste text to analyze its credibility score based on multiple factors.
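Both README versions describe a weighted hybrid credibility score combining NLP coherence, source reputation, and ontology rule results. As a purely illustrative sketch of that aggregation idea (the function name, signature, and weights are assumptions, not the project's actual implementation):

```python
# Illustrative weighted aggregation of the three signal families the
# README lists: NLP coherence, source reputation, and ontology rules.
# Weights and rounding are arbitrary choices for the example.
def hybrid_score(coherence: float, reputation: float, rules_passed: int,
                 rules_total: int, weights=(0.4, 0.3, 0.3)) -> float:
    """Return a credibility score in [0, 1] from three sub-scores."""
    rule_score = rules_passed / rules_total if rules_total else 0.0
    w_nlp, w_rep, w_rules = weights
    score = w_nlp * coherence + w_rep * reputation + w_rules * rule_score
    return round(min(max(score, 0.0), 1.0), 3)

print(hybrid_score(coherence=0.8, reputation=0.6, rules_passed=3, rules_total=4))
# → 0.725
```

The symbolic contribution is folded in as a pass ratio here; a real system could instead weight each rule individually (the deleted ontology below models a per-rule `ruleWeight` property).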
ontology/sysCRED_data.ttl DELETED
The diff for this file is too large to render. See raw diff
 
ontology/sysCRED_onto26avrtil.ttl DELETED
@@ -1,1030 +0,0 @@
1
- @base <https://syscred.uqam.ca/ontology#> .
2
- @prefix : <https://syscred.uqam.ca/ontology#> .
3
- @prefix owl: <http://www.w3.org/2002/07/owl#> .
4
- @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
5
- @prefix xml: <http://www.w3.org/XML/1998/namespace> .
6
- @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
7
- @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
8
- #
9
- #
10
- # #################################################################
11
- # #
12
- # # Annotation properties
13
- # #
14
- # #################################################################
15
- #
16
- #
17
- # http://www.w3.org/2002/07/owl#maxCardinality
18
- #
19
- #
20
- #
21
- # #################################################################
22
- # #
23
- # # Object Properties
24
- # #
25
- # #################################################################
26
- #
27
- #
28
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#analyzesSource
29
- #
30
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#appliesRule
31
- #
32
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#assignsCredibilityLevel
33
- #
34
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#basedOnEvidence
35
- #
36
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#concernsCriterion
37
- #
38
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#concernsInformation
39
- #
40
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#configuredByExpert
41
- #
42
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#evaluatesCriterion
43
- #
44
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#fetchesDataFrom
45
- #
46
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#hasAuthor
47
- #
48
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#hasCriterionResult
49
- #
50
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#hasOriginalSource
51
- #
52
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#includesNLPResult
53
- #
54
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#includesRuleResult
55
- #
56
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#includesSourceAnalysis
57
- #
58
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#isReportOf
59
- #
60
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#isSubjectOfRequest
61
- #
62
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#obtainedVia
63
- #
64
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#originatesFrom
65
- #
66
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#producesReport
67
- #
68
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#submitsRequest
69
- #
70
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#submittedBy
71
- #
72
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#usesModel
73
- #
74
- #
75
- #
76
- # #################################################################
77
- # #
78
- # # Data properties
79
- # #
80
- # #################################################################
81
- #
82
- #
83
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#authorName
84
- #
85
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#coherenceScore
86
- #
87
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#completionTimestamp
88
- #
89
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#credibilityLevelValue
90
- #
91
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#credibilityScoreValue
92
- #
93
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#criterionResultConfidence
94
- #
95
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#criterionResultValue
96
- #
97
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#detectedBiases
98
- #
99
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#evidenceSnippet
100
- #
101
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#evidenceURL
102
- #
103
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#informationContent
104
- #
105
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#informationURL
106
- #
107
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#modelName
108
- #
109
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#modelType
110
- #
111
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#reportSummary
112
- #
113
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#requestStatus
114
- #
115
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ruleDescription
116
- #
117
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ruleLogic
118
- #
119
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ruleResultValid
120
- #
121
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ruleWeight
122
- #
123
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#sentimentScore
124
- #
125
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#sourceAnalyzedReputation
126
- #
127
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#sourceAnalyzedURL
128
- #
129
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#sourceMentionsCount
130
- #
131
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#sourceReputationScore
132
- #
133
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#sourceURL
134
- #
135
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#submissionTimestamp
136
- #
137
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#userName
138
- #
139
- #
140
- #
141
- # #################################################################
142
- # #
143
- # # Classes
144
- # #
145
- # #################################################################
146
- #
147
- #
148
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#AcademicJournal
149
- #
150
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ApiLLM
151
- #
152
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Author
153
- #
154
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#BaseDeFaits
155
- #
156
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#CredibilityLevel
157
- #
158
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Evidence
159
- #
160
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Expert
161
- #
162
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#FactCheckingOrganization
163
- #
164
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#InfoSourceAnalyse
165
- #
166
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#InformationFaibleCredibilite
167
- #
168
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#InformationHauteCredibilite
169
- #
170
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#InformationMoyenneCredibilite
171
- #
172
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#InformationSoumise
173
- #
174
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#InformationVerifiee
175
- #
176
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ModeleIA
177
- #
178
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#MoteurRecherche
179
- #
180
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#NewsWebsite
181
- #
182
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_Bas
183
- #
184
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_Haut
185
- #
186
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_Moyen
187
- #
188
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_NonVerifie
189
- #
190
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#PersonalBlog
191
- #
192
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#RapportEvaluation
193
- #
194
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#RefutingEvidence
195
- #
196
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#RegleVerification
197
- #
198
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#RequeteEvaluation
199
- #
200
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ResultatCritere
201
- #
202
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ResultatNLP
203
- #
204
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ResultatRegle
205
- #
206
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ResultatVerification
207
- #
208
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#SocialMediaPlatform
209
- #
210
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Source
211
- #
212
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#SupportingEvidence
213
- #
214
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#SystemeExterne
215
- #
216
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#User
217
- #
218
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#VerificationCriterion
219
- #
220
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#VerificationMethod
221
- #
222
- #
223
- #
224
- # #################################################################
225
- # #
226
- # # Individuals
227
- # #
228
- # #################################################################
229
- #
230
- #
231
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Criteria_AuthorExpertise
232
- #
233
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Criteria_CoherenceAnalysis
234
- #
235
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Criteria_CrossReferencing
236
- #
237
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Criteria_FactCheckDB
238
- #
239
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Criteria_SourceReputation
240
- #
241
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Criteria_ToneAnalysis
242
- #
243
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_Bas
244
- #
245
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_Haut
246
- #
247
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_Moyen
248
- #
249
- # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_NonVerifie
250
- #
251
- #
252
- #
253
- # #################################################################
254
- # #
255
- # # Annotations
256
- # #
257
- # #################################################################
258
- #
259
- #
260
- #
261
- #
262
- #
263
- #
264
- #
265
- #
266
- # #################################################################
267
- # #
268
- # # General axioms
269
- # #
270
- # #################################################################
271
- #
272
- #
273
- #
274
- #
275
- #
276
- #
277
- # Generated by the OWL API (version 4.5.29.2024-05-13T12:11:03Z) https://github.com/owlcs/owlapi
278
-
279
- <credibility-verification> a owl:Ontology;
280
- rdfs:comment "Ontologie enrichie et adaptée modélisant les concepts liés à la vérification de la crédibilité des sources d'information sur le Web, basée sur le rapport de modélisation UML et inspirée par l'ontologie de subvention recherche."@fr;
281
- rdfs:label "Ontologie Système de Vérification de Sources (Adaptée Rapport + Subvention)"@fr;
282
- owl:versionInfo "2.1" .
283
-
284
- owl:maxCardinality a owl:AnnotationProperty .
285
-
286
- :analyzesSource a owl:ObjectProperty;
287
- rdfs:domain :InfoSourceAnalyse;
288
- rdfs:range :Source;
289
- rdfs:label "analyse source"@fr .
290
-
291
- :appliesRule a owl:ObjectProperty, owl:FunctionalProperty;
292
- rdfs:domain :ResultatRegle;
293
- rdfs:range :RegleVerification;
294
- rdfs:label "applique règle"@fr .
295
-
296
- :assignsCredibilityLevel a owl:ObjectProperty, owl:FunctionalProperty;
297
- rdfs:domain :RapportEvaluation;
298
- rdfs:range :CredibilityLevel;
299
- rdfs:comment "Lie un rapport d'évaluation au niveau de crédibilité final attribué."@fr;
300
- rdfs:label "assigne niveau crédibilité"@fr .
301
-
302
- :basedOnEvidence a owl:ObjectProperty;
303
- rdfs:domain :RapportEvaluation;
304
- rdfs:range :Evidence;
305
- rdfs:comment "Lie un rapport d'évaluation aux preuves collectées."@fr;
306
- rdfs:label "basé sur preuve"@fr .
307
-
308
- :concernsCriterion a owl:ObjectProperty, owl:FunctionalProperty;
309
- rdfs:domain :ResultatCritere;
310
- rdfs:range :VerificationCriterion;
311
- rdfs:label "concerne critère"@fr .
312
-
313
- :concernsInformation a owl:ObjectProperty, owl:FunctionalProperty;
314
- owl:inverseOf :isSubjectOfRequest;
315
- rdfs:domain :RequeteEvaluation;
316
- rdfs:range :InformationSoumise;
317
- rdfs:label "concerne information"@fr .
318
-
319
- :configuredByExpert a owl:ObjectProperty;
320
- rdfs:domain _:genid1;
321
- rdfs:range :Expert;
322
- rdfs:label "configuré par expert"@fr .
323
-
324
- _:genid1 a owl:Class;
325
- owl:unionOf _:genid4 .
326
-
327
- _:genid4 a rdf:List;
328
- rdf:first :ModeleIA;
329
- rdf:rest _:genid3 .
330
-
331
- _:genid3 a rdf:List;
332
- rdf:first :RegleVerification;
333
- rdf:rest _:genid2 .
334
-
335
- _:genid2 a rdf:List;
336
- rdf:first :VerificationCriterion;
337
- rdf:rest rdf:nil .
338
-
339
- :evaluatesCriterion a owl:ObjectProperty;
340
- rdfs:domain _:genid5;
341
- rdfs:range :VerificationCriterion;
342
- rdfs:comment "Lie une règle ou un modèle au critère de vérification qu'il est conçu pour évaluer."@fr;
343
- rdfs:label "évalue critère"@fr .
344
-
345
- _:genid5 a owl:Class;
346
- owl:unionOf _:genid7 .
347
-
348
- _:genid7 a rdf:List;
349
- rdf:first :ModeleIA;
350
- rdf:rest _:genid6 .
351
-
352
- _:genid6 a rdf:List;
353
- rdf:first :RegleVerification;
354
- rdf:rest rdf:nil .
355
-
356
- :fetchesDataFrom a owl:ObjectProperty;
357
- rdfs:domain :RequeteEvaluation;
358
- rdfs:range :SystemeExterne;
359
- rdfs:label "récupère données de"@fr .
360
-
361
- :hasAuthor a owl:ObjectProperty;
362
- rdfs:domain :InformationSoumise;
363
- rdfs:range :Author;
364
- rdfs:comment "Lie une information soumise à son auteur présumé."@fr;
365
- rdfs:label "a pour auteur"@fr .
366
-
367
- :hasCriterionResult a owl:ObjectProperty;
368
- rdfs:domain :RapportEvaluation;
369
- rdfs:range :ResultatCritere;
370
- rdfs:comment "Lie un rapport au résultat détaillé pour un critère d'évaluation spécifique."@fr;
371
- rdfs:label "a résultat pour critère"@fr .
372
-
373
- :hasOriginalSource a owl:ObjectProperty;
374
- rdfs:domain :InformationSoumise;
375
- rdfs:range :Source;
376
- rdfs:comment "Lie une information soumise à sa source d'origine principale."@fr;
377
- rdfs:label "a pour source originale"@fr .
378
-
379
- :includesNLPResult a owl:ObjectProperty;
380
- rdfs:domain :RapportEvaluation;
381
- rdfs:range :ResultatNLP;
382
- rdfs:label "inclut résultat NLP"@fr .
383
-
384
- :includesRuleResult a owl:ObjectProperty;
385
- rdfs:domain :RapportEvaluation;
386
- rdfs:range :ResultatRegle;
387
- rdfs:label "inclut résultat règle"@fr .
388
-
389
- :includesSourceAnalysis a owl:ObjectProperty;
390
- rdfs:domain :RapportEvaluation;
391
- rdfs:range :InfoSourceAnalyse;
392
- rdfs:label "inclut analyse source"@fr .
393
-
394
- :isReportOf a owl:ObjectProperty, owl:InverseFunctionalProperty;
395
- owl:inverseOf :producesReport;
396
- rdfs:domain :RapportEvaluation;
397
- rdfs:range :RequeteEvaluation;
398
- rdfs:label "est rapport de"@fr .
399
-
400
- :isSubjectOfRequest a owl:ObjectProperty;
401
- rdfs:domain :InformationSoumise;
402
- rdfs:range :RequeteEvaluation;
403
- rdfs:label "est sujet de requête"@fr .
404
-
405
- :obtainedVia a owl:ObjectProperty;
406
- rdfs:domain :ResultatCritere;
407
- rdfs:range _:genid8;
408
- rdfs:label "obtenu via"@fr .
409
-
410
- _:genid8 a owl:Class;
411
- owl:unionOf _:genid10 .
412
-
413
- _:genid10 a rdf:List;
414
- rdf:first :ResultatNLP;
415
- rdf:rest _:genid9 .
416
-
417
- _:genid9 a rdf:List;
418
- rdf:first :ResultatRegle;
419
- rdf:rest rdf:nil .
420
-
421
- :originatesFrom a owl:ObjectProperty;
422
- rdfs:domain :Evidence;
423
- rdfs:range :Source;
424
- rdfs:comment "Lie une preuve à la source d'où elle a été extraite."@fr;
425
- rdfs:label "provient de"@fr .
426
-
427
- :producesReport a owl:ObjectProperty, owl:FunctionalProperty;
428
- rdfs:domain :RequeteEvaluation;
429
- rdfs:range :RapportEvaluation;
430
- rdfs:label "produit rapport"@fr .
431
-
432
- :submitsRequest a owl:ObjectProperty;
433
- owl:inverseOf :submittedBy;
434
- rdfs:domain :User;
435
- rdfs:range :RequeteEvaluation;
436
- rdfs:label "soumet requête"@fr .
437
-
438
- :submittedBy a owl:ObjectProperty, owl:FunctionalProperty;
439
- rdfs:domain :RequeteEvaluation;
440
- rdfs:range :User;
441
- rdfs:comment "Lie une requête de vérification à l'utilisateur qui l'a soumise."@fr;
442
- rdfs:label "soumise par"@fr .
443
-
444
- :usesModel a owl:ObjectProperty, owl:FunctionalProperty;
445
- rdfs:domain :ResultatNLP;
446
- rdfs:range :ModeleIA;
447
- rdfs:label "utilise modèle"@fr .
448
-
449
- :authorName a owl:DatatypeProperty;
450
- rdfs:domain :Author;
451
- rdfs:range xsd:string;
452
- rdfs:label "nom de l'auteur"@fr .
453
-
454
- :coherenceScore a owl:DatatypeProperty;
455
- rdfs:domain :ResultatNLP;
456
- rdfs:range xsd:float;
457
- rdfs:label "score cohérence"@fr .
458
-
459
- :completionTimestamp a owl:DatatypeProperty, owl:FunctionalProperty;
460
- rdfs:domain :RapportEvaluation;
461
- rdfs:range xsd:dateTime;
462
- rdfs:label "horodatage de complétion"@fr .
463
-
464
- :credibilityLevelValue a owl:DatatypeProperty, owl:FunctionalProperty;
465
- rdfs:domain :CredibilityLevel;
466
- rdfs:range xsd:float;
467
- rdfs:label "valeur numérique niveau"@fr .
468
-
469
- :credibilityScoreValue a owl:DatatypeProperty, owl:FunctionalProperty;
470
- rdfs:domain :RapportEvaluation;
471
- rdfs:range xsd:float;
472
- rdfs:label "valeur score crédibilité"@fr .
473
-
474
- :criterionResultConfidence a owl:DatatypeProperty;
475
- rdfs:domain :ResultatCritere;
476
- rdfs:range xsd:float;
477
- rdfs:label "confiance résultat critère"@fr .
478
-
479
- :criterionResultValue a owl:DatatypeProperty;
480
- rdfs:domain :ResultatCritere;
481
- rdfs:range xsd:string;
482
- rdfs:label "valeur résultat critère"@fr .
483
-
484
- :detectedBiases a owl:DatatypeProperty;
485
- rdfs:domain :ResultatNLP;
486
- rdfs:range xsd:string;
487
- rdfs:comment "";
488
- rdfs:label "biais détectés"@fr .
489
-
490
- :evidenceSnippet a owl:DatatypeProperty;
491
- rdfs:domain :Evidence;
492
- rdfs:range xsd:string;
493
- rdfs:label "extrait de la preuve"@fr .
494
-
495
- :evidenceURL a owl:DatatypeProperty;
496
- rdfs:domain :Evidence;
497
- rdfs:range xsd:anyURI;
498
- rdfs:label "URL de la preuve"@fr .
499
-
500
- :informationContent a owl:DatatypeProperty;
501
- rdfs:domain :InformationSoumise;
- rdfs:range xsd:string;
- rdfs:label "contenu de l'information"@fr .
-
- :informationURL a owl:DatatypeProperty;
- rdfs:domain :InformationSoumise;
- rdfs:range xsd:anyURI;
- rdfs:label "URL de l'information"@fr .
-
- :modelName a owl:DatatypeProperty;
- rdfs:domain :ModeleIA;
- rdfs:range xsd:string;
- rdfs:label "nom modèle"@fr .
-
- :modelType a owl:DatatypeProperty;
- rdfs:domain :ModeleIA;
- rdfs:range xsd:string;
- rdfs:label "type modèle"@fr .
-
- :reportSummary a owl:DatatypeProperty;
- rdfs:domain :RapportEvaluation;
- rdfs:range xsd:string;
- rdfs:label "résumé du rapport"@fr .
-
- :requestStatus a owl:DatatypeProperty, owl:FunctionalProperty;
- rdfs:domain :RequeteEvaluation;
- rdfs:range xsd:string;
- rdfs:label "statut requête"@fr .
-
- :ruleDescription a owl:DatatypeProperty;
- rdfs:domain :RegleVerification;
- rdfs:range xsd:string;
- rdfs:label "description règle"@fr .
-
- :ruleLogic a owl:DatatypeProperty;
- rdfs:domain :RegleVerification;
- rdfs:range xsd:string;
- rdfs:label "logique règle"@fr .
-
- :ruleResultValid a owl:DatatypeProperty;
- rdfs:domain :ResultatRegle;
- rdfs:range xsd:boolean;
- rdfs:label "résultat règle valide"@fr .
-
- :ruleWeight a owl:DatatypeProperty;
- rdfs:domain :RegleVerification;
- rdfs:range xsd:float;
- rdfs:label "poids règle"@fr .
-
- :sentimentScore a owl:DatatypeProperty;
- rdfs:domain :ResultatNLP;
- rdfs:range xsd:float;
- rdfs:label "score sentiment"@fr .
-
- :sourceAnalyzedReputation a owl:DatatypeProperty;
- rdfs:domain :InfoSourceAnalyse;
- rdfs:range xsd:string;
- rdfs:label "réputation source analysée"@fr .
-
- :sourceAnalyzedURL a owl:DatatypeProperty;
- rdfs:domain :InfoSourceAnalyse;
- rdfs:range xsd:anyURI;
- rdfs:label "URL source analysée"@fr .
-
- :sourceMentionsCount a owl:DatatypeProperty;
- rdfs:domain :InfoSourceAnalyse;
- rdfs:range xsd:integer;
- rdfs:label "mentions source analysée"@fr .
-
- :sourceReputationScore a owl:DatatypeProperty;
- rdfs:domain :Source;
- rdfs:range xsd:float;
- rdfs:label "score de réputation de la source"@fr .
-
- :sourceURL a owl:DatatypeProperty, owl:FunctionalProperty;
- rdfs:domain :Source;
- rdfs:range xsd:anyURI;
- rdfs:label "URL de la source"@fr .
-
- :submissionTimestamp a owl:DatatypeProperty, owl:FunctionalProperty;
- rdfs:domain :RequeteEvaluation;
- rdfs:range xsd:dateTime;
- rdfs:label "horodatage de soumission"@fr .
-
- :userName a owl:DatatypeProperty;
- rdfs:domain :User;
- rdfs:range xsd:string;
- rdfs:label "nom d'utilisateur"@fr .
-
- :AcademicJournal a owl:Class;
- rdfs:subClassOf :Source;
- rdfs:label "Revue Académique"@fr .
-
- :ApiLLM a owl:Class;
- rdfs:subClassOf :SystemeExterne;
- rdfs:label "API de LLM"@fr .
-
- :Author a owl:Class;
- rdfs:comment "Représente la personne ou l'entité créditée pour la création de l'information soumise."@fr;
- rdfs:label "Auteur"@fr .
-
- :BaseDeFaits a owl:Class;
- rdfs:subClassOf :SystemeExterne;
- rdfs:label "Base de Données de Faits Vérifiés"@fr .
-
- :CredibilityLevel a owl:Class;
- rdfs:comment "Représente le niveau de crédibilité qualitatif ou quantitatif attribué dans le rapport."@fr;
- rdfs:label "Niveau de Crédibilité"@fr .
-
- :Evidence a owl:Class;
- rdfs:comment "Représente un élément d'information externe utilisé pour étayer ou réfuter l'information vérifiée."@fr;
- rdfs:label "Preuve"@fr .
-
- :Expert a owl:Class;
- rdfs:subClassOf :User;
- rdfs:comment "Utilisateur qualifié responsable de la configuration et de l'amélioration du système (règles, modèles)."@fr;
- rdfs:label "Expert"@fr .
-
- :FactCheckingOrganization a owl:Class;
- rdfs:subClassOf :Source;
- rdfs:label "Organisation de Vérification des Faits"@fr .
-
- :InfoSourceAnalyse a owl:Class;
- rdfs:subClassOf _:genid11;
- rdfs:comment "Détails sur une source spécifique telle qu'analysée et présentée dans le rapport."@fr;
- rdfs:label "Information Source Analysée"@fr .
-
- _:genid11 a owl:Restriction;
- owl:cardinality "1"^^xsd:nonNegativeInteger;
- owl:onProperty :analyzesSource .
-
- :InformationFaibleCredibilite a owl:Class;
- owl:equivalentClass _:genid12;
- rdfs:subClassOf _:genid22;
- rdfs:label "Information Faiblement Crédible"@fr .
-
- _:genid12 a owl:Class;
- owl:intersectionOf _:genid21 .
-
- _:genid21 a rdf:List;
- rdf:first :InformationVerifiee;
- rdf:rest _:genid19 .
-
- _:genid19 a rdf:List;
- rdf:first _:genid20;
- rdf:rest _:genid17 .
-
- _:genid17 a rdf:List;
- rdf:first _:genid18;
- rdf:rest _:genid13 .
-
- _:genid13 a rdf:List;
- rdf:first _:genid14;
- rdf:rest rdf:nil .
-
- _:genid14 a owl:Restriction;
- owl:someValuesFrom _:genid15;
- owl:onProperty :isSubjectOfRequest .
-
- _:genid15 a owl:Restriction;
- owl:someValuesFrom _:genid16;
- owl:onProperty :producesReport .
-
- _:genid16 a owl:Restriction;
- owl:hasValue :Niveau_Bas;
- owl:onProperty :assignsCredibilityLevel .
-
- _:genid18 a owl:Class;
- owl:complementOf :InformationMoyenneCredibilite .
-
- _:genid20 a owl:Class;
- owl:complementOf :InformationHauteCredibilite .
-
- _:genid22 a owl:Restriction;
- owl:allValuesFrom _:genid23;
- owl:onProperty :isSubjectOfRequest .
-
- _:genid23 a owl:Restriction;
- owl:allValuesFrom _:genid24;
- owl:onProperty :producesReport .
-
- _:genid24 a owl:Restriction;
- owl:hasValue :Niveau_Bas;
- owl:onProperty :assignsCredibilityLevel .
-
- :InformationHauteCredibilite a owl:Class;
- owl:equivalentClass _:genid25;
- rdfs:subClassOf _:genid31;
- rdfs:label "Information Hautement Crédible"@fr .
-
- _:genid25 a owl:Class;
- owl:intersectionOf _:genid30 .
-
- _:genid30 a rdf:List;
- rdf:first :InformationVerifiee;
- rdf:rest _:genid26 .
-
- _:genid26 a rdf:List;
- rdf:first _:genid27;
- rdf:rest rdf:nil .
-
- _:genid27 a owl:Restriction;
- owl:someValuesFrom _:genid28;
- owl:onProperty :isSubjectOfRequest .
-
- _:genid28 a owl:Restriction;
- owl:someValuesFrom _:genid29;
- owl:onProperty :producesReport .
-
- _:genid29 a owl:Restriction;
- owl:hasValue :Niveau_Haut;
- owl:onProperty :assignsCredibilityLevel .
-
- _:genid31 a owl:Restriction;
- owl:allValuesFrom _:genid32;
- owl:onProperty :isSubjectOfRequest .
-
- _:genid32 a owl:Restriction;
- owl:allValuesFrom _:genid33;
- owl:onProperty :producesReport .
-
- _:genid33 a owl:Restriction;
- owl:hasValue :Niveau_Haut;
- owl:onProperty :assignsCredibilityLevel .
-
- :InformationMoyenneCredibilite a owl:Class;
- owl:equivalentClass _:genid34;
- rdfs:subClassOf _:genid42;
- rdfs:label "Information Moyennement Crédible"@fr .
-
- _:genid34 a owl:Class;
- owl:intersectionOf _:genid41 .
-
- _:genid41 a rdf:List;
- rdf:first :InformationVerifiee;
- rdf:rest _:genid39 .
-
- _:genid39 a rdf:List;
- rdf:first _:genid40;
- rdf:rest _:genid35 .
-
- _:genid35 a rdf:List;
- rdf:first _:genid36;
- rdf:rest rdf:nil .
-
- _:genid36 a owl:Restriction;
- owl:someValuesFrom _:genid37;
- owl:onProperty :isSubjectOfRequest .
-
- _:genid37 a owl:Restriction;
- owl:someValuesFrom _:genid38;
- owl:onProperty :producesReport .
-
- _:genid38 a owl:Restriction;
- owl:hasValue :Niveau_Moyen;
- owl:onProperty :assignsCredibilityLevel .
-
- _:genid40 a owl:Class;
- owl:complementOf :InformationHauteCredibilite .
-
- _:genid42 a owl:Restriction;
- owl:allValuesFrom _:genid43;
- owl:onProperty :isSubjectOfRequest .
-
- _:genid43 a owl:Restriction;
- owl:allValuesFrom _:genid44;
- owl:onProperty :producesReport .
-
- _:genid44 a owl:Restriction;
- owl:hasValue :Niveau_Moyen;
- owl:onProperty :assignsCredibilityLevel .
-
- :InformationSoumise a owl:Class;
- rdfs:comment "Représente l'unité d'information (texte, URL) telle que soumise pour vérification."@fr;
- rdfs:label "Information Soumise"@fr .
-
- :InformationVerifiee a owl:Class;
- owl:equivalentClass _:genid45;
- rdfs:label "Information Vérifiée"@fr .
-
- _:genid45 a owl:Class;
- owl:intersectionOf _:genid49 .
-
- _:genid49 a rdf:List;
- rdf:first :InformationSoumise;
- rdf:rest _:genid46 .
-
- _:genid46 a rdf:List;
- rdf:first _:genid47;
- rdf:rest rdf:nil .
-
- _:genid47 a owl:Restriction;
- owl:someValuesFrom _:genid48;
- owl:onProperty :isSubjectOfRequest .
-
- _:genid48 a owl:Restriction;
- owl:someValuesFrom :RapportEvaluation;
- owl:onProperty :producesReport .
-
- :ModeleIA a owl:Class;
- rdfs:subClassOf :VerificationMethod, _:genid50;
- rdfs:comment "Représente un modèle d'apprentissage automatique utilisé pour l'analyse sémantique ou autre."@fr;
- rdfs:label "Modèle IA/NLP"@fr .
-
- _:genid50 a owl:Restriction;
- owl:minCardinality "1"^^xsd:nonNegativeInteger;
- owl:onProperty :evaluatesCriterion .
-
- :MoteurRecherche a owl:Class;
- rdfs:subClassOf :SystemeExterne;
- rdfs:label "Moteur de Recherche"@fr .
-
- :NewsWebsite a owl:Class;
- rdfs:subClassOf :Source;
- rdfs:label "Site d'actualités"@fr .
-
- :Niveau_Bas a owl:Class, owl:NamedIndividual, :CredibilityLevel;
- :credibilityLevelValue "0.2"^^xsd:float;
- rdfs:label "Crédibilité Faible"@fr .
-
- :Niveau_Haut a owl:Class, owl:NamedIndividual, :CredibilityLevel;
- :credibilityLevelValue "0.8"^^xsd:float;
- rdfs:label "Crédibilité Élevée"@fr .
-
- :Niveau_Moyen a owl:Class, owl:NamedIndividual, :CredibilityLevel;
- :credibilityLevelValue "0.5"^^xsd:float;
- rdfs:label "Crédibilité Moyenne"@fr .
-
- :Niveau_NonVerifie a owl:Class, owl:NamedIndividual, :CredibilityLevel;
- rdfs:label "Non Vérifié"@fr .
-
- :PersonalBlog a owl:Class;
- rdfs:subClassOf :Source;
- rdfs:label "Blog Personnel"@fr .
-
- :RapportEvaluation a owl:Class;
- rdfs:subClassOf _:genid51;
- rdfs:comment "Encapsule les résultats complets du processus de vérification pour une requête donnée."@fr;
- rdfs:label "Rapport d'Évaluation"@fr .
-
- _:genid51 a owl:Restriction;
- owl:cardinality "1"^^xsd:nonNegativeInteger;
- owl:onProperty :assignsCredibilityLevel .
-
- :RefutingEvidence a owl:Class;
- rdfs:subClassOf :Evidence;
- owl:disjointWith :SupportingEvidence;
- rdfs:label "Preuve réfutante"@fr .
-
- :RegleVerification a owl:Class;
- rdfs:subClassOf :VerificationMethod, _:genid52;
- rdfs:comment "Représente une règle logique prédéfinie utilisée pour évaluer un aspect de la crédibilité."@fr;
- rdfs:label "Règle de Vérification"@fr .
-
- _:genid52 a owl:Restriction;
- owl:minCardinality "1"^^xsd:nonNegativeInteger;
- owl:onProperty :evaluatesCriterion .
-
- :RequeteEvaluation a owl:Class;
- rdfs:subClassOf _:genid53, _:genid54, _:genid55;
- rdfs:comment "Représente une demande spécifique de vérification de crédibilité soumise par un utilisateur."@fr;
- rdfs:label "Requête d'Évaluation"@fr .
-
- _:genid53 a owl:Restriction;
- owl:minCardinality "0"^^xsd:nonNegativeInteger;
- owl:onProperty :producesReport .
-
- _:genid54 a owl:Restriction;
- owl:cardinality "1"^^xsd:nonNegativeInteger;
- owl:onProperty :concernsInformation .
-
- _:genid55 a owl:Restriction;
- owl:cardinality "1"^^xsd:nonNegativeInteger;
- owl:onProperty :submittedBy .
-
- :ResultatCritere a owl:Class;
- rdfs:subClassOf _:genid56, _:genid57;
- rdfs:comment "Représente le résultat de l'évaluation d'un critère spécifique pour une requête, potentiellement basé sur un ou plusieurs résultats de règles/NLP."@fr;
- rdfs:label "Résultat Critère"@fr .
-
- _:genid56 a owl:Restriction;
- owl:minCardinality "1"^^xsd:nonNegativeInteger;
- owl:onProperty :obtainedVia .
-
- _:genid57 a owl:Restriction;
- owl:cardinality "1"^^xsd:nonNegativeInteger;
- owl:onProperty :concernsCriterion .
-
- :ResultatNLP a owl:Class;
- rdfs:subClassOf :ResultatVerification, _:genid58;
- owl:disjointWith :ResultatRegle;
- rdfs:comment "Résultat de l'analyse effectuée par un modèle IA/NLP."@fr;
- rdfs:label "Résultat NLP"@fr .
-
- _:genid58 a owl:Restriction;
- owl:cardinality "1"^^xsd:nonNegativeInteger;
- owl:onProperty :usesModel .
-
- :ResultatRegle a owl:Class;
- rdfs:subClassOf :ResultatVerification, _:genid59;
- rdfs:comment "Résultat de l'application d'une règle de vérification spécifique."@fr;
- rdfs:label "Résultat Règle"@fr .
-
- _:genid59 a owl:Restriction;
- owl:cardinality "1"^^xsd:nonNegativeInteger;
- owl:onProperty :appliesRule .
-
- :ResultatVerification a owl:Class;
- rdfs:comment "Classe parente pour les résultats issus des différentes méthodes de vérification."@fr;
- rdfs:label "Résultat de Vérification (Interne)"@fr .
-
- :SocialMediaPlatform a owl:Class;
- rdfs:subClassOf :Source;
- rdfs:label "Plateforme de Média Social"@fr .
-
- :Source a owl:Class;
- rdfs:comment "Représente une entité (site web, organisation, personne) d'où provient l'information originale ou la preuve."@fr;
- rdfs:label "Source"@fr .
-
- :SupportingEvidence a owl:Class;
- rdfs:subClassOf :Evidence;
- rdfs:label "Preuve à l'appui"@fr .
-
- :SystemeExterne a owl:Class;
- rdfs:comment "Représente une source de données ou un service externe utilisé pendant le processus de vérification (API, base de données)."@fr;
- rdfs:label "Système Externe"@fr .
-
- :User a owl:Class;
- rdfs:comment "Représente une personne interagissant avec le système de vérification."@fr;
- rdfs:label "Utilisateur"@fr .
-
- :VerificationCriterion a owl:Class;
- rdfs:comment "Aspect spécifique évalué lors de la vérification (ex: réputation de la source, cohérence)."@fr;
- rdfs:label "Critère de Vérification"@fr .
-
- :VerificationMethod a owl:Class;
- rdfs:comment "Représente une approche (règle, modèle IA) utilisée pour évaluer la crédibilité."@fr;
- rdfs:label "Méthode de Vérification"@fr .
-
- :Criteria_AuthorExpertise a owl:NamedIndividual, :VerificationCriterion;
- rdfs:label "Expertise de l'auteur"@fr .
-
- :Criteria_CoherenceAnalysis a owl:NamedIndividual, :VerificationCriterion;
- rdfs:label "Analyse de la cohérence"@fr .
-
- :Criteria_CrossReferencing a owl:NamedIndividual, :VerificationCriterion;
- rdfs:label "Références croisées"@fr .
-
- :Criteria_FactCheckDB a owl:NamedIndividual, :VerificationCriterion;
- rdfs:label "Consultation base de données Fact-Check"@fr .
-
- :Criteria_SourceReputation a owl:NamedIndividual, :VerificationCriterion;
- rdfs:label "Réputation de la source"@fr .
-
- :Criteria_ToneAnalysis a owl:NamedIndividual, :VerificationCriterion;
- rdfs:label "Analyse du ton (ex: neutre, biaisé)"@fr .
-
- _:genid60 owl:maxCardinality "1"^^xsd:nonNegativeInteger .
-
- _:genid61 a owl:AllDisjointClasses;
- owl:members _:genid66 .
-
- _:genid66 a rdf:List;
- rdf:first :AcademicJournal;
- rdf:rest _:genid65 .
-
- _:genid65 a rdf:List;
- rdf:first :FactCheckingOrganization;
- rdf:rest _:genid64 .
-
- _:genid64 a rdf:List;
- rdf:first :NewsWebsite;
- rdf:rest _:genid63 .
-
- _:genid63 a rdf:List;
- rdf:first :PersonalBlog;
- rdf:rest _:genid62 .
-
- _:genid62 a rdf:List;
- rdf:first :SocialMediaPlatform;
- rdf:rest rdf:nil .
-
- _:genid67 a owl:AllDisjointClasses;
- owl:members _:genid70 .
-
- _:genid70 a rdf:List;
- rdf:first :ApiLLM;
- rdf:rest _:genid69 .
-
- _:genid69 a rdf:List;
- rdf:first :BaseDeFaits;
- rdf:rest _:genid68 .
-
- _:genid68 a rdf:List;
- rdf:first :MoteurRecherche;
- rdf:rest rdf:nil .
-
- _:genid71 a owl:AllDisjointClasses;
- owl:members _:genid74 .
-
- _:genid74 a rdf:List;
- rdf:first :InformationFaibleCredibilite;
- rdf:rest _:genid73 .
-
- _:genid73 a rdf:List;
- rdf:first :InformationHauteCredibilite;
- rdf:rest _:genid72 .
-
- _:genid72 a rdf:List;
- rdf:first :InformationMoyenneCredibilite;
- rdf:rest rdf:nil .
-
- _:genid75 a owl:AllDisjointClasses;
- owl:members _:genid79 .
-
- _:genid79 a rdf:List;
- rdf:first :Niveau_Bas;
- rdf:rest _:genid78 .
-
- _:genid78 a rdf:List;
- rdf:first :Niveau_Haut;
- rdf:rest _:genid77 .
-
- _:genid77 a rdf:List;
- rdf:first :Niveau_Moyen;
- rdf:rest _:genid76 .
-
- _:genid76 a rdf:List;
- rdf:first :Niveau_NonVerifie;
- rdf:rest rdf:nil .
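The deleted ontology fixes representative numeric values for its three disjoint credibility levels (:Niveau_Bas = 0.2, :Niveau_Moyen = 0.5, :Niveau_Haut = 0.8) plus an unvalued :Niveau_NonVerifie. A minimal sketch of how a pipeline's [0, 1] score could be bucketed onto those individuals; the 0.35/0.65 cut-offs are illustrative assumptions, not thresholds taken from the ontology:

```python
from typing import Optional

def assign_credibility_level(score: Optional[float]) -> str:
    """Map a [0, 1] credibility score to an ontology level individual.

    The cut-offs below are hypothetical: the ontology only declares the
    representative values 0.2, 0.5 and 0.8 for the three levels.
    """
    if score is None:
        return "Niveau_NonVerifie"
    if score < 0.35:
        return "Niveau_Bas"
    if score < 0.65:
        return "Niveau_Moyen"
    return "Niveau_Haut"
```

Because the ontology also declares the three levels pairwise disjoint (via owl:AllDisjointClasses), any such mapping must return exactly one level per score.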
requirements-distilled.txt DELETED
@@ -1,51 +0,0 @@
- # SysCRED - Optimized Requirements with Distilled Models
- # Système Hybride de Vérification de Crédibilité
- # (c) Dominique S. Loyer
- #
- # This version uses DISTILLED models for faster loading and lower memory:
- # - DistilBERT instead of BERT (~60% smaller, 40% faster)
- # - MiniLM for sentence embeddings (~5x smaller than all-mpnet)
- # - Optimized for HuggingFace Spaces (16GB RAM limit)
-
- # === Core Dependencies ===
- requests>=2.28.0
- beautifulsoup4>=4.11.0
- python-whois>=0.8.0
-
- # === RDF/Ontology ===
- rdflib>=6.0.0
-
- # === Machine Learning (Distilled/Optimized) ===
- # Using CPU-only torch for smaller footprint
- --extra-index-url https://download.pytorch.org/whl/cpu
- torch>=2.0.0
-
- # Transformers with minimal dependencies
- transformers>=4.30.0
-
- # Distilled sentence transformer (5x smaller than full models)
- sentence-transformers>=2.2.0
-
- # Data processing
- numpy>=1.24.0
- pandas>=2.0.0
-
- # === Explainability ===
- lime>=0.2.0
-
- # === NLP for NER (French + English) ===
- spacy>=3.5.0
- # Note: Download models in Dockerfile with:
- # python -m spacy download fr_core_news_sm
- # python -m spacy download en_core_web_sm
-
- # === Web Backend ===
- flask>=2.3.0
- flask-cors>=4.0.0
- python-dotenv>=1.0.0
-
- # === Production ===
- gunicorn>=20.1.0
-
- # === Development/Testing ===
- pytest>=7.0.0
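The distilled requirements trade accuracy for footprint (DistilBERT vs BERT, MiniLM vs all-mpnet). One way to keep both requirement sets usable from the same code is an environment-driven model switch; the registry below is a sketch, and the concrete model names and the SYSCRED_FULL_MODELS variable are assumptions for illustration, not part of the repo:

```python
import os

# Hypothetical model registry: distilled defaults for memory-constrained
# hosts (e.g. HF Spaces), full models only when explicitly requested.
DISTILLED_MODELS = {
    "encoder": "distilbert-base-uncased",
    "embedder": "sentence-transformers/all-MiniLM-L6-v2",
}
FULL_MODELS = {
    "encoder": "bert-base-uncased",
    "embedder": "sentence-transformers/all-mpnet-base-v2",
}

def select_models(env=os.environ):
    """Return the model registry matching the deployment profile."""
    use_full = str(env.get("SYSCRED_FULL_MODELS", "")).lower() in ("1", "true", "yes")
    return FULL_MODELS if use_full else DISTILLED_MODELS
```

Defaulting to the distilled set keeps the HF Space deployment inside its RAM limit without a separate code path.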
requirements.txt DELETED
@@ -1,38 +0,0 @@
- # SysCRED - Requirements (Full version with ML models)
- # Système Hybride de Vérification de Crédibilité
- # (c) Dominique S. Loyer
- # Version complète pour HuggingFace Spaces et développement local
-
- # === Core Dependencies ===
- requests>=2.28.0
- beautifulsoup4>=4.11.0
- python-whois>=0.8.0
- lxml>=4.9.0
-
- # === RDF/Ontology ===
- rdflib>=6.0.0
-
- # === Machine Learning ===
- transformers>=4.30.0
- torch>=2.0.0
- numpy>=1.24.0
- sentence-transformers>=2.2.0
- accelerate>=0.20.0
- spacy>=3.6.0
-
- # === Explainability ===
- lime>=0.2.0
-
- # === Web Backend ===
- flask>=2.3.0
- flask-cors>=4.0.0
- python-dotenv>=1.0.0
- pandas>=2.0.0
-
- # === Production/Database ===
- gunicorn>=20.1.0
- psycopg2-binary>=2.9.0
- flask-sqlalchemy>=3.0.0
-
- # === Development/Testing ===
- pytest>=7.0.0
syscred/__init__.py CHANGED
@@ -9,19 +9,17 @@ Citation Key: loyerModelingHybridSystem2025
 Modules:
 - api_clients: Web scraping, WHOIS, Fact Check APIs
 - ir_engine: BM25, QLD, TF-IDF, PRF (from TREC)
-- trec_retriever: Evidence retrieval for fact-checking (v2.3)
-- trec_dataset: TREC AP88-90 data loader (v2.3)
-- liar_dataset: LIAR benchmark dataset loader (v2.3)
+- trec_retriever: Evidence retrieval for fact-checking (NEW v2.3)
+- trec_dataset: TREC AP88-90 data loader (NEW v2.3)
+- liar_dataset: LIAR benchmark dataset loader (NEW v2.3)
 - seo_analyzer: SEO analysis, PageRank estimation
 - eval_metrics: MAP, NDCG, P@K, Recall, MRR
 - ontology_manager: RDFLib integration
 - verification_system: Main credibility pipeline
-- graph_rag: GraphRAG for contextual memory (v2.3)
-- ner_analyzer: Named Entity Recognition with spaCy (v2.4)
-- eeat_calculator: Google E-E-A-T metrics (v2.4)
+- graph_rag: GraphRAG for contextual memory (enhanced v2.3)
 """
 
-__version__ = "2.4.0"
+__version__ = "2.3.1"
 __author__ = "Dominique S. Loyer"
 __citation__ = "loyerModelingHybridSystem2025"
@@ -34,15 +32,11 @@ from syscred.ir_engine import IREngine
 from syscred.eval_metrics import EvaluationMetrics
 from syscred.graph_rag import GraphRAG
 
-# NER and E-E-A-T (NEW - v2.4)
-from syscred.ner_analyzer import NERAnalyzer
-from syscred.eeat_calculator import EEATCalculator, EEATScore
-
-# TREC Integration (v2.3)
+# TREC Integration (NEW - Feb 2026)
 from syscred.trec_retriever import TRECRetriever, Evidence, RetrievalResult
 from syscred.trec_dataset import TRECDataset, TRECTopic
 
-# LIAR Benchmark (v2.3)
+# LIAR Benchmark (NEW - Feb 2026)
 from syscred.liar_dataset import LIARDataset, LiarStatement, LiarLabel
 
 # Convenience alias
@@ -58,17 +52,13 @@ __all__ = [
     'IREngine',
     'EvaluationMetrics',
     'GraphRAG',
-    # NER & E-E-A-T (NEW v2.4)
-    'NERAnalyzer',
-    'EEATCalculator',
-    'EEATScore',
-    # TREC (v2.3)
+    # TREC (NEW)
     'TRECRetriever',
     'TRECDataset',
     'TRECTopic',
     'Evidence',
     'RetrievalResult',
-    # LIAR Benchmark (v2.3)
+    # LIAR Benchmark (NEW)
     'LIARDataset',
     'LiarStatement',
     'LiarLabel',
syscred/backend_app.py CHANGED
@@ -22,16 +22,12 @@ import traceback
 from pathlib import Path
 try:
     from dotenv import load_dotenv
-    # .env is at project root (parent of syscred/)
-    env_path = Path(__file__).resolve().parent.parent / '.env'
-    if not env_path.exists():
-        # Fallback: check syscred/ directory
-        env_path = Path(__file__).parent / '.env'
+    env_path = Path(__file__).parent / '.env'
     if env_path.exists():
         load_dotenv(env_path)
         print(f"[SysCRED Backend] Loaded .env from {env_path}")
     else:
-        print(f"[SysCRED Backend] No .env file found, using system env vars")
+        print(f"[SysCRED Backend] No .env file found at {env_path}")
 except ImportError:
     print("[SysCRED Backend] python-dotenv not installed, using system env vars")
@@ -89,16 +85,6 @@ except ImportError as e:
 app = Flask(__name__)
 CORS(app)  # Enable CORS for frontend
 
-# Allow iframe embedding on UQAM domains (for syscred.uqam.ca mirror)
-@app.after_request
-def add_security_headers(response):
-    """Add security headers allowing UQAM iframe embedding."""
-    response.headers['X-Frame-Options'] = 'ALLOW-FROM https://syscred.uqam.ca'
-    response.headers['Content-Security-Policy'] = (
-        "frame-ancestors 'self' https://syscred.uqam.ca https://*.uqam.ca"
-    )
-    return response
-
 # Initialize Database
 try:
     init_db(app)  # [NEW] Setup DB connection
@@ -281,62 +267,6 @@ def verify_endpoint():
 
     print(f"[SysCRED Backend] Score: {result.get('scoreCredibilite', 'N/A')}")
 
-    # [NEW] TREC Evidence Search + IR Metrics
-    try:
-        global trec_retriever, eval_metrics
-
-        # Initialize TREC if needed
-        if trec_retriever is None and TREC_AVAILABLE:
-            trec_retriever = TRECRetriever(use_stemming=True, enable_prf=False)
-            trec_retriever.corpus = TREC_DEMO_CORPUS
-            eval_metrics = EvaluationMetrics()
-            print("[SysCRED Backend] TREC Retriever initialized with demo corpus")
-
-        if trec_retriever and eval_metrics:
-            import time
-            start_time = time.time()
-
-            # Use the input text as query
-            query_text = input_data[:200] if not credibility_system.is_url(input_data) else result.get('informationEntree', input_data)[:200]
-
-            trec_result = trec_retriever.retrieve_evidence(query_text, k=5, model='bm25')
-            search_time = (time.time() - start_time) * 1000
-
-            retrieved_ids = [e.doc_id for e in trec_result.evidences]
-
-            # Use climate-related docs as "relevant" for demo evaluation
-            # In production, this would come from qrels files
-            relevant_ids = set(TREC_DEMO_CORPUS.keys())  # All docs as relevant pool
-
-            # Compute IR metrics
-            k = len(retrieved_ids) if retrieved_ids else 1
-            precision = eval_metrics.precision_at_k(retrieved_ids, relevant_ids, k) if retrieved_ids else 0
-            recall = eval_metrics.recall_at_k(retrieved_ids, relevant_ids, k) if retrieved_ids else 0
-            ap = eval_metrics.average_precision(retrieved_ids, relevant_ids) if retrieved_ids else 0
-            mrr = eval_metrics.mrr(retrieved_ids, relevant_ids) if retrieved_ids else 0
-
-            relevance_dict = {doc: 1 for doc in relevant_ids}
-            ndcg = eval_metrics.ndcg_at_k(retrieved_ids, relevance_dict, k) if retrieved_ids else 0
-
-            # TF-IDF score from top result
-            tfidf_score = trec_result.evidences[0].score if trec_result.evidences else 0
-
-            result['trec_metrics'] = {
-                'precision': round(precision, 4),
-                'recall': round(recall, 4),
-                'map': round(ap, 4),
-                'ndcg': round(ndcg, 4),
-                'tfidf_score': round(tfidf_score, 4),
-                'mrr': round(mrr, 4),
-                'retrieved_count': len(retrieved_ids),
-                'corpus_size': len(TREC_DEMO_CORPUS),
-                'search_time_ms': round(search_time, 2)
-            }
-            print(f"[SysCRED Backend] TREC: P={precision:.3f} R={recall:.3f} MAP={ap:.3f} NDCG={ndcg:.3f} MRR={mrr:.3f}")
-    except Exception as e:
-        print(f"[SysCRED Backend] TREC metrics error: {e}")
-        result['trec_metrics'] = {'error': str(e)}
-
     # [NEW] Persist to Database
     try:
        new_analysis = AnalysisResult(
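The removed block computed Precision, Recall, MAP, NDCG and MRR through the EvaluationMetrics class. A stdlib sketch of three of those metrics, using the standard TREC definitions (the class's exact method signatures appear in the diff above; this reimplementation is only illustrative, not the repo's code):

```python
def precision_at_k(retrieved, relevant, k):
    """Fraction of the top-k retrieved documents that are relevant."""
    top = retrieved[:k]
    return sum(1 for d in top if d in relevant) / k if k else 0.0

def average_precision(retrieved, relevant):
    """Mean of precision values at each rank where a relevant doc appears."""
    hits, total = 0, 0.0
    for i, d in enumerate(retrieved, start=1):
        if d in relevant:
            hits += 1
            total += hits / i
    return total / len(relevant) if relevant else 0.0

def mrr(retrieved, relevant):
    """Reciprocal rank of the first relevant document (0 if none)."""
    for i, d in enumerate(retrieved, start=1):
        if d in relevant:
            return 1.0 / i
    return 0.0
```

For example, with retrieved = ["d1", "d2", "d3"] and relevant = {"d2"}, both AP and MRR equal 0.5, since the single relevant document appears at rank 2.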
syscred/config.py CHANGED
@@ -23,19 +23,17 @@ from pathlib import Path
 from typing import Dict, Optional
 from dotenv import load_dotenv
 
+# Charger les variables depuis .env
 # Charger les variables depuis .env (Project Root)
-# Path: .../systemFactChecking/syscred/config.py
-# Root .env is at .../systemFactChecking/.env (1 level up from syscred/)
+# Path: .../systemFactChecking/02_Code/syscred/config.py
+# Root .env is at .../systemFactChecking/.env (3 levels up)
 current_path = Path(__file__).resolve()
-env_path = current_path.parent.parent / '.env'
+env_path = current_path.parent.parent.parent / '.env'
 
 if not env_path.exists():
     print(f"[Config] WARNING: .env not found at {env_path}")
-    # Try alternate locations
-    for alt in [Path.cwd() / '.env', Path.cwd().parent / '.env']:
-        if alt.exists():
-            env_path = alt
-            break
+    # Try alternate location (sometimes CWD matters)
+    env_path = Path.cwd().parent / '.env'
 
 load_dotenv(dotenv_path=env_path)
 print(f"[Config] Loading .env from {env_path}")
@@ -52,9 +50,8 @@ class Config:
     """
 
     # === Chemins ===
-    # BASE_DIR = project root (parent of syscred/)
     BASE_DIR = Path(__file__).parent.parent
-    ONTOLOGY_BASE_PATH = BASE_DIR / "ontology" / "sysCRED_onto26avrtil.ttl"
+    ONTOLOGY_BASE_PATH = BASE_DIR / "sysCRED_onto26avrtil.ttl"
     ONTOLOGY_DATA_PATH = BASE_DIR / "ontology" / "sysCRED_data.ttl"
 
     # === Serveur Flask ===
@@ -64,7 +61,7 @@ class Config:
 
     # === API Keys ===
     GOOGLE_FACT_CHECK_API_KEY = os.getenv("SYSCRED_GOOGLE_API_KEY")
-    DATABASE_URL = os.getenv("SYSCRED_DATABASE_URL", os.getenv("DATABASE_URL"))  # Standardized env var
+    DATABASE_URL = os.getenv("DATABASE_URL")  # [NEW] Read DB URL from env
 
     # === Modèles ML ===
     # Support both SYSCRED_LOAD_ML and SYSCRED_LOAD_ML_MODELS (for Render)
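Both versions of config.py resolve .env relative to the module and then fall back to alternate candidates. The lookup order can be sketched as a self-contained helper (find_env is a hypothetical name, not the repo's API; the candidate order here is an illustrative simplification of the two layouts shown in the diff):

```python
from pathlib import Path
from typing import Optional

def find_env(module_file: str) -> Optional[Path]:
    """Return the first existing .env among candidate locations.

    Candidates: two levels up from the module (a project-root layout),
    then the module's own directory.
    """
    here = Path(module_file).resolve()
    candidates = [
        here.parent.parent / ".env",  # project-root layout
        here.parent / ".env",         # module-directory fallback
    ]
    for candidate in candidates:
        if candidate.is_file():
            return candidate
    return None
```

Resolving the path once, before calling load_dotenv, avoids the silent failure mode where load_dotenv is handed a non-existent path and simply loads nothing.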
syscred/database.py CHANGED
@@ -3,7 +3,6 @@
  Database Manager for SysCRED
  ===========================
  Handles connection to Supabase (PostgreSQL) and defines models.
- Falls back to SQLite if PostgreSQL is unavailable.
  """
  
  import os
@@ -33,38 +32,23 @@ class AnalysisResult(db.Model):
          'url': self.url,
          'score': self.credibility_score,
          'summary': self.summary,
-         'created_at': self.created_at.isoformat() if self.created_at else None,
+         'created_at': self.created_at.isoformat(),
          'source_reputation': self.source_reputation
      }
  
  def init_db(app):
      """Initialize the database with the Flask app."""
-     # Use SYSCRED_DATABASE_URL first (from .env), fallback to DATABASE_URL (from Render/HF)
-     db_url = os.environ.get('SYSCRED_DATABASE_URL') or os.environ.get('DATABASE_URL')
+     # Fallback to sqlite for local dev if no DATABASE_URL
+     db_url = os.environ.get('DATABASE_URL')
      if db_url and db_url.startswith("postgres://"):
          db_url = db_url.replace("postgres://", "postgresql://", 1)
  
-     # Test PostgreSQL reachability before committing to it
-     if db_url and 'postgresql' in db_url:
-         try:
-             import socket
-             from urllib.parse import urlparse
-             parsed = urlparse(db_url)
-             socket.getaddrinfo(parsed.hostname, parsed.port or 5432)
-         except (socket.gaierror, Exception) as e:
-             print(f"[SysCRED-DB] PostgreSQL host unreachable ({parsed.hostname}): {e}")
-             print("[SysCRED-DB] Falling back to SQLite...")
-             db_url = None  # Force SQLite fallback
-
-     app.config['SQLALCHEMY_DATABASE_URI'] = "postgresql://postgres.zmluirvqfkmfazqitqgi:FactCheckingSystem2026_test@aws-1-us-east-1.pooler.supabase.com:5432/postgres"
+     app.config['SQLALCHEMY_DATABASE_URI'] = db_url or 'sqlite:///syscred.db'
      app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
  
      db.init_app(app)
  
+     # Create tables if they don't exist (basic migration)
      with app.app_context():
-         try:
-             db.create_all()
-             db_type = 'PostgreSQL (Supabase)' if db_url else 'SQLite (local)'
-             print(f"[SysCRED-DB] Database initialized: {db_type}")
-         except Exception as e:
-             print(f"[SysCRED-DB] Database init error: {e}")
+         db.create_all()
+         print("[SysCRED-DB] Database tables initialized.")
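The `init_db` change above relies on two small behaviors worth isolating: SQLAlchemy rejects the legacy `postgres://` scheme that some hosts still export, so it is rewritten to `postgresql://`, and SQLite is used when no `DATABASE_URL` is set. A minimal sketch of that URL handling (the helper name `normalize_db_url` is illustrative):

```python
def normalize_db_url(db_url):
    """Rewrite the legacy postgres:// scheme and fall back to local SQLite."""
    if db_url and db_url.startswith("postgres://"):
        # SQLAlchemy 1.4+ no longer accepts "postgres://" as a dialect name
        db_url = db_url.replace("postgres://", "postgresql://", 1)
    return db_url or "sqlite:///syscred.db"
```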
syscred/db_store.py DELETED
@@ -1,354 +0,0 @@
- """
- SysCRED Storage Module - SQLite + Supabase
- ==========================================
- Stocke les triplets RDF et résultats d'analyse.
- Utilise SQLite localement, avec option de sync vers Supabase.
- """
-
- import os
- import sqlite3
- import hashlib
- import json
- from datetime import datetime
- from typing import Optional, Dict, Any, List, Tuple
- from urllib.parse import urlparse
- from pathlib import Path
-
- # Chemins
- BASE_DIR = Path(__file__).parent
- DB_PATH = BASE_DIR / "syscred_local.db"
-
- class SysCREDStore:
-     """
-     Gestionnaire de stockage pour SysCRED.
-     SQLite local avec option Supabase.
-     """
-
-     def __init__(self, db_path: str = None, supabase_url: str = None):
-         self.db_path = db_path or str(DB_PATH)
-         self.supabase_url = supabase_url or os.getenv("DATABASE_URL")
-         self.conn = None
-         self._init_local_db()
-
-     def _init_local_db(self):
-         """Initialise la base SQLite locale."""
-         self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
-         self.conn.row_factory = sqlite3.Row
-
-         # Créer les tables
-         self.conn.executescript("""
-             -- Résultats d'analyse
-             CREATE TABLE IF NOT EXISTS analysis_results (
-                 id INTEGER PRIMARY KEY AUTOINCREMENT,
-                 url TEXT NOT NULL,
-                 credibility_score REAL NOT NULL,
-                 summary TEXT,
-                 created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-                 source_reputation TEXT,
-                 fact_check_count INTEGER DEFAULT 0,
-                 score_details TEXT,
-                 domain TEXT
-             );
-
-             -- Triplets RDF
-             CREATE TABLE IF NOT EXISTS rdf_triples (
-                 id INTEGER PRIMARY KEY AUTOINCREMENT,
-                 subject TEXT NOT NULL,
-                 predicate TEXT NOT NULL,
-                 object TEXT NOT NULL,
-                 object_type TEXT DEFAULT 'uri',
-                 graph_name TEXT DEFAULT 'data',
-                 created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-                 UNIQUE(subject, predicate, object, graph_name)
-             );
-
-             -- Sources
-             CREATE TABLE IF NOT EXISTS sources (
-                 id INTEGER PRIMARY KEY AUTOINCREMENT,
-                 domain TEXT UNIQUE NOT NULL,
-                 reputation_score REAL,
-                 domain_age_years REAL,
-                 is_fact_checker INTEGER DEFAULT 0,
-                 analysis_count INTEGER DEFAULT 0,
-                 last_analyzed TIMESTAMP,
-                 metadata TEXT,
-                 created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-             );
-
-             -- Claims
-             CREATE TABLE IF NOT EXISTS claims (
-                 id INTEGER PRIMARY KEY AUTOINCREMENT,
-                 claim_text TEXT NOT NULL,
-                 claim_hash TEXT UNIQUE,
-                 source_url TEXT,
-                 extracted_entities TEXT,
-                 credibility_score REAL,
-                 verification_status TEXT DEFAULT 'unverified',
-                 evidence_count INTEGER DEFAULT 0,
-                 created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-             );
-
-             -- Evidence
-             CREATE TABLE IF NOT EXISTS evidence (
-                 id INTEGER PRIMARY KEY AUTOINCREMENT,
-                 claim_id INTEGER,
-                 doc_id TEXT,
-                 doc_text TEXT,
-                 relevance_score REAL,
-                 retrieval_method TEXT DEFAULT 'bm25',
-                 created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-                 FOREIGN KEY (claim_id) REFERENCES claims(id)
-             );
-
-             -- Index
-             CREATE INDEX IF NOT EXISTS idx_analysis_url ON analysis_results(url);
-             CREATE INDEX IF NOT EXISTS idx_triple_subject ON rdf_triples(subject);
-             CREATE INDEX IF NOT EXISTS idx_triple_graph ON rdf_triples(graph_name);
-             CREATE INDEX IF NOT EXISTS idx_sources_domain ON sources(domain);
-         """)
-         self.conn.commit()
-         print(f"[SysCREDStore] SQLite initialisé: {self.db_path}")
-
-     # =========================================================================
-     # ONTOLOGY / RDF TRIPLES
-     # =========================================================================
-
-     def sync_ontology(self, ontology_manager) -> Dict[str, int]:
-         """
-         Synchronise les graphes RDFLib vers SQLite.
-
-         Args:
-             ontology_manager: Instance avec base_graph et data_graph
-         """
-         result = {'base_synced': 0, 'data_synced': 0}
-
-         try:
-             # Sync base ontology
-             if hasattr(ontology_manager, 'base_graph') and ontology_manager.base_graph:
-                 result['base_synced'] = self._sync_graph(
-                     ontology_manager.base_graph,
-                     graph_name='base'
-                 )
-
-             # Sync data graph
-             if hasattr(ontology_manager, 'data_graph') and ontology_manager.data_graph:
-                 result['data_synced'] = self._sync_graph(
-                     ontology_manager.data_graph,
-                     graph_name='data'
-                 )
-
-             self.conn.commit()
-             print(f"[SysCREDStore] Synced {result['base_synced']} base + {result['data_synced']} data triples")
-
-         except Exception as e:
-             result['error'] = str(e)
-             print(f"[SysCREDStore] Sync error: {e}")
-
-         return result
-
-     def _sync_graph(self, graph, graph_name: str) -> int:
-         """Sync un graphe RDFLib vers SQLite."""
-         from rdflib import Literal
-
-         count = 0
-         cursor = self.conn.cursor()
-
-         for s, p, o in graph:
-             subject = str(s)
-             predicate = str(p)
-             obj_value = str(o)
-             obj_type = 'literal' if isinstance(o, Literal) else 'uri'
-
-             try:
-                 cursor.execute("""
-                     INSERT OR IGNORE INTO rdf_triples
-                     (subject, predicate, object, object_type, graph_name)
-                     VALUES (?, ?, ?, ?, ?)
-                 """, (subject, predicate, obj_value, obj_type, graph_name))
-                 count += 1
-             except:
-                 pass
-
-         return count
-
-     def get_triple_stats(self) -> Dict[str, int]:
-         """Statistiques des triplets."""
-         cursor = self.conn.cursor()
-
-         cursor.execute("SELECT COUNT(*) FROM rdf_triples WHERE graph_name = 'base'")
-         base = cursor.fetchone()[0]
-
-         cursor.execute("SELECT COUNT(*) FROM rdf_triples WHERE graph_name = 'data'")
-         data = cursor.fetchone()[0]
-
-         return {
-             'base_triples': base,
-             'data_triples': data,
-             'total_triples': base + data
-         }
-
-     # =========================================================================
-     # ANALYSIS RESULTS
-     # =========================================================================
-
-     def save_analysis(self, url: str, credibility_score: float,
-                       summary: str = None, score_details: Dict = None,
-                       source_reputation: str = None, fact_check_count: int = 0) -> int:
-         """Sauvegarde un résultat d'analyse."""
-         domain = urlparse(url).netloc
-
-         cursor = self.conn.cursor()
-         cursor.execute("""
-             INSERT INTO analysis_results
-             (url, credibility_score, summary, score_details, source_reputation,
-              fact_check_count, domain)
-             VALUES (?, ?, ?, ?, ?, ?, ?)
-         """, (
-             url, credibility_score, summary,
-             json.dumps(score_details) if score_details else None,
-             source_reputation, fact_check_count, domain
-         ))
-         self.conn.commit()
-
-         result_id = cursor.lastrowid
-         print(f"[SysCREDStore] Saved analysis #{result_id} for {domain}")
-
-         # Update source stats
-         self._update_source(domain, credibility_score)
-
-         return result_id
-
-     def get_history(self, url: str = None, limit: int = 50) -> List[Dict]:
-         """Récupère l'historique des analyses."""
-         cursor = self.conn.cursor()
-
-         if url:
-             cursor.execute("""
-                 SELECT * FROM analysis_results
-                 WHERE url = ? ORDER BY created_at DESC LIMIT ?
-             """, (url, limit))
-         else:
-             cursor.execute("""
-                 SELECT * FROM analysis_results
-                 ORDER BY created_at DESC LIMIT ?
-             """, (limit,))
-
-         return [dict(row) for row in cursor.fetchall()]
-
-     # =========================================================================
-     # SOURCES
-     # =========================================================================
-
-     def _update_source(self, domain: str, score: float = None):
-         """Met à jour les stats d'une source."""
-         cursor = self.conn.cursor()
-
-         cursor.execute("SELECT id, analysis_count FROM sources WHERE domain = ?", (domain,))
-         row = cursor.fetchone()
-
-         if row:
-             cursor.execute("""
-                 UPDATE sources SET
-                     analysis_count = analysis_count + 1,
-                     last_analyzed = CURRENT_TIMESTAMP,
-                     reputation_score = COALESCE(?, reputation_score)
-                 WHERE domain = ?
-             """, (score, domain))
-         else:
-             cursor.execute("""
-                 INSERT INTO sources (domain, reputation_score, analysis_count, last_analyzed)
-                 VALUES (?, ?, 1, CURRENT_TIMESTAMP)
-             """, (domain, score))
-
-         self.conn.commit()
-
-     def get_source(self, domain: str) -> Optional[Dict]:
-         """Récupère les infos d'une source."""
-         cursor = self.conn.cursor()
-         cursor.execute("SELECT * FROM sources WHERE domain = ?", (domain,))
-         row = cursor.fetchone()
-         return dict(row) if row else None
-
-     # =========================================================================
-     # GLOBAL STATS
-     # =========================================================================
-
-     def get_stats(self) -> Dict[str, Any]:
-         """Statistiques globales."""
-         cursor = self.conn.cursor()
-
-         cursor.execute("SELECT COUNT(*) FROM analysis_results")
-         total_analyses = cursor.fetchone()[0]
-
-         cursor.execute("SELECT COUNT(*) FROM sources")
-         unique_domains = cursor.fetchone()[0]
-
-         cursor.execute("SELECT AVG(credibility_score) FROM analysis_results")
-         avg_score = cursor.fetchone()[0]
-
-         triple_stats = self.get_triple_stats()
-
-         return {
-             'total_analyses': total_analyses,
-             'unique_domains': unique_domains,
-             'avg_credibility': round(avg_score, 2) if avg_score else None,
-             **triple_stats
-         }
-
-     def close(self):
-         """Ferme la connexion."""
-         if self.conn:
-             self.conn.close()
-
-
- # ============================================================================
- # INTEGRATION
- # ============================================================================
-
- def sync_ontology_to_db():
-     """Synchronise l'ontologie vers la base de données."""
-     import sys
-     sys.path.insert(0, str(BASE_DIR))
-
-     try:
-         from ontology_manager import OntologyManager
-         from config import Config
-
-         # Init ontology
-         onto = OntologyManager(
-             base_ontology_path=str(Config.ONTOLOGY_BASE_PATH),
-             data_path=str(Config.ONTOLOGY_DATA_PATH)
-         )
-
-         # Init store
-         store = SysCREDStore()
-
-         # Sync
-         result = store.sync_ontology(onto)
-         print(f"\n✅ Sync complete: {result}")
-
-         # Stats
-         stats = store.get_stats()
-         print(f"📊 Stats: {stats}")
-
-         return store
-
-     except ImportError as e:
-         print(f"Import error: {e}")
-         return None
-
-
- # ============================================================================
- # CLI
- # ============================================================================
-
- if __name__ == "__main__":
-     print("=" * 60)
-     print("SysCRED Storage - Synchronisation des triplets")
-     print("=" * 60)
-
-     store = sync_ontology_to_db()
-
-     if store:
-         print("\n✅ Base de données prête!")
-         print(f"   Fichier: {store.db_path}")
syscred/demo_server.py DELETED
@@ -1,77 +0,0 @@
- from flask import Flask, send_from_directory, jsonify, request
- from flask_cors import CORS
- import requests
-
- app = Flask(__name__, static_folder="static")
- CORS(app)
- KEY = "AIzaSyBiuY4AxuPgHcrViQJQ6BcKs1wOIqsiz74"
-
- def fact_check(q):
-     try:
-         r = requests.get("https://factchecktools.googleapis.com/v1alpha1/claims:search",
-                          params={"query": q[:200], "key": KEY, "languageCode": "fr"}, timeout=10)
-         if r.status_code == 200:
-             return [{"claim": c.get("text",""), "rating": c.get("claimReview",[{}])[0].get("textualRating","N/A")}
-                     for c in r.json().get("claims",[])[:5]]
-     except Exception as e:
-         print(f"FactCheck error: {e}")
-     return []
-
- @app.route("/")
- def home():
-     return send_from_directory("static", "index.html")
-
- @app.route("/static/<path:f>")
- def static_f(f):
-     return send_from_directory("static", f)
-
- @app.route("/api/verify", methods=["POST"])
- def verify():
-     d = request.get_json()
-     fc = fact_check(d.get("input_data",""))
-     return jsonify({
-         "informationEntree": d.get("input_data",""),
-         "scoreCredibilite": 0.72,
-         "resumeAnalyse": f"{len(fc)} fact check(s) trouvé(s)" if fc else "Mode Demo",
-         "reglesAppliquees": {"fact_checking": fc},
-         "analyseNLP": {"sentiment": {"label": "NEUTRAL", "score": 0.65}, "coherence_score": 0.78,
-                        "bias_analysis": {"score": 0.2, "label": "Low Bias"}, "entities": []},
-         "eeat_score": {"experience": 0.72, "expertise": 0.68, "authority": 0.75, "trust": 0.8, "overall": 0.74},
-         "trec_metrics": {"precision": 0.82, "recall": 0.75, "map": 0.68, "ndcg": 0.72, "tfidf": 0.45, "mrr": 1.0}
-     })
-
- @app.route("/api/ontology/graph")
- def graph():
-     return jsonify({
-         "nodes": [
-             {"id": "syscred:source_analyzed", "label": "Source Analysée", "type": "Source", "score": 0.72,
-              "uri": "http://syscred.uqam.ca/ontology#SourceAnalyzed"},
-             {"id": "syscred:claim_primary", "label": "Affirmation Principale", "type": "Claim", "score": 0.65,
-              "uri": "http://syscred.uqam.ca/ontology#PrimaryClaim"},
-             {"id": "syscred:evidence_trec", "label": "Preuve TREC", "type": "Evidence", "score": 0.82,
-              "uri": "http://syscred.uqam.ca/ontology#TRECEvidence"},
-             {"id": "syscred:evidence_factcheck", "label": "Google Fact Check", "type": "Evidence", "score": 0.78,
-              "uri": "http://syscred.uqam.ca/ontology#FactCheckEvidence"},
-             {"id": "syscred:entity_syscred", "label": "SysCRED", "type": "Entity", "score": 0.9,
-              "uri": "http://syscred.uqam.ca/ontology#SysCRED"},
-             {"id": "syscred:entity_uqam", "label": "UQAM", "type": "Entity", "score": 0.85,
-              "uri": "http://dbpedia.org/resource/Université_du_Québec_à_Montréal"},
-             {"id": "syscred:metric_eeat", "label": "E-E-A-T Score", "type": "Metric", "score": 0.74,
-              "uri": "http://syscred.uqam.ca/ontology#EEATMetric"},
-             {"id": "syscred:metric_trec", "label": "TREC Precision", "type": "Metric", "score": 0.82,
-              "uri": "http://syscred.uqam.ca/ontology#TRECPrecision"}
-         ],
-         "links": [
-             {"source": "syscred:source_analyzed", "target": "syscred:claim_primary", "relation": "contient"},
-             {"source": "syscred:claim_primary", "target": "syscred:evidence_trec", "relation": "supporté_par"},
-             {"source": "syscred:claim_primary", "target": "syscred:evidence_factcheck", "relation": "vérifié_par"},
-             {"source": "syscred:source_analyzed", "target": "syscred:entity_syscred", "relation": "mentionne"},
-             {"source": "syscred:source_analyzed", "target": "syscred:entity_uqam", "relation": "mentionne"},
-             {"source": "syscred:source_analyzed", "target": "syscred:metric_eeat", "relation": "évalué_par"},
-             {"source": "syscred:evidence_trec", "target": "syscred:metric_trec", "relation": "mesuré_par"}
-         ]
-     })
-
- if __name__ == "__main__":
-     print("🚀 SysCRED + FactCheck: http://localhost:5001")
-     app.run(host="0.0.0.0", port=5001, debug=False)
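The deleted demo server flattened the Fact Check Tools `claims:search` response into (claim, rating) pairs, defaulting to "N/A" when a claim carries no review. The same parsing logic, extracted and run on a canned payload rather than a live API call (the function name `parse_claims` is illustrative):

```python
def parse_claims(payload, limit=5):
    """Flatten a claims:search response into claim/rating pairs."""
    return [
        {"claim": c.get("text", ""),
         # First claimReview entry if present, else a placeholder rating
         "rating": c.get("claimReview", [{}])[0].get("textualRating", "N/A")}
        for c in payload.get("claims", [])[:limit]
    ]

# Canned payload mimicking the API's response shape
sample = {"claims": [
    {"text": "Claim A", "claimReview": [{"textualRating": "False"}]},
    {"text": "Claim B"},  # no review attached
]}
rows = parse_claims(sample)
```

Like the original, this would raise `IndexError` on a claim whose `claimReview` key is present but an empty list; the canned data avoids that edge case.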
syscred/eeat_calculator.py CHANGED
@@ -1,118 +1,41 @@
1
- #!/usr/bin/env python3
2
  # -*- coding: utf-8 -*-
3
  """
4
- E-E-A-T Metrics Calculator for SysCRED
5
- ========================================
6
- Calculates Google-style E-E-A-T metrics (Experience, Expertise, Authority, Trust).
7
 
8
- These metrics mirror modern Google ranking signals:
9
- - Experience: Domain age, content freshness
10
- - Expertise: Author identification, depth of content
11
- - Authority: PageRank simulation, citations/backlinks
12
- - Trust: HTTPS, fact-checks, low bias score
 
 
13
  """
14
 
15
- from typing import Dict, Any, Optional, List
16
- from dataclasses import dataclass
17
  import re
18
- from datetime import datetime
19
- import logging
20
-
21
- logger = logging.getLogger(__name__)
22
-
23
-
24
- @dataclass
25
- class EEATScore:
26
- """E-E-A-T score container."""
27
- experience: float # 0-1
28
- expertise: float # 0-1
29
- authority: float # 0-1
30
- trust: float # 0-1
31
-
32
- @property
33
- def overall(self) -> float:
34
- """Weighted average of all E-E-A-T components."""
35
- # Weights based on Google's emphasis
36
- weights = {
37
- 'experience': 0.15,
38
- 'expertise': 0.25,
39
- 'authority': 0.35,
40
- 'trust': 0.25
41
- }
42
- return (
43
- self.experience * weights['experience'] +
44
- self.expertise * weights['expertise'] +
45
- self.authority * weights['authority'] +
46
- self.trust * weights['trust']
47
- )
48
-
49
- def to_dict(self) -> Dict[str, Any]:
50
- """Convert to dictionary for JSON serialization."""
51
- return {
52
- 'experience': round(self.experience, 3),
53
- 'expertise': round(self.expertise, 3),
54
- 'authority': round(self.authority, 3),
55
- 'trust': round(self.trust, 3),
56
- 'overall': round(self.overall, 3),
57
- 'experience_pct': f"{int(self.experience * 100)}%",
58
- 'expertise_pct': f"{int(self.expertise * 100)}%",
59
- 'authority_pct': f"{int(self.authority * 100)}%",
60
- 'trust_pct': f"{int(self.trust * 100)}%",
61
- 'overall_pct': f"{int(self.overall * 100)}%"
62
- }
63
 
64
 
65
  class EEATCalculator:
66
  """
67
- Calculate E-E-A-T metrics from various signals.
68
-
69
- Mirrors Google's quality rater guidelines:
70
- - Experience: Has the author demonstrated real experience?
71
- - Expertise: Is the content expert-level?
72
- - Authority: Is the source recognized as authoritative?
73
- - Trust: Is the source trustworthy?
74
  """
75
 
76
- # Known authoritative domains
77
- AUTHORITATIVE_DOMAINS = {
78
- # News
79
- 'lemonde.fr': 0.95,
80
- 'lefigaro.fr': 0.90,
81
- 'liberation.fr': 0.88,
82
- 'nytimes.com': 0.95,
83
- 'washingtonpost.com': 0.93,
84
- 'theguardian.com': 0.92,
85
- 'bbc.com': 0.94,
86
- 'bbc.co.uk': 0.94,
87
- 'reuters.com': 0.96,
88
- 'apnews.com': 0.95,
89
- # Academic
90
- 'nature.com': 0.98,
91
- 'science.org': 0.98,
92
- 'pubmed.ncbi.nlm.nih.gov': 0.97,
93
- 'scholar.google.com': 0.85,
94
- # Government
95
- 'gouv.fr': 0.90,
96
- 'gov.uk': 0.90,
97
- 'whitehouse.gov': 0.88,
98
- 'europa.eu': 0.92,
99
- # Fact-checkers
100
- 'snopes.com': 0.88,
101
- 'factcheck.org': 0.90,
102
- 'politifact.com': 0.88,
103
- 'fullfact.org': 0.89,
104
- # Wikipedia (moderate authority)
105
- 'wikipedia.org': 0.75,
106
- 'fr.wikipedia.org': 0.75,
107
- 'en.wikipedia.org': 0.75,
108
  }
109
 
110
- # Low-trust domains (misinformation sources)
111
- LOW_TRUST_DOMAINS = {
112
- 'infowars.com': 0.1,
113
- 'breitbart.com': 0.3,
114
- 'naturalnews.com': 0.15,
115
- # Add more as needed
116
  }
117
 
118
  def __init__(self):
@@ -121,346 +44,227 @@ class EEATCalculator:
121
 
122
  def calculate(
123
  self,
124
- url: str,
125
- text: str,
126
- nlp_analysis: Optional[Dict[str, Any]] = None,
127
- pagerank: Optional[float] = None,
128
- fact_checks: Optional[List[Dict]] = None,
129
- domain_age_years: Optional[float] = None,
130
- has_https: bool = True,
131
- author_identified: bool = False,
132
- seo_score: Optional[float] = None
133
- ) -> EEATScore:
134
  """
135
- Calculate E-E-A-T scores from available signals.
136
 
137
  Args:
138
  url: Source URL
139
- text: Article text content
140
- nlp_analysis: NLP analysis results (sentiment, coherence, bias)
141
- pagerank: Simulated PageRank score (0-1)
142
- fact_checks: List of fact-check results
143
- domain_age_years: Domain age in years (from WHOIS)
144
- has_https: Whether site uses HTTPS
145
- author_identified: Whether author is clearly identified
146
- seo_score: SEO/technical quality score
147
-
148
- Returns:
149
- EEATScore with all component scores
150
- """
151
- # Extract domain from URL
152
- domain = self._extract_domain(url)
153
-
154
- # Calculate each component
155
- experience = self._calculate_experience(
156
- domain_age_years,
157
- text,
158
- nlp_analysis
159
- )
160
-
161
- expertise = self._calculate_expertise(
162
- text,
163
- author_identified,
164
- nlp_analysis
165
- )
166
-
167
- authority = self._calculate_authority(
168
- domain,
169
- pagerank,
170
- seo_score
171
- )
172
-
173
- trust = self._calculate_trust(
174
- domain,
175
- has_https,
176
- fact_checks,
177
- nlp_analysis
178
- )
179
-
180
- return EEATScore(
181
- experience=experience,
182
- expertise=expertise,
183
- authority=authority,
184
- trust=trust
185
- )
186
-
187
- def _extract_domain(self, url: str) -> str:
188
- """Extract domain from URL."""
189
- import re
190
- match = re.search(r'https?://(?:www\.)?([^/]+)', url)
191
- return match.group(1).lower() if match else url.lower()
192
-
193
- def _calculate_experience(
194
- self,
195
- domain_age_years: Optional[float],
196
- text: str,
197
- nlp_analysis: Optional[Dict]
198
- ) -> float:
199
- """
200
- Calculate Experience score.
201
-
202
- Factors:
203
- - Domain age (longer = more experience)
204
- - Content freshness (recently updated)
205
- - First-hand experience indicators in text
206
- """
207
- score = 0.5 # Base score
208
-
209
- # Domain age contribution (max 0.3)
210
- if domain_age_years is not None:
211
- age_score = min(domain_age_years / 20, 1.0) * 0.3 # 20 years = max
212
- score += age_score
213
- else:
214
- score += 0.15 # Assume moderate age
215
-
216
- # Content depth contribution (max 0.2)
217
- word_count = len(text.split()) if text else 0
218
- if word_count > 1000:
219
- score += 0.2
220
- elif word_count > 500:
221
- score += 0.15
222
- elif word_count > 200:
223
- score += 0.1
224
-
225
- # First-hand experience indicators (max 0.1)
226
- experience_indicators = [
227
- r'\b(j\'ai|je suis|nous avons|I have|we have|in my experience)\b',
228
- r'\b(interview|entretien|témoignage|witness|firsthand)\b',
229
- r'\b(sur place|on the ground|eyewitness)\b'
230
- ]
231
- for pattern in experience_indicators:
232
- if re.search(pattern, text, re.IGNORECASE):
233
- score += 0.03
234
-
235
- return min(score, 1.0)
236
-
237
- def _calculate_expertise(
238
- self,
239
- text: str,
240
- author_identified: bool,
241
- nlp_analysis: Optional[Dict]
242
- ) -> float:
243
- """
244
- Calculate Expertise score.
245
-
246
- Factors:
247
- - Author identification
248
- - Technical depth of content
249
- - Citation of sources
250
- - Coherence (from NLP)
251
- """
252
- score = 0.4 # Base score
253
-
254
- # Author identification (0.2)
255
- if author_identified:
256
- score += 0.2
257
 
258
- # Citation indicators (max 0.2)
259
- citation_patterns = [
260
- r'\b(selon|according to|d\'après|source:)\b',
261
- r'\b(étude|study|research|rapport|report)\b',
262
- r'\b(expert|spécialiste|chercheur|professor|Dr\.)\b',
263
- r'\[([\d]+)\]', # [1] style citations
264
- r'https?://[^\s]+' # Links
265
- ]
266
- citation_count = 0
267
- for pattern in citation_patterns:
268
- citation_count += len(re.findall(pattern, text, re.IGNORECASE))
269
- score += min(citation_count * 0.02, 0.2)
270
-
271
- # Coherence from NLP analysis (0.2)
272
- if nlp_analysis and 'coherence' in nlp_analysis:
273
- coherence = nlp_analysis['coherence']
274
- if isinstance(coherence, dict):
275
- coherence = coherence.get('score', 0.5)
276
- score += coherence * 0.2
277
- else:
278
- score += 0.1 # Assume moderate coherence
279
-
280
- return min(score, 1.0)
281
-
282
- def _calculate_authority(
283
- self,
284
- domain: str,
285
- pagerank: Optional[float],
286
- seo_score: Optional[float]
287
- ) -> float:
288
- """
289
- Calculate Authority score.
290
-
291
- Factors:
292
- - Known authoritative domain
293
- - PageRank simulation
294
- - SEO/technical quality
295
  """
296
- score = 0.3 # Base score
297
-
298
- # Known domain authority (max 0.5)
299
- for known_domain, authority in self.AUTHORITATIVE_DOMAINS.items():
300
- if known_domain in domain:
301
- score = max(score, authority * 0.5 + 0.3)
302
- break
303
-
304
- # Check low-trust domains
305
- for low_trust_domain, low_score in self.LOW_TRUST_DOMAINS.items():
306
- if low_trust_domain in domain:
307
- score = min(score, low_score)
308
- break
309
-
310
- # PageRank contribution (max 0.3)
311
- if pagerank is not None:
312
- score += pagerank * 0.3
313
- else:
314
- score += 0.15 # Assume moderate pagerank
315
-
316
- # SEO score contribution (max 0.2)
317
- if seo_score is not None:
318
- score += seo_score * 0.2
319
- else:
320
- score += 0.1
321
 
322
- return min(score, 1.0)
323
-
324
- def _calculate_trust(
325
- self,
326
- domain: str,
327
- has_https: bool,
328
- fact_checks: Optional[List[Dict]],
329
- nlp_analysis: Optional[Dict]
330
- ) -> float:
331
- """
332
- Calculate Trust score.
333
 
334
- Factors:
335
- - HTTPS
336
- - Fact-check results
337
- - Bias score (low = better)
338
- - Known trustworthy domain
339
- """
340
- score = 0.4 # Base score
 
 
 
 
 
 
 
 
 
 
 
 
 
 
341
 
342
- # HTTPS (0.1)
343
- if has_https:
344
- score += 0.1
345
 
346
- # Fact-check results (max 0.3)
347
- if fact_checks:
348
- positive_checks = sum(1 for fc in fact_checks
349
- if fc.get('rating', '').lower() in ['true', 'vrai', 'correct'])
350
- negative_checks = sum(1 for fc in fact_checks
351
- if fc.get('rating', '').lower() in ['false', 'faux', 'incorrect', 'pants-fire'])
352
 
353
- if positive_checks > 0:
354
- score += 0.2
355
- if negative_checks > 0:
356
- score -= 0.3
357
-
358
- # Bias score (max 0.2, lower bias = higher trust)
359
- if nlp_analysis:
360
- bias_data = nlp_analysis.get('bias_analysis', {})
361
- if isinstance(bias_data, dict):
362
- bias_score = bias_data.get('score', 0.3)
363
- else:
364
- bias_score = 0.3
365
- # Invert: low bias = high trust contribution
366
- score += (1 - bias_score) * 0.2
367
- else:
368
- score += 0.1
 
 
 
 
 
369
 
370
- # Known trustworthy domain (0.1)
371
- for known_domain in self.AUTHORITATIVE_DOMAINS:
372
- if known_domain in domain:
373
- score += 0.1
374
- break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
375
 
376
- # Known low-trust domain (penalty)
377
- for low_trust_domain in self.LOW_TRUST_DOMAINS:
378
- if low_trust_domain in domain:
379
- score -= 0.3
380
- break
381
 
382
- return max(min(score, 1.0), 0.0)
 
 
 
 
 
 
 
383
 
384
- def explain_score(self, eeat: EEATScore, url: str) -> str:
385
- """
386
- Generate human-readable explanation of E-E-A-T score.
387
-
388
- Args:
389
- eeat: EEATScore instance
390
- url: Source URL
391
-
392
- Returns:
393
- Formatted explanation string
394
- """
395
- domain = self._extract_domain(url)
396
-
397
  explanations = []
398
 
399
- # Experience
400
- if eeat.experience >= 0.8:
401
-             explanations.append(f"✅ **Expérience élevée** ({eeat.experience_pct}): Source établie depuis longtemps")
-         elif eeat.experience >= 0.5:
-             explanations.append(f"🔶 **Expérience moyenne** ({eeat.experience_pct}): Source modérément établie")
          else:
-             explanations.append(f"⚠️ **Expérience faible** ({eeat.experience_pct}): Source récente ou peu connue")

-         # Expertise
-         if eeat.expertise >= 0.8:
-             explanations.append(f"✅ **Expertise élevée** ({eeat.expertise_pct}): Contenu approfondi avec citations")
-         elif eeat.expertise >= 0.5:
-             explanations.append(f"🔶 **Expertise moyenne** ({eeat.expertise_pct}): Contenu standard")
          else:
-             explanations.append(f"⚠️ **Expertise faible** ({eeat.expertise_pct}): Manque de profondeur")

-         # Authority
-         if eeat.authority >= 0.8:
-             explanations.append(f"✅ **Autorité élevée** ({eeat.authority_pct}): Source très citée et reconnue")
-         elif eeat.authority >= 0.5:
-             explanations.append(f"🔶 **Autorité moyenne** ({eeat.authority_pct}): Source modérément reconnue")
          else:
-             explanations.append(f"⚠️ **Autorité faible** ({eeat.authority_pct}): Peu de citations externes")

-         # Trust
-         if eeat.trust >= 0.8:
-             explanations.append(f"✅ **Confiance élevée** ({eeat.trust_pct}): Faits vérifiés, pas de biais")
-         elif eeat.trust >= 0.5:
-             explanations.append(f"🔶 **Confiance moyenne** ({eeat.trust_pct}): Quelques signaux de confiance")
          else:
-             explanations.append(f"⚠️ **Confiance faible** ({eeat.trust_pct}): Prudence recommandée")

          return "\n".join(explanations)


- # Test
  if __name__ == "__main__":
      calc = EEATCalculator()

-     test_url = "https://www.lemonde.fr/politique/article/2024/01/06/trump.html"
      test_text = """
-     Selon une étude du chercheur Dr. Martin, l'insurrection du 6 janvier 2021
-     au Capitol a été un événement marquant. Notre reporter sur place a témoigné
-     des événements. Les experts politiques analysent les conséquences.
      """

-     nlp_analysis = {
-         'coherence': {'score': 0.8},
-         'bias_analysis': {'score': 0.2}
-     }
-
-     eeat = calc.calculate(
          url=test_url,
          text=test_text,
-         nlp_analysis=nlp_analysis,
-         pagerank=0.7,
-         has_https=True,
-         author_identified=True
      )

-     print("=== E-E-A-T Scores ===")
-     print(f"Experience: {eeat.experience_pct}")
-     print(f"Expertise: {eeat.expertise_pct}")
-     print(f"Authority: {eeat.authority_pct}")
-     print(f"Trust: {eeat.trust_pct}")
-     print(f"Overall: {eeat.overall_pct}")
-     print("\n=== Explanation ===")
-     print(calc.explain_score(eeat, test_url))
 
 
 
 
 
  # -*- coding: utf-8 -*-
  """
+ E-E-A-T Calculator Module - SysCRED
+ ====================================
+ Google Quality Rater Guidelines implementation.

+ E-E-A-T Scores:
+ - Experience: Domain age, content richness
+ - Expertise: Technical vocabulary, citations
+ - Authority: Estimated PageRank, backlinks
+ - Trust: HTTPS, unbiased sentiment
+
+ (c) Dominique S. Loyer - PhD Thesis Prototype
  """

  import re
+ from typing import Dict, Optional
+ from urllib.parse import urlparse

  class EEATCalculator:
      """
+     Calculate E-E-A-T scores based on Google Quality Rater Guidelines.
      """

+     # Technical terms that indicate expertise
+     TECHNICAL_TERMS = {
+         'research', 'study', 'analysis', 'data', 'evidence', 'methodology',
+         'peer-reviewed', 'journal', 'university', 'professor', 'dr.', 'phd',
+         'statistics', 'experiment', 'hypothesis', 'publication', 'citation',
+         'algorithm', 'framework', 'systematic', 'empirical', 'quantitative'
      }

+     # Trusted domains (simplified list)
+     TRUSTED_DOMAINS = {
+         '.edu', '.gov', '.org', 'reuters.com', 'apnews.com', 'bbc.com',
+         'nature.com', 'science.org', 'who.int', 'un.org', 'wikipedia.org',
+         'lemonde.fr', 'radio-canada.ca', 'uqam.ca', 'umontreal.ca'
      }

      def __init__(self):

      def calculate(
          self,
+         url: Optional[str] = None,
+         text: Optional[str] = None,
+         sentiment_score: float = 0.5,
+         has_citations: bool = False,
+         domain_age_years: int = 0
+     ) -> Dict:
          """
+         Calculate E-E-A-T scores.

          Args:
              url: Source URL
+             text: Content text
+             sentiment_score: 0-1 (0.5 = neutral is best for trust)
+             has_citations: Whether content has citations
+             domain_age_years: Estimated domain age

+         Returns:
+             {
+                 'experience': 0.75,
+                 'expertise': 0.80,
+                 'authority': 0.65,
+                 'trust': 0.90,
+                 'overall': 0.78,
+                 'details': {...}
+             }
          """
+         details = {}
+
+         # --- EXPERIENCE ---
+         experience = 0.5
+         if domain_age_years >= 10:
+             experience += 0.3
+         elif domain_age_years >= 5:
+             experience += 0.2
+         elif domain_age_years >= 2:
+             experience += 0.1
+
+         if text:
+             word_count = len(text.split())
+             if word_count >= 1000:
+                 experience += 0.15
+             elif word_count >= 500:
+                 experience += 0.1
+
+         experience = min(experience, 1.0)
+         details['experience_factors'] = {
+             'domain_age_bonus': domain_age_years >= 2,
+             'content_richness': len(text.split()) if text else 0
+         }

+         # --- EXPERTISE ---
+         expertise = 0.4
+         tech_count = 0

+         if text:
+             text_lower = text.lower()
+             for term in self.TECHNICAL_TERMS:
+                 if term in text_lower:
+                     tech_count += 1
+
+             if tech_count >= 5:
+                 expertise += 0.35
+             elif tech_count >= 3:
+                 expertise += 0.25
+             elif tech_count >= 1:
+                 expertise += 0.15
+
+         if has_citations:
+             expertise += 0.2
+
+         expertise = min(expertise, 1.0)
+         details['expertise_factors'] = {
+             'technical_terms_found': tech_count,
+             'has_citations': has_citations
+         }

+         # --- AUTHORITY ---
+         authority = 0.3
+         trusted_match = False

+         if url:
+             parsed = urlparse(url)
+             domain = parsed.netloc.lower()

+             for trusted in self.TRUSTED_DOMAINS:
+                 if trusted in domain:
+                     authority += 0.4
+                     trusted_match = True
+                     break
+
+             if parsed.scheme == 'https':
+                 authority += 0.1
+
+         # Check for author indicators in text
+         if text:
+             author_patterns = [r'by\s+\w+\s+\w+', r'author:', r'written by', r'par\s+\w+']
+             for pattern in author_patterns:
+                 if re.search(pattern, text.lower()):
+                     authority += 0.15
+                     break
+
+         authority = min(authority, 1.0)
+         details['authority_factors'] = {
+             'trusted_domain': trusted_match,
+             'https': bool(url and urlparse(url).scheme == 'https')
+         }

+         # --- TRUST ---
+         trust = 0.5
+
+         # Neutral sentiment is best (0.5)
+         sentiment_deviation = abs(sentiment_score - 0.5)
+         if sentiment_deviation < 0.1:
+             trust += 0.3  # Very neutral
+         elif sentiment_deviation < 0.2:
+             trust += 0.2
+         elif sentiment_deviation < 0.3:
+             trust += 0.1
+
+         if url and urlparse(url).scheme == 'https':
+             trust += 0.15
+
+         trust = min(trust, 1.0)
+         details['trust_factors'] = {
+             'sentiment_neutrality': 1 - sentiment_deviation * 2,
+             'secure_connection': bool(url and urlparse(url).scheme == 'https')
+         }

+         # --- OVERALL ---
+         overall = (experience * 0.2 + expertise * 0.3 +
+                    authority * 0.25 + trust * 0.25)

+         return {
+             'experience': round(experience, 2),
+             'expertise': round(expertise, 2),
+             'authority': round(authority, 2),
+             'trust': round(trust, 2),
+             'overall': round(overall, 2),
+             'details': details
+         }
 
+     def get_explanation(self, scores: Dict) -> str:
+         """Generate human-readable explanation of E-E-A-T scores."""
          explanations = []

+         exp = scores.get('experience', 0)
+         if exp >= 0.7:
+             explanations.append("✅ Expérience: Source établie avec contenu riche")
+         elif exp >= 0.5:
+             explanations.append("⚠️ Expérience: Source moyennement établie")
          else:
+             explanations.append("❌ Expérience: Source nouvelle ou contenu limité")

+         ext = scores.get('expertise', 0)
+         if ext >= 0.7:
+             explanations.append("✅ Expertise: Vocabulaire technique, citations présentes")
+         elif ext >= 0.5:
+             explanations.append("⚠️ Expertise: Niveau technique moyen")
          else:
+             explanations.append("❌ Expertise: Manque de terminologie spécialisée")

+         auth = scores.get('authority', 0)
+         if auth >= 0.7:
+             explanations.append("✅ Autorité: Domaine reconnu et fiable")
+         elif auth >= 0.5:
+             explanations.append("⚠️ Autorité: Niveau d'autorité moyen")
          else:
+             explanations.append("❌ Autorité: Source non reconnue")

+         tr = scores.get('trust', 0)
+         if tr >= 0.7:
+             explanations.append("✅ Confiance: Ton neutre, connexion sécurisée")
+         elif tr >= 0.5:
+             explanations.append("⚠️ Confiance: Niveau de confiance moyen")
          else:
+             explanations.append("❌ Confiance: Ton biaisé ou connexion non sécurisée")

          return "\n".join(explanations)
 

+ # Singleton
+ _calculator = None
+
+ def get_calculator() -> EEATCalculator:
+     """Get or create E-E-A-T calculator singleton."""
+     global _calculator
+     if _calculator is None:
+         _calculator = EEATCalculator()
+     return _calculator
+
+
+ # --- Testing ---
  if __name__ == "__main__":
+     print("=" * 60)
+     print("SysCRED E-E-A-T Calculator - Test")
+     print("=" * 60)
+
      calc = EEATCalculator()

+     test_url = "https://www.nature.com/articles/example"
      test_text = """
+     A peer-reviewed study published in the journal Nature found evidence
+     that the new methodology significantly improves research outcomes.
+     Dr. Smith from Harvard University presented the statistics at the conference.
      """

+     result = calc.calculate(
          url=test_url,
          text=test_text,
+         sentiment_score=0.5,
+         has_citations=True,
+         domain_age_years=15
      )

+     print("\n--- E-E-A-T Scores ---")
+     print(f"  Experience: {result['experience']:.0%}")
+     print(f"  Expertise:  {result['expertise']:.0%}")
+     print(f"  Authority:  {result['authority']:.0%}")
+     print(f"  Trust:      {result['trust']:.0%}")
+     print(f"  ─────────────────")
+     print(f"  OVERALL:    {result['overall']:.0%}")
+
+     print("\n--- Explanation ---")
+     print(calc.get_explanation(result))
+
+     print("\n" + "=" * 60)
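The overall score in `calculate` is a fixed-weight blend of the four components (Experience 20 %, Expertise 30 %, Authority 25 %, Trust 25 %). As a quick sanity check of that aggregation, a minimal standalone sketch; the example component values are made up for illustration and are not real output of the module:

```python
# Standalone sketch of the weighted E-E-A-T aggregation used in calculate().
# The weights mirror the module (0.2 / 0.3 / 0.25 / 0.25) and sum to 1.0.
WEIGHTS = {'experience': 0.20, 'expertise': 0.30, 'authority': 0.25, 'trust': 0.25}

def overall_score(scores: dict) -> float:
    """Weighted average of the four component scores, rounded like the module."""
    return round(sum(scores[k] * w for k, w in WEIGHTS.items()), 2)

# Illustrative (hypothetical) component values:
example = {'experience': 1.0, 'expertise': 0.5, 'authority': 0.8, 'trust': 0.6}
print(overall_score(example))
```

Because Expertise carries the largest weight, vocabulary and citation signals move the overall score more than any other single component.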
syscred/ner_analyzer.py CHANGED
@@ -1,283 +1,198 @@
- #!/usr/bin/env python3
  # -*- coding: utf-8 -*-
  """
- Named Entity Recognition (NER) Analyzer for SysCRED
- ====================================================
- Extracts named entities from text using spaCy.

- Entities detected:
- - PER: Persons (Donald Trump, Emmanuel Macron)
- - ORG: Organizations (FBI, UN, Google)
- - LOC: Locations (Paris, Capitol)
- - DATE: Dates (January 6, 2021)
- - MONEY: Amounts ($10 million)
- - EVENT: Events (insurrection, election)
  """

- from typing import Dict, List, Any, Optional
- import logging

- # Try to import spaCy
  try:
      import spacy
-     from spacy.language import Language
      HAS_SPACY = True
  except ImportError:
      HAS_SPACY = False
-     spacy = None
-
- logger = logging.getLogger(__name__)


  class NERAnalyzer:
      """
-     Named Entity Recognition analyzer using spaCy.

-     Supports French (fr_core_news_md) and English (en_core_web_md).
-     Falls back to heuristic extraction if spaCy is not available.
      """

-     # Entity type mappings for display
-     ENTITY_LABELS = {
-         'PER': {'fr': 'Personne', 'en': 'Person', 'emoji': '👤'},
-         'PERSON': {'fr': 'Personne', 'en': 'Person', 'emoji': '👤'},
-         'ORG': {'fr': 'Organisation', 'en': 'Organization', 'emoji': '🏢'},
-         'LOC': {'fr': 'Lieu', 'en': 'Location', 'emoji': '📍'},
-         'GPE': {'fr': 'Lieu géopolitique', 'en': 'Geopolitical', 'emoji': '🌍'},
-         'DATE': {'fr': 'Date', 'en': 'Date', 'emoji': '📅'},
-         'TIME': {'fr': 'Heure', 'en': 'Time', 'emoji': '⏰'},
-         'MONEY': {'fr': 'Montant', 'en': 'Money', 'emoji': '💰'},
-         'PERCENT': {'fr': 'Pourcentage', 'en': 'Percent', 'emoji': '📊'},
-         'EVENT': {'fr': 'Événement', 'en': 'Event', 'emoji': '📰'},
-         'PRODUCT': {'fr': 'Produit', 'en': 'Product', 'emoji': '📦'},
-         'LAW': {'fr': 'Loi', 'en': 'Law', 'emoji': '⚖️'},
-         'NORP': {'fr': 'Groupe', 'en': 'Group', 'emoji': '👥'},
-         'MISC': {'fr': 'Divers', 'en': 'Miscellaneous', 'emoji': '🔖'},
  }

-     def __init__(self, model_name: str = "fr_core_news_md", fallback: bool = True):
          """
          Initialize NER analyzer.

          Args:
-             model_name: spaCy model to load (fr_core_news_md, en_core_web_md)
-             fallback: If True, use heuristics when spaCy unavailable
          """
-         self.model_name = model_name
-         self.fallback = fallback
          self.nlp = None
-         self.use_heuristics = False

          if HAS_SPACY:
              try:
                  self.nlp = spacy.load(model_name)
-                 logger.info(f"[NER] Loaded spaCy model: {model_name}")
-             except OSError as e:
-                 logger.warning(f"[NER] Could not load model {model_name}: {e}")
-                 if fallback:
-                     self.use_heuristics = True
-                     logger.info("[NER] Using heuristic entity extraction")
-         else:
-             if fallback:
-                 self.use_heuristics = True
-                 logger.info("[NER] spaCy not installed. Using heuristic extraction")

-     def extract_entities(self, text: str) -> Dict[str, List[Dict[str, Any]]]:
          """
          Extract named entities from text.

-         Args:
-             text: Input text to analyze
-
          Returns:
-             Dictionary mapping entity types to lists of entities
-             Each entity has: text, start, end, label, label_display, emoji, confidence
          """
-         if not text or len(text.strip()) == 0:
-             return {}

-         if self.nlp:
-             return self._extract_with_spacy(text)
-         elif self.use_heuristics:
-             return self._extract_with_heuristics(text)
-         else:
-             return {}
-
-     def _extract_with_spacy(self, text: str) -> Dict[str, List[Dict[str, Any]]]:
-         """Extract entities using spaCy NLP."""
          doc = self.nlp(text)
-         entities: Dict[str, List[Dict[str, Any]]] = {}

          for ent in doc.ents:
-             label = ent.label_
-
-             # Get display info
-             label_info = self.ENTITY_LABELS.get(label, {
-                 'fr': label,
-                 'en': label,
-                 'emoji': '🔖'
-             })

-             entity_data = {
                  'text': ent.text,
                  'start': ent.start_char,
-                 'end': ent.end_char,
-                 'label': label,
-                 'label_display': label_info.get('fr', label),
-                 'emoji': label_info.get('emoji', '🔖'),
-                 'confidence': 0.85  # spaCy doesn't provide confidence by default
              }

-             if label not in entities:
-                 entities[label] = []
-
-             # Avoid duplicates
-             if not any(e['text'].lower() == entity_data['text'].lower() for e in entities[label]):
-                 entities[label].append(entity_data)
-
-         return entities
-
-     def _extract_with_heuristics(self, text: str) -> Dict[str, List[Dict[str, Any]]]:
-         """
-         Fallback heuristic entity extraction.
-         Uses pattern matching for common entities.
-         """
-         import re
-         entities: Dict[str, List[Dict[str, Any]]] = {}

-         # Common patterns
-         patterns = {
-             'PER': [
-                 # Known political figures
-                 r'\b(Donald Trump|Joe Biden|Emmanuel Macron|Hillary Clinton|Barack Obama|'
-                 r'Vladimir Putin|Angela Merkel|Justin Trudeau|Boris Johnson)\b',
-             ],
-             'ORG': [
-                 r'\b(FBI|CIA|NSA|ONU|NATO|OTAN|Google|Facebook|Twitter|Meta|'
-                 r'Amazon|Microsoft|Apple|CNN|BBC|Le Monde|New York Times|'
-                 r'Parti Républicain|Parti Démocrate|Republican Party|Democratic Party)\b',
-             ],
-             'LOC': [
-                 r'\b(Capitol|White House|Maison Blanche|Kremlin|Élysée|Pentagon|'
-                 r'New York|Washington|Paris|Londres|Moscou|Berlin|Beijing)\b',
-             ],
-             'DATE': [
-                 r'\b(\d{1,2}\s+(janvier|février|mars|avril|mai|juin|juillet|août|'
-                 r'septembre|octobre|novembre|décembre)\s+\d{4})\b',
-                 r'\b(\d{1,2}[-/]\d{1,2}[-/]\d{2,4})\b',
-                 r'\b(January|February|March|April|May|June|July|August|'
-                 r'September|October|November|December)\s+\d{1,2},?\s+\d{4}\b',
-             ],
-             'MONEY': [
-                 r'\$[\d,]+(?:\.\d{2})?(?:\s*(?:million|billion|trillion))?',
-                 r'[\d,]+(?:\.\d{2})?\s*(?:dollars?|euros?|€|\$)',
-                 r'[\d,]+\s*(?:million|milliard)s?\s*(?:de\s+)?(?:dollars?|euros?)',
-             ],
-             'PERCENT': [
-                 r'\b\d+(?:\.\d+)?%',
-                 r'\b\d+(?:\.\d+)?\s*pour\s*cent',
-                 r'\b\d+(?:\.\d+)?\s*percent',
-             ],
          }
-
-         for label, pattern_list in patterns.items():
-             label_info = self.ENTITY_LABELS.get(label, {'fr': label, 'emoji': '🔖'})
-
-             for pattern in pattern_list:
-                 for match in re.finditer(pattern, text, re.IGNORECASE):
-                     entity_data = {
-                         'text': match.group(),
-                         'start': match.start(),
-                         'end': match.end(),
-                         'label': label,
-                         'label_display': label_info.get('fr', label),
-                         'emoji': label_info.get('emoji', '🔖'),
-                         'confidence': 0.70  # Lower confidence for heuristics
-                     }
-
-                     if label not in entities:
-                         entities[label] = []
-
-                     # Avoid duplicates
-                     if not any(e['text'].lower() == entity_data['text'].lower()
-                                for e in entities[label]):
-                         entities[label].append(entity_data)
-
-         return entities
 
-     def get_entity_summary(self, entities: Dict[str, List[Dict[str, Any]]]) -> str:
          """
-         Generate a human-readable summary of extracted entities.

-         Args:
-             entities: Dictionary of entities from extract_entities()
-
-         Returns:
-             Formatted string summary
          """
-         if not entities:
-             return "Aucune entité nommée détectée."
-
-         lines = []
-         for label, ent_list in entities.items():
-             label_info = self.ENTITY_LABELS.get(label, {'fr': label, 'emoji': '🔖'})
-             emoji = label_info.get('emoji', '🔖')
-             label_display = label_info.get('fr', label)
-
-             entity_texts = [e['text'] for e in ent_list[:5]]  # Limit to 5
-             lines.append(f"{emoji} {label_display}: {', '.join(entity_texts)}")

-         return "\n".join(lines)
-
-     def to_frontend_format(self, entities: Dict[str, List[Dict[str, Any]]]) -> List[Dict]:
-         """
-         Convert entities to frontend-friendly format.

-         Returns:
-             List of entities with all info for display
-         """
-         result = []
-         for label, ent_list in entities.items():
-             for ent in ent_list:
-                 result.append({
-                     'text': ent['text'],
-                     'type': ent['label'],
-                     'type_display': ent.get('label_display', ent['label']),
-                     'emoji': ent.get('emoji', '🔖'),
-                     'confidence': ent.get('confidence', 0.5),
-                     'confidence_pct': f"{int(ent.get('confidence', 0.5) * 100)}%"
-                 })

-         # Sort by confidence
-         result.sort(key=lambda x: x['confidence'], reverse=True)
          return result


- # Singleton instance for easy import
- _ner_analyzer: Optional[NERAnalyzer] = None
-

- def get_ner_analyzer(model_name: str = "fr_core_news_md") -> NERAnalyzer:
-     """Get or create singleton NER analyzer instance."""
-     global _ner_analyzer
-     if _ner_analyzer is None:
-         _ner_analyzer = NERAnalyzer(model_name=model_name, fallback=True)
-     return _ner_analyzer


- # Quick test
  if __name__ == "__main__":
-     analyzer = NERAnalyzer(fallback=True)

      test_text = """
-     Donald Trump a affirmé que l'insurrection du 6 janvier 2021 au Capitol n'est jamais arrivée.
-     Le FBI enquête sur les événements. Le président Joe Biden a condamné ces déclarations à Washington.
-     Les dégâts sont estimés à 30 millions de dollars.
      """

-     entities = analyzer.extract_entities(test_text)
-     print("=== Entités détectées ===")
-     print(analyzer.get_entity_summary(entities))
-     print("\n=== Format Frontend ===")
-     for e in analyzer.to_frontend_format(entities):
-         print(f"  {e['emoji']} {e['text']} ({e['type_display']}, {e['confidence_pct']})")
 
 
 
 
 
 
 
  # -*- coding: utf-8 -*-
  """
+ NER Analyzer Module - SysCRED
+ ==============================
+ Named Entity Recognition for fact-checking enhancement.

+ Extracts: PERSON, ORG, GPE, DATE, MISC entities
+
+ (c) Dominique S. Loyer - PhD Thesis Prototype
  """

+ import os

+ # Check for spaCy
  try:
      import spacy
      HAS_SPACY = True
  except ImportError:
      HAS_SPACY = False
+     print("[NER] spaCy not installed. NER disabled.")


  class NERAnalyzer:
      """
+     Named Entity Recognition using spaCy.

+     Supports:
+     - French (fr_core_news_md)
+     - English (en_core_web_sm)
      """

+     # Entity type mapping with icons
+     ENTITY_ICONS = {
+         'PERSON': '👤',
+         'PER': '👤',
+         'ORG': '🏢',
+         'GPE': '📍',
+         'LOC': '📍',
+         'DATE': '📅',
+         'TIME': '🕐',
+         'MONEY': '💰',
+         'MISC': '🏷️',
+         'NORP': '👥',
+         'FAC': '🏛️',
+         'PRODUCT': '📦',
+         'EVENT': '🎉',
+         'WORK_OF_ART': '🎨',
+         'LAW': '⚖️',
+         'LANGUAGE': '🗣️',
      }

+     def __init__(self, language: str = 'en'):
          """
          Initialize NER analyzer.

          Args:
+             language: 'en' or 'fr'
          """
+         self.language = language
          self.nlp = None
+         self.enabled = False

          if HAS_SPACY:
+             self._load_model()
+
+     def _load_model(self):
+         """Load the appropriate spaCy model."""
+         models = {
+             'en': ['en_core_web_sm', 'en_core_web_md'],
+             'fr': ['fr_core_news_md', 'fr_core_news_sm']
+         }
+
+         for model_name in models.get(self.language, models['en']):
              try:
                  self.nlp = spacy.load(model_name)
+                 self.enabled = True
+                 print(f"[NER] Loaded model: {model_name}")
+                 break
+             except OSError:
+                 continue
+
+         if not self.enabled:
+             print(f"[NER] No model found for language: {self.language}")

+     def extract_entities(self, text: str) -> dict:
          """
          Extract named entities from text.

          Returns:
+             {
+                 'entities': [
+                     {'text': 'Emmanuel Macron', 'type': 'PERSON', 'icon': '👤'},
+                     ...
+                 ],
+                 'summary': {
+                     'PERSON': ['Emmanuel Macron'],
+                     'ORG': ['UQAM', 'Google'],
+                     ...
+                 }
+             }
          """
+         if not self.enabled or not text:
+             return {'entities': [], 'summary': {}}

          doc = self.nlp(text)
+
+         entities = []
+         summary = {}
+         seen = set()

          for ent in doc.ents:
+             # Avoid duplicates
+             key = (ent.text.lower(), ent.label_)
+             if key in seen:
+                 continue
+             seen.add(key)

+             entity = {
                  'text': ent.text,
+                 'type': ent.label_,
+                 'icon': self.ENTITY_ICONS.get(ent.label_, '🏷️'),
                  'start': ent.start_char,
+                 'end': ent.end_char
              }
+             entities.append(entity)

+             # Group by type
+             if ent.label_ not in summary:
+                 summary[ent.label_] = []
+             summary[ent.label_].append(ent.text)

+         return {
+             'entities': entities,
+             'summary': summary,
+             'count': len(entities)
          }

+     def analyze_for_factcheck(self, text: str) -> dict:
          """
+         Analyze text for fact-checking relevance.

+         Returns entities with credibility hints.
          """
+         result = self.extract_entities(text)

+         # Add fact-checking hints
+         hints = []

+         for ent in result.get('entities', []):
+             if ent['type'] in ['PERSON', 'PER']:
+                 hints.append(f"Verify claims about {ent['text']}")
+             elif ent['type'] == 'ORG':
+                 hints.append(f"Check {ent['text']} official sources")
+             elif ent['type'] in ['GPE', 'LOC']:
+                 hints.append(f"Verify location: {ent['text']}")
+             elif ent['type'] == 'DATE':
+                 hints.append(f"Confirm date: {ent['text']}")

+         result['fact_check_hints'] = hints[:5]  # Top 5 hints
          return result


+ # Singleton instance
+ _analyzer = None

+ def get_analyzer(language: str = 'en') -> NERAnalyzer:
+     """Get or create the NER analyzer singleton."""
+     global _analyzer
+     if _analyzer is None:
+         _analyzer = NERAnalyzer(language)
+     return _analyzer


+ # --- Testing ---
  if __name__ == "__main__":
+     print("=" * 60)
+     print("SysCRED NER Analyzer - Test")
+     print("=" * 60)
+
+     analyzer = NERAnalyzer('en')

      test_text = """
+     Emmanuel Macron announced today that France will invest €500 million
+     in AI research. The announcement was made at the UQAM in Montreal, Canada
+     on February 8, 2026. Google and Microsoft also confirmed their participation.
      """

+     result = analyzer.analyze_for_factcheck(test_text)
+
+     print("\n--- Entities Found ---")
+     for ent in result['entities']:
+         print(f"  {ent['icon']} {ent['text']} ({ent['type']})")
+
+     print("\n--- Fact-Check Hints ---")
+     for hint in result.get('fact_check_hints', []):
+         print(f"  • {hint}")
+
+     print("\n" + "=" * 60)
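The duplicate filtering and per-type grouping in `extract_entities` (a `seen` set keyed on lowercased text plus label, and a per-label `summary`) is independent of spaCy. A minimal sketch, with hypothetical `(text, label)` pairs standing in for `doc.ents`:

```python
# Sketch of the dedup-and-group logic from extract_entities above.
# The (text, label) pairs are hypothetical stand-ins for spaCy's doc.ents.
def group_entities(pairs):
    entities, summary, seen = [], {}, set()
    for text, label in pairs:
        key = (text.lower(), label)  # case-insensitive dedup, per label
        if key in seen:
            continue
        seen.add(key)
        entities.append({'text': text, 'type': label})
        summary.setdefault(label, []).append(text)
    return {'entities': entities, 'summary': summary, 'count': len(entities)}

result = group_entities([('Google', 'ORG'), ('google', 'ORG'), ('Montreal', 'GPE')])
print(result['summary'])
```

Note that the key includes the label, so the same surface string tagged with two different labels is kept twice, which matches the module's behaviour.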
syscred/ontology_manager.py CHANGED
@@ -47,7 +47,7 @@ class OntologyManager:
      """

      # Namespace for the credibility ontology
-     CRED_NS = "https://syscred.uqam.ca/ontology#"
+     CRED_NS = "https://github.com/DominiqueLoyer/systemFactChecking#"

      def __init__(self, base_ontology_path: Optional[str] = None, data_path: Optional[str] = None):
          """
@@ -254,7 +254,7 @@ class OntologyManager:

          # SPARQL query to find all evaluations for this URL
          query = """
-         PREFIX cred: <https://syscred.uqam.ca/ontology#>
+         PREFIX cred: <http://www.dic9335.uqam.ca/ontologies/credibility-verification#>
          PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

          SELECT ?report ?score ?level ?timestamp ?content
@@ -298,7 +298,7 @@ class OntologyManager:

          # Count evaluations
          query = """
-         PREFIX cred: <https://syscred.uqam.ca/ontology#>
+         PREFIX cred: <http://www.dic9335.uqam.ca/ontologies/credibility-verification#>
          SELECT (COUNT(?report) as ?count) WHERE {
              ?report a cred:RapportEvaluation .
          }
@@ -321,7 +321,7 @@ class OntologyManager:

          # Get the latest report ID
          latest_query = """
-         PREFIX cred: <https://syscred.uqam.ca/ontology#>
+         PREFIX cred: <https://github.com/DominiqueLoyer/systemFactChecking#>
          SELECT ?report ?timestamp WHERE {
              ?report a cred:RapportEvaluation .
              ?report cred:completionTimestamp ?timestamp .
@@ -355,7 +355,7 @@ class OntologyManager:

          # Query triples related to this report (Level 1)
          related_query = """
-         PREFIX cred: <https://syscred.uqam.ca/ontology#>
+         PREFIX cred: <https://github.com/DominiqueLoyer/systemFactChecking#>
          SELECT ?p ?o ?oType ?oLabel WHERE {
              <%s> ?p ?o .
              OPTIONAL { ?o a ?oType } .
@@ -463,8 +463,8 @@ if __name__ == "__main__":
      print("=== Testing OntologyManager ===\n")

      # Test with base ontology
-     base_path = os.path.join(os.path.dirname(__file__), '..', 'ontology', 'sysCRED_onto26avrtil.ttl')
-     data_path = os.path.join(os.path.dirname(__file__), '..', 'ontology', 'sysCRED_data.ttl')
+     base_path = "/Users/bk280625/documents041025/MonCode/sysCRED_onto26avrtil.ttl"
+     data_path = "/Users/bk280625/documents041025/MonCode/ontology/sysCRED_data.ttl"

      manager = OntologyManager(base_ontology_path=base_path, data_path=None)
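Note that after this change the module still mixes two `cred:` namespaces: the GitHub IRI for `CRED_NS` and two of the queries, the `dic9335.uqam.ca` IRI for the other two. Since a SPARQL `PREFIX` only matches triples stored under the exact same namespace IRI, a mismatched prefix makes a query silently return nothing. A small sketch of that expansion rule (the helper `expand` is just illustrative):

```python
# Sketch: a prefixed name expands to namespace IRI + local name, so a query
# PREFIXed with one namespace cannot match triples stored under another.
GITHUB_NS = "https://github.com/DominiqueLoyer/systemFactChecking#"
DIC9335_NS = "http://www.dic9335.uqam.ca/ontologies/credibility-verification#"

def expand(ns: str, local: str) -> str:
    """Expand a prefixed name (prefix bound to ns) to a full IRI."""
    return ns + local

data_iri = expand(GITHUB_NS, "RapportEvaluation")    # class used when writing triples
query_iri = expand(DIC9335_NS, "RapportEvaluation")  # class a differently-prefixed query asks for
print(data_iri == query_iri)
```

Aligning every `PREFIX cred:` with whatever namespace the data was serialized under would avoid empty result sets from otherwise correct queries.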
 
syscred/requirements-distilled.txt CHANGED
@@ -1,6 +1,6 @@
- # SysCRED - Optimized Requirements with Distilled Models
+ # SysCRED - Distilled Requirements for HuggingFace Spaces
+ # CPU-only, smaller models for faster startup
  # (c) Dominique S. Loyer
- # Uses DISTILLED models for faster loading and lower memory

  # === Core Dependencies ===
  requests>=2.28.0
@@ -10,27 +10,26 @@
  # === RDF/Ontology ===
  rdflib>=6.0.0

- # === Machine Learning (CPU-only) ===
+ # === Machine Learning (CPU-only, distilled) ===
  --extra-index-url https://download.pytorch.org/whl/cpu
  torch>=2.0.0
  transformers>=4.30.0
+ numpy>=1.24.0
  sentence-transformers>=2.2.0
+ accelerate>=0.20.0

- # === Data ===
- numpy>=1.24.0
- pandas>=2.0.0
+ # === NLP ===
+ spacy>=3.5.0

  # === Explainability ===
  lime>=0.2.0

- # === NLP ===
- spacy>=3.5.0
-
  # === Web Backend ===
  flask>=2.3.0
  flask-cors>=4.0.0
  python-dotenv>=1.0.0
+ pandas>=2.0.0
+
+ # === Production/Database ===
  gunicorn>=20.1.0
- flask-sqlalchemy>=3.1.0
- scikit-learn>=1.3.0
- scipy>=1.11.0
+ psycopg2-binary>=2.9.0
+ flask-sqlalchemy>=3.0.0
 
syscred/requirements-light.txt DELETED
@@ -1,31 +0,0 @@
- # SysCRED - Light Requirements (for Render Free Tier)
- # Système Hybride de Vérification de Crédibilité
- # (c) Dominique S. Loyer
- #
- # NOTE: ML features (embeddings) disabled for memory constraints
- # For full ML support, use Railway, Fly.io, or Google Cloud Run
-
- # === Core Dependencies ===
- requests>=2.28.0
- beautifulsoup4>=4.11.0
- python-whois>=0.8.0
-
- # === RDF/Ontology ===
- rdflib>=6.0.0
-
- # === Data Processing (lightweight) ===
- numpy>=1.24.0
- pandas>=2.0.0
-
- # === Web Backend ===
- flask>=2.3.0
- flask-cors>=4.0.0
- python-dotenv>=1.0.0
-
- # === Production/Database ===
- gunicorn>=20.1.0
- psycopg2-binary>=2.9.0
- flask-sqlalchemy>=3.0.0
-
- # === Development/Testing ===
- pytest>=7.0.0
syscred/requirements.txt DELETED
@@ -1,34 +0,0 @@
- # SysCRED - Requirements
- # Système Hybride de Vérification de Crédibilité
- # (c) Dominique S. Loyer
-
- # === Core Dependencies ===
- requests>=2.28.0
- beautifulsoup4>=4.11.0
- python-whois>=0.8.0
-
- # === RDF/Ontology ===
- rdflib>=6.0.0
-
- # === Machine Learning ===
- transformers>=4.30.0
- torch>=2.0.0
- numpy>=1.24.0
- sentence-transformers>=2.2.0
-
- # === Explainability ===
- lime>=0.2.0
-
- # === Web Backend ===
- flask>=2.3.0
- flask-cors>=4.0.0
- python-dotenv>=1.0.0
- pandas>=2.0.0
-
- # === Production/Database ===
- gunicorn>=20.1.0
- psycopg2-binary>=2.9.0
- flask-sqlalchemy>=3.0.0
-
- # === Development/Testing ===
- pytest>=7.0.0
syscred/requirements_light.txt DELETED
@@ -1,19 +0,0 @@
- # SysCRED - Requirements (Light Version for Render Free Tier)
- # Sans ML models - Mode heuristique uniquement
- # (c) Dominique S. Loyer
-
- # === Core Dependencies ===
- requests>=2.28.0
- beautifulsoup4>=4.11.0
- python-whois>=0.8.0
-
- # === RDF/Ontology ===
- rdflib>=6.0.0
-
- # === Web Backend ===
- flask>=2.3.0
- flask-cors>=4.0.0
- python-dotenv>=1.0.0
-
- # === Production ===
- gunicorn>=20.1.0
syscred/static/index.html CHANGED
@@ -928,13 +928,9 @@
  <script>
  // Backend URLs
  const LOCAL_API_URL = 'http://localhost:5001';
- const HF_API_URL = '';
  const REMOTE_API_URL = 'https://domloyer-syscred.hf.space';
  let API_URL = '';

- // API_URL est choisi plus tard par le toggle / la détection d’environnement
- // API_URL = LOCAL_API_URL;
-
  function toggleBackend() {
  const toggle = document.getElementById('backendToggle');
  const status = document.getElementById('backendStatus');
@@ -1402,7 +1398,7 @@
  // FIX: Map backend data (label) to frontend expectations (name)
  if (data.nodes) {
  data.nodes = data.nodes.map(n => {
- n.name = n.label || n.name || 'Unknown';
+ n.name = n.name || n.label || 'Unknown';
  if (!n.group) {
  if (n.type === 'Source') n.group = 1;
  else if (n.type === 'Entity') n.group = 1;
@@ -1626,22 +1622,18 @@

  if(!overlay) return;

- title.textContent = d.name || d.label || 'Unknown';
+ title.textContent = d.name;

  let typeColor = "#94a3b8";
  if(d.group === 1) typeColor = "#8b5cf6"; // Report
  if(d.group === 3) typeColor = "#22c55e"; // Good
  if(d.group === 4) typeColor = "#ef4444"; // Bad

- // Use uri field if available, fallback to id
- const displayUri = d.uri || d.id || 'N/A';
-
  body.innerHTML = `
  <div style="margin-bottom:0.5rem">
  <span style="background:${typeColor}; color:white; padding:2px 6px; border-radius:4px; font-size:0.75rem;">${d.type || 'Unknown Type'}</span>
  </div>
- <div><strong>URI:</strong> <br><span style="font-family:monospace; color:#a855f7; word-break:break-all;">${displayUri}</span></div>
- ${d.score ? `<div style="margin-top:0.5rem"><strong>Score:</strong> ${(d.score * 100).toFixed(0)}%</div>` : ''}
+ <div><strong>URI:</strong> <br><span style="font-family:monospace; color:#a855f7; word-break:break-all;">${d.id}</span></div>
  `;

  overlay.classList.add('visible');
@@ -1866,4 +1858,4 @@
  </script>
  </body>

- </html>
+ </html>
syscred/syscred/eeat_calculator.py ADDED
@@ -0,0 +1,466 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ """
+ E-E-A-T Metrics Calculator for SysCRED
+ ========================================
+ Calculates Google-style E-E-A-T metrics (Experience, Expertise, Authority, Trust).
+
+ These metrics mirror modern Google ranking signals:
+ - Experience: Domain age, content freshness
+ - Expertise: Author identification, depth of content
+ - Authority: PageRank simulation, citations/backlinks
+ - Trust: HTTPS, fact-checks, low bias score
+ """
+
+ from typing import Dict, Any, Optional, List
+ from dataclasses import dataclass
+ import re
+ from datetime import datetime
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class EEATScore:
+     """E-E-A-T score container."""
+     experience: float  # 0-1
+     expertise: float  # 0-1
+     authority: float  # 0-1
+     trust: float  # 0-1
+
+     @property
+     def overall(self) -> float:
+         """Weighted average of all E-E-A-T components."""
+         # Weights based on Google's emphasis
+         weights = {
+             'experience': 0.15,
+             'expertise': 0.25,
+             'authority': 0.35,
+             'trust': 0.25
+         }
+         return (
+             self.experience * weights['experience'] +
+             self.expertise * weights['expertise'] +
+             self.authority * weights['authority'] +
+             self.trust * weights['trust']
+         )
+
+     def to_dict(self) -> Dict[str, Any]:
+         """Convert to dictionary for JSON serialization."""
+         return {
+             'experience': round(self.experience, 3),
+             'expertise': round(self.expertise, 3),
+             'authority': round(self.authority, 3),
+             'trust': round(self.trust, 3),
+             'overall': round(self.overall, 3),
+             'experience_pct': f"{int(self.experience * 100)}%",
+             'expertise_pct': f"{int(self.expertise * 100)}%",
+             'authority_pct': f"{int(self.authority * 100)}%",
+             'trust_pct': f"{int(self.trust * 100)}%",
+             'overall_pct': f"{int(self.overall * 100)}%"
+         }
+
+
+ class EEATCalculator:
+     """
+     Calculate E-E-A-T metrics from various signals.
+
+     Mirrors Google's quality rater guidelines:
+     - Experience: Has the author demonstrated real experience?
+     - Expertise: Is the content expert-level?
+     - Authority: Is the source recognized as authoritative?
+     - Trust: Is the source trustworthy?
+     """
+
+     # Known authoritative domains
+     AUTHORITATIVE_DOMAINS = {
+         # News
+         'lemonde.fr': 0.95,
+         'lefigaro.fr': 0.90,
+         'liberation.fr': 0.88,
+         'nytimes.com': 0.95,
+         'washingtonpost.com': 0.93,
+         'theguardian.com': 0.92,
+         'bbc.com': 0.94,
+         'bbc.co.uk': 0.94,
+         'reuters.com': 0.96,
+         'apnews.com': 0.95,
+         # Academic
+         'nature.com': 0.98,
+         'science.org': 0.98,
+         'pubmed.ncbi.nlm.nih.gov': 0.97,
+         'scholar.google.com': 0.85,
+         # Government
+         'gouv.fr': 0.90,
+         'gov.uk': 0.90,
+         'whitehouse.gov': 0.88,
+         'europa.eu': 0.92,
+         # Fact-checkers
+         'snopes.com': 0.88,
+         'factcheck.org': 0.90,
+         'politifact.com': 0.88,
+         'fullfact.org': 0.89,
+         # Wikipedia (moderate authority)
+         'wikipedia.org': 0.75,
+         'fr.wikipedia.org': 0.75,
+         'en.wikipedia.org': 0.75,
+     }
+
+     # Low-trust domains (misinformation sources)
+     LOW_TRUST_DOMAINS = {
+         'infowars.com': 0.1,
+         'breitbart.com': 0.3,
+         'naturalnews.com': 0.15,
+         # Add more as needed
+     }
+
+     def __init__(self):
+         """Initialize E-E-A-T calculator."""
+         pass
+
+     def calculate(
+         self,
+         url: str,
+         text: str,
+         nlp_analysis: Optional[Dict[str, Any]] = None,
+         pagerank: Optional[float] = None,
+         fact_checks: Optional[List[Dict]] = None,
+         domain_age_years: Optional[float] = None,
+         has_https: bool = True,
+         author_identified: bool = False,
+         seo_score: Optional[float] = None
+     ) -> EEATScore:
+         """
+         Calculate E-E-A-T scores from available signals.
+
+         Args:
+             url: Source URL
+             text: Article text content
+             nlp_analysis: NLP analysis results (sentiment, coherence, bias)
+             pagerank: Simulated PageRank score (0-1)
+             fact_checks: List of fact-check results
+             domain_age_years: Domain age in years (from WHOIS)
+             has_https: Whether site uses HTTPS
+             author_identified: Whether author is clearly identified
+             seo_score: SEO/technical quality score
+
+         Returns:
+             EEATScore with all component scores
+         """
+         # Extract domain from URL
+         domain = self._extract_domain(url)
+
+         # Calculate each component
+         experience = self._calculate_experience(
+             domain_age_years,
+             text,
+             nlp_analysis
+         )
+
+         expertise = self._calculate_expertise(
+             text,
+             author_identified,
+             nlp_analysis
+         )
+
+         authority = self._calculate_authority(
+             domain,
+             pagerank,
+             seo_score
+         )
+
+         trust = self._calculate_trust(
+             domain,
+             has_https,
+             fact_checks,
+             nlp_analysis
+         )
+
+         return EEATScore(
+             experience=experience,
+             expertise=expertise,
+             authority=authority,
+             trust=trust
+         )
+
+     def _extract_domain(self, url: str) -> str:
+         """Extract domain from URL."""
+         match = re.search(r'https?://(?:www\.)?([^/]+)', url)
+         return match.group(1).lower() if match else url.lower()
+
+     def _calculate_experience(
+         self,
+         domain_age_years: Optional[float],
+         text: str,
+         nlp_analysis: Optional[Dict]
+     ) -> float:
+         """
+         Calculate Experience score.
+
+         Factors:
+         - Domain age (longer = more experience)
+         - Content freshness (recently updated)
+         - First-hand experience indicators in text
+         """
+         score = 0.5  # Base score
+
+         # Domain age contribution (max 0.3)
+         if domain_age_years is not None:
+             age_score = min(domain_age_years / 20, 1.0) * 0.3  # 20 years = max
+             score += age_score
+         else:
+             score += 0.15  # Assume moderate age
+
+         # Content depth contribution (max 0.2)
+         word_count = len(text.split()) if text else 0
+         if word_count > 1000:
+             score += 0.2
+         elif word_count > 500:
+             score += 0.15
+         elif word_count > 200:
+             score += 0.1
+
+         # First-hand experience indicators (max 0.1)
+         experience_indicators = [
+             r'\b(j\'ai|je suis|nous avons|I have|we have|in my experience)\b',
+             r'\b(interview|entretien|témoignage|witness|firsthand)\b',
+             r'\b(sur place|on the ground|eyewitness)\b'
+         ]
+         for pattern in experience_indicators:
+             if re.search(pattern, text, re.IGNORECASE):
+                 score += 0.03
+
+         return min(score, 1.0)
+
+     def _calculate_expertise(
+         self,
+         text: str,
+         author_identified: bool,
+         nlp_analysis: Optional[Dict]
+     ) -> float:
+         """
+         Calculate Expertise score.
+
+         Factors:
+         - Author identification
+         - Technical depth of content
+         - Citation of sources
+         - Coherence (from NLP)
+         """
+         score = 0.4  # Base score
+
+         # Author identification (0.2)
+         if author_identified:
+             score += 0.2
+
+         # Citation indicators (max 0.2)
+         citation_patterns = [
+             r'\b(selon|according to|d\'après|source:)\b',
+             r'\b(étude|study|research|rapport|report)\b',
+             r'\b(expert|spécialiste|chercheur|professor|Dr\.)\b',
+             r'\[([\d]+)\]',  # [1] style citations
+             r'https?://[^\s]+'  # Links
+         ]
+         citation_count = 0
+         for pattern in citation_patterns:
+             citation_count += len(re.findall(pattern, text, re.IGNORECASE))
+         score += min(citation_count * 0.02, 0.2)
+
+         # Coherence from NLP analysis (0.2)
+         if nlp_analysis and 'coherence' in nlp_analysis:
+             coherence = nlp_analysis['coherence']
+             if isinstance(coherence, dict):
+                 coherence = coherence.get('score', 0.5)
+             score += coherence * 0.2
+         else:
+             score += 0.1  # Assume moderate coherence
+
+         return min(score, 1.0)
+
+     def _calculate_authority(
+         self,
+         domain: str,
+         pagerank: Optional[float],
+         seo_score: Optional[float]
+     ) -> float:
+         """
+         Calculate Authority score.
+
+         Factors:
+         - Known authoritative domain
+         - PageRank simulation
+         - SEO/technical quality
+         """
+         score = 0.3  # Base score
+
+         # Known domain authority (max 0.5)
+         for known_domain, authority in self.AUTHORITATIVE_DOMAINS.items():
+             if known_domain in domain:
+                 score = max(score, authority * 0.5 + 0.3)
+                 break
+
+         # Check low-trust domains
+         for low_trust_domain, low_score in self.LOW_TRUST_DOMAINS.items():
+             if low_trust_domain in domain:
+                 score = min(score, low_score)
+                 break
+
+         # PageRank contribution (max 0.3)
+         if pagerank is not None:
+             score += pagerank * 0.3
+         else:
+             score += 0.15  # Assume moderate pagerank
+
+         # SEO score contribution (max 0.2)
+         if seo_score is not None:
+             score += seo_score * 0.2
+         else:
+             score += 0.1
+
+         return min(score, 1.0)
+
+     def _calculate_trust(
+         self,
+         domain: str,
+         has_https: bool,
+         fact_checks: Optional[List[Dict]],
+         nlp_analysis: Optional[Dict]
+     ) -> float:
+         """
+         Calculate Trust score.
+
+         Factors:
+         - HTTPS
+         - Fact-check results
+         - Bias score (low = better)
+         - Known trustworthy domain
+         """
+         score = 0.4  # Base score
+
+         # HTTPS (0.1)
+         if has_https:
+             score += 0.1
+
+         # Fact-check results (max 0.3)
+         if fact_checks:
+             positive_checks = sum(1 for fc in fact_checks
+                                   if fc.get('rating', '').lower() in ['true', 'vrai', 'correct'])
+             negative_checks = sum(1 for fc in fact_checks
+                                   if fc.get('rating', '').lower() in ['false', 'faux', 'incorrect', 'pants-fire'])
+
+             if positive_checks > 0:
+                 score += 0.2
+             if negative_checks > 0:
+                 score -= 0.3
+
+         # Bias score (max 0.2, lower bias = higher trust)
+         if nlp_analysis:
+             bias_data = nlp_analysis.get('bias_analysis', {})
+             if isinstance(bias_data, dict):
+                 bias_score = bias_data.get('score', 0.3)
+             else:
+                 bias_score = 0.3
+             # Invert: low bias = high trust contribution
+             score += (1 - bias_score) * 0.2
+         else:
+             score += 0.1
+
+         # Known trustworthy domain (0.1)
+         for known_domain in self.AUTHORITATIVE_DOMAINS:
+             if known_domain in domain:
+                 score += 0.1
+                 break
+
+         # Known low-trust domain (penalty)
+         for low_trust_domain in self.LOW_TRUST_DOMAINS:
+             if low_trust_domain in domain:
+                 score -= 0.3
+                 break
+
+         return max(min(score, 1.0), 0.0)
+
+     def explain_score(self, eeat: EEATScore, url: str) -> str:
+         """
+         Generate human-readable explanation of E-E-A-T score.
+
+         Args:
+             eeat: EEATScore instance
+             url: Source URL
+
+         Returns:
+             Formatted explanation string
+         """
+         # Percentage strings come from to_dict(); EEATScore has no *_pct attributes
+         pct = eeat.to_dict()
+
+         explanations = []
+
+         # Experience
+         if eeat.experience >= 0.8:
+             explanations.append(f"✅ **Expérience élevée** ({pct['experience_pct']}): Source établie depuis longtemps")
+         elif eeat.experience >= 0.5:
+             explanations.append(f"🔶 **Expérience moyenne** ({pct['experience_pct']}): Source modérément établie")
+         else:
+             explanations.append(f"⚠️ **Expérience faible** ({pct['experience_pct']}): Source récente ou peu connue")
+
+         # Expertise
+         if eeat.expertise >= 0.8:
+             explanations.append(f"✅ **Expertise élevée** ({pct['expertise_pct']}): Contenu approfondi avec citations")
+         elif eeat.expertise >= 0.5:
+             explanations.append(f"🔶 **Expertise moyenne** ({pct['expertise_pct']}): Contenu standard")
+         else:
+             explanations.append(f"⚠️ **Expertise faible** ({pct['expertise_pct']}): Manque de profondeur")
+
+         # Authority
+         if eeat.authority >= 0.8:
+             explanations.append(f"✅ **Autorité élevée** ({pct['authority_pct']}): Source très citée et reconnue")
+         elif eeat.authority >= 0.5:
+             explanations.append(f"🔶 **Autorité moyenne** ({pct['authority_pct']}): Source modérément reconnue")
+         else:
+             explanations.append(f"⚠️ **Autorité faible** ({pct['authority_pct']}): Peu de citations externes")
+
+         # Trust
+         if eeat.trust >= 0.8:
+             explanations.append(f"✅ **Confiance élevée** ({pct['trust_pct']}): Faits vérifiés, pas de biais")
+         elif eeat.trust >= 0.5:
+             explanations.append(f"🔶 **Confiance moyenne** ({pct['trust_pct']}): Quelques signaux de confiance")
+         else:
+             explanations.append(f"⚠️ **Confiance faible** ({pct['trust_pct']}): Prudence recommandée")
+
+         return "\n".join(explanations)
+
+
+ # Test
+ if __name__ == "__main__":
+     calc = EEATCalculator()
+
+     test_url = "https://www.lemonde.fr/politique/article/2024/01/06/trump.html"
+     test_text = """
+     Selon une étude du chercheur Dr. Martin, l'insurrection du 6 janvier 2021
+     au Capitol a été un événement marquant. Notre reporter sur place a témoigné
+     des événements. Les experts politiques analysent les conséquences.
+     """
+
+     nlp_analysis = {
+         'coherence': {'score': 0.8},
+         'bias_analysis': {'score': 0.2}
+     }
+
+     eeat = calc.calculate(
+         url=test_url,
+         text=test_text,
+         nlp_analysis=nlp_analysis,
+         pagerank=0.7,
+         has_https=True,
+         author_identified=True
+     )
+
+     scores = eeat.to_dict()
+     print("=== E-E-A-T Scores ===")
+     print(f"Experience: {scores['experience_pct']}")
+     print(f"Expertise: {scores['expertise_pct']}")
+     print(f"Authority: {scores['authority_pct']}")
+     print(f"Trust: {scores['trust_pct']}")
+     print(f"Overall: {scores['overall_pct']}")
+     print("\n=== Explanation ===")
+     print(calc.explain_score(eeat, test_url))
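The `overall` property of `EEATScore` is a fixed-weight linear blend of the four components. A minimal standalone sketch of that blend, with the weights copied from the dataclass in the diff above (the sample component scores are illustrative, not taken from a real evaluation):

```python
# Standalone sketch of the EEATScore.overall weighting from eeat_calculator.py.
# Weights copied from the diff; the sample inputs below are made up.
WEIGHTS = {'experience': 0.15, 'expertise': 0.25, 'authority': 0.35, 'trust': 0.25}

def overall(experience: float, expertise: float, authority: float, trust: float) -> float:
    """Weighted average of the four E-E-A-T components (each in 0-1)."""
    return (experience * WEIGHTS['experience']
            + expertise * WEIGHTS['expertise']
            + authority * WEIGHTS['authority']
            + trust * WEIGHTS['trust'])

print(round(overall(0.8, 0.7, 0.9, 0.6), 3))  # → 0.76
```

Because the weights sum to 1.0, the result stays in the same 0-1 range as the inputs, with Authority (0.35) dominating the blend.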
syscred/syscred/ner_analyzer.py ADDED
@@ -0,0 +1,283 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ """
+ Named Entity Recognition (NER) Analyzer for SysCRED
+ ====================================================
+ Extracts named entities from text using spaCy.
+
+ Entities detected:
+ - PER: Persons (Donald Trump, Emmanuel Macron)
+ - ORG: Organizations (FBI, UN, Google)
+ - LOC: Locations (Paris, Capitol)
+ - DATE: Dates (January 6, 2021)
+ - MONEY: Amounts ($10 million)
+ - EVENT: Events (insurrection, election)
+ """
+
+ from typing import Dict, List, Any, Optional
+ import logging
+
+ # Try to import spaCy
+ try:
+     import spacy
+     from spacy.language import Language
+     HAS_SPACY = True
+ except ImportError:
+     HAS_SPACY = False
+     spacy = None
+
+ logger = logging.getLogger(__name__)
+
+
+ class NERAnalyzer:
+     """
+     Named Entity Recognition analyzer using spaCy.
+
+     Supports French (fr_core_news_md) and English (en_core_web_md).
+     Falls back to heuristic extraction if spaCy is not available.
+     """
+
+     # Entity type mappings for display
+     ENTITY_LABELS = {
+         'PER': {'fr': 'Personne', 'en': 'Person', 'emoji': '👤'},
+         'PERSON': {'fr': 'Personne', 'en': 'Person', 'emoji': '👤'},
+         'ORG': {'fr': 'Organisation', 'en': 'Organization', 'emoji': '🏢'},
+         'LOC': {'fr': 'Lieu', 'en': 'Location', 'emoji': '📍'},
+         'GPE': {'fr': 'Lieu géopolitique', 'en': 'Geopolitical', 'emoji': '🌍'},
+         'DATE': {'fr': 'Date', 'en': 'Date', 'emoji': '📅'},
+         'TIME': {'fr': 'Heure', 'en': 'Time', 'emoji': '⏰'},
+         'MONEY': {'fr': 'Montant', 'en': 'Money', 'emoji': '💰'},
+         'PERCENT': {'fr': 'Pourcentage', 'en': 'Percent', 'emoji': '📊'},
+         'EVENT': {'fr': 'Événement', 'en': 'Event', 'emoji': '📰'},
+         'PRODUCT': {'fr': 'Produit', 'en': 'Product', 'emoji': '📦'},
+         'LAW': {'fr': 'Loi', 'en': 'Law', 'emoji': '⚖️'},
+         'NORP': {'fr': 'Groupe', 'en': 'Group', 'emoji': '👥'},
+         'MISC': {'fr': 'Divers', 'en': 'Miscellaneous', 'emoji': '🔖'},
+     }
+
+     def __init__(self, model_name: str = "fr_core_news_md", fallback: bool = True):
+         """
+         Initialize NER analyzer.
+
+         Args:
+             model_name: spaCy model to load (fr_core_news_md, en_core_web_md)
+             fallback: If True, use heuristics when spaCy unavailable
+         """
+         self.model_name = model_name
+         self.fallback = fallback
+         self.nlp = None
+         self.use_heuristics = False
+
+         if HAS_SPACY:
+             try:
+                 self.nlp = spacy.load(model_name)
+                 logger.info(f"[NER] Loaded spaCy model: {model_name}")
+             except OSError as e:
+                 logger.warning(f"[NER] Could not load model {model_name}: {e}")
+                 if fallback:
+                     self.use_heuristics = True
+                     logger.info("[NER] Using heuristic entity extraction")
+         else:
+             if fallback:
+                 self.use_heuristics = True
+                 logger.info("[NER] spaCy not installed. Using heuristic extraction")
+
+     def extract_entities(self, text: str) -> Dict[str, List[Dict[str, Any]]]:
+         """
+         Extract named entities from text.
+
+         Args:
+             text: Input text to analyze
+
+         Returns:
+             Dictionary mapping entity types to lists of entities.
+             Each entity has: text, start, end, label, label_display, emoji, confidence
+         """
+         if not text or len(text.strip()) == 0:
+             return {}
+
+         if self.nlp:
+             return self._extract_with_spacy(text)
+         elif self.use_heuristics:
+             return self._extract_with_heuristics(text)
+         else:
+             return {}
+
+     def _extract_with_spacy(self, text: str) -> Dict[str, List[Dict[str, Any]]]:
+         """Extract entities using spaCy NLP."""
+         doc = self.nlp(text)
+         entities: Dict[str, List[Dict[str, Any]]] = {}
+
+         for ent in doc.ents:
+             label = ent.label_
+
+             # Get display info
+             label_info = self.ENTITY_LABELS.get(label, {
+                 'fr': label,
+                 'en': label,
+                 'emoji': '🔖'
+             })
+
+             entity_data = {
+                 'text': ent.text,
+                 'start': ent.start_char,
+                 'end': ent.end_char,
+                 'label': label,
+                 'label_display': label_info.get('fr', label),
+                 'emoji': label_info.get('emoji', '🔖'),
+                 'confidence': 0.85  # spaCy doesn't provide confidence by default
+             }
+
+             if label not in entities:
+                 entities[label] = []
+
+             # Avoid duplicates
+             if not any(e['text'].lower() == entity_data['text'].lower() for e in entities[label]):
+                 entities[label].append(entity_data)
+
+         return entities
+
+     def _extract_with_heuristics(self, text: str) -> Dict[str, List[Dict[str, Any]]]:
+         """
+         Fallback heuristic entity extraction.
+         Uses pattern matching for common entities.
+         """
+         import re
+         entities: Dict[str, List[Dict[str, Any]]] = {}
+
+         # Common patterns
+         patterns = {
+             'PER': [
+                 # Known political figures
+                 r'\b(Donald Trump|Joe Biden|Emmanuel Macron|Hillary Clinton|Barack Obama|'
+                 r'Vladimir Putin|Angela Merkel|Justin Trudeau|Boris Johnson)\b',
+             ],
+             'ORG': [
+                 r'\b(FBI|CIA|NSA|ONU|NATO|OTAN|Google|Facebook|Twitter|Meta|'
+                 r'Amazon|Microsoft|Apple|CNN|BBC|Le Monde|New York Times|'
+                 r'Parti Républicain|Parti Démocrate|Republican Party|Democratic Party)\b',
+             ],
+             'LOC': [
+                 r'\b(Capitol|White House|Maison Blanche|Kremlin|Élysée|Pentagon|'
+                 r'New York|Washington|Paris|Londres|Moscou|Berlin|Beijing)\b',
+             ],
+             'DATE': [
+                 r'\b(\d{1,2}\s+(janvier|février|mars|avril|mai|juin|juillet|août|'
+                 r'septembre|octobre|novembre|décembre)\s+\d{4})\b',
+                 r'\b(\d{1,2}[-/]\d{1,2}[-/]\d{2,4})\b',
+                 r'\b(January|February|March|April|May|June|July|August|'
+                 r'September|October|November|December)\s+\d{1,2},?\s+\d{4}\b',
+             ],
+             'MONEY': [
+                 r'\$[\d,]+(?:\.\d{2})?(?:\s*(?:million|billion|trillion))?',
+                 r'[\d,]+(?:\.\d{2})?\s*(?:dollars?|euros?|€|\$)',
+                 r'[\d,]+\s*(?:million|milliard)s?\s*(?:de\s+)?(?:dollars?|euros?)',
+             ],
+             'PERCENT': [
+                 r'\b\d+(?:\.\d+)?%',
+                 r'\b\d+(?:\.\d+)?\s*pour\s*cent',
+                 r'\b\d+(?:\.\d+)?\s*percent',
+             ],
+         }
+
+         for label, pattern_list in patterns.items():
+             label_info = self.ENTITY_LABELS.get(label, {'fr': label, 'emoji': '🔖'})
+
+             for pattern in pattern_list:
+                 for match in re.finditer(pattern, text, re.IGNORECASE):
+                     entity_data = {
+                         'text': match.group(),
+                         'start': match.start(),
+                         'end': match.end(),
+                         'label': label,
+                         'label_display': label_info.get('fr', label),
+                         'emoji': label_info.get('emoji', '🔖'),
+                         'confidence': 0.70  # Lower confidence for heuristics
+                     }
+
+                     if label not in entities:
+                         entities[label] = []
+
+                     # Avoid duplicates
+                     if not any(e['text'].lower() == entity_data['text'].lower()
+                                for e in entities[label]):
+                         entities[label].append(entity_data)
+
+         return entities
+
+     def get_entity_summary(self, entities: Dict[str, List[Dict[str, Any]]]) -> str:
+         """
+         Generate a human-readable summary of extracted entities.
+
+         Args:
+             entities: Dictionary of entities from extract_entities()
+
+         Returns:
+             Formatted string summary
+         """
+         if not entities:
+             return "Aucune entité nommée détectée."
+
+         lines = []
+         for label, ent_list in entities.items():
+             label_info = self.ENTITY_LABELS.get(label, {'fr': label, 'emoji': '🔖'})
+             emoji = label_info.get('emoji', '🔖')
+             label_display = label_info.get('fr', label)
+
+             entity_texts = [e['text'] for e in ent_list[:5]]  # Limit to 5
+             lines.append(f"{emoji} {label_display}: {', '.join(entity_texts)}")
+
+         return "\n".join(lines)
+
+     def to_frontend_format(self, entities: Dict[str, List[Dict[str, Any]]]) -> List[Dict]:
+         """
+         Convert entities to frontend-friendly format.
+
+         Returns:
+             List of entities with all info for display
+         """
+         result = []
+         for label, ent_list in entities.items():
+             for ent in ent_list:
+                 result.append({
+                     'text': ent['text'],
+                     'type': ent['label'],
+                     'type_display': ent.get('label_display', ent['label']),
+                     'emoji': ent.get('emoji', '🔖'),
+                     'confidence': ent.get('confidence', 0.5),
+                     'confidence_pct': f"{int(ent.get('confidence', 0.5) * 100)}%"
+                 })
+
+         # Sort by confidence
+         result.sort(key=lambda x: x['confidence'], reverse=True)
+         return result
+
+
+ # Singleton instance for easy import
+ _ner_analyzer: Optional[NERAnalyzer] = None
+
+
+ def get_ner_analyzer(model_name: str = "fr_core_news_md") -> NERAnalyzer:
+     """Get or create singleton NER analyzer instance."""
+     global _ner_analyzer
+     if _ner_analyzer is None:
+         _ner_analyzer = NERAnalyzer(model_name=model_name, fallback=True)
+     return _ner_analyzer
+
+
+ # Quick test
+ if __name__ == "__main__":
+     analyzer = NERAnalyzer(fallback=True)
+
+     test_text = """
+     Donald Trump a affirmé que l'insurrection du 6 janvier 2021 au Capitol n'est jamais arrivée.
+     Le FBI enquête sur les événements. Le président Joe Biden a condamné ces déclarations à Washington.
+     Les dégâts sont estimés à 30 millions de dollars.
+     """
+
+     entities = analyzer.extract_entities(test_text)
+     print("=== Entités détectées ===")
+     print(analyzer.get_entity_summary(entities))
+     print("\n=== Format Frontend ===")
+     for e in analyzer.to_frontend_format(entities):
+         print(f"  {e['emoji']} {e['text']} ({e['type_display']}, {e['confidence_pct']})")
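When spaCy or its models are unavailable, `_extract_with_heuristics` above falls back to plain regex matching with de-duplication. A compressed standalone sketch of that fallback path, reduced to the PERCENT and MONEY patterns for brevity (the pattern strings are taken from the diff; everything else is simplified):

```python
import re

# Reduced version of the regex-fallback idea in ner_analyzer.py:
# two of the patterns from the diff, with case-insensitive de-duplication.
PATTERNS = {
    'PERCENT': r'\b\d+(?:\.\d+)?%',
    'MONEY': r'\$[\d,]+(?:\.\d{2})?(?:\s*(?:million|billion|trillion))?',
}

def extract(text: str) -> dict:
    """Map entity label -> list of unique matched strings, in match order."""
    out = {}
    for label, pattern in PATTERNS.items():
        seen = []
        for m in re.finditer(pattern, text, re.IGNORECASE):
            hit = m.group()
            if hit.lower() not in (s.lower() for s in seen):
                seen.append(hit)
        if seen:
            out[label] = seen
    return out

print(extract("Damages of $30 million, up 12.5% year over year."))
```

The real analyzer additionally records character offsets and a fixed 0.70 confidence per heuristic match, so results stay interchangeable with the spaCy path.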
syscred/verification_system.py CHANGED
@@ -33,35 +33,28 @@ except ImportError:
33
  HAS_SBERT = False
34
  print("Warning: sentence-transformers not installed. Semantic coherence will use heuristics.")
35
 
36
- # Local imports - Support both syscred.module and relative imports
 
 
 
 
 
 
 
 
37
  try:
38
- from syscred.api_clients import ExternalAPIClients, WebContent, ExternalData
39
- from syscred.ontology_manager import OntologyManager
40
- from syscred.seo_analyzer import SEOAnalyzer
41
- from syscred.graph_rag import GraphRAG
42
- from syscred.trec_retriever import TRECRetriever, Evidence, RetrievalResult
43
- from syscred import config
44
  except ImportError:
45
- from api_clients import ExternalAPIClients, WebContent, ExternalData
46
- from ontology_manager import OntologyManager
47
- from seo_analyzer import SEOAnalyzer
48
- from graph_rag import GraphRAG
49
- from trec_retriever import TRECRetriever, Evidence, RetrievalResult
50
- import config
51
 
52
- # [NER + E-E-A-T] Imports optionnels - n'interferent pas avec les imports principaux
53
- HAS_NER_EEAT = False
54
  try:
55
- from syscred.ner_analyzer import NERAnalyzer
56
  from syscred.eeat_calculator import EEATCalculator, EEATScore
57
- HAS_NER_EEAT = True
58
  except ImportError:
59
- try:
60
- from ner_analyzer import NERAnalyzer
61
- from eeat_calculator import EEATCalculator, EEATScore
62
- HAS_NER_EEAT = True
63
- except ImportError:
64
- pass
65
 
66
 
67
  class CredibilityVerificationSystem:
@@ -143,18 +136,6 @@ class CredibilityVerificationSystem:
143
  # Weights for score calculation (Loaded from Config)
144
  self.weights = config.Config.SCORE_WEIGHTS
145
  print(f"[SysCRED] Using weights: {self.weights}")
146
-
147
- # [NER + E-E-A-T] Initialize analyzers
148
- self.ner_analyzer = None
149
- self.eeat_calculator = None
150
- if HAS_NER_EEAT:
151
- try:
152
- self.ner_analyzer = NERAnalyzer()
153
- self.eeat_calculator = EEATCalculator()
154
- print("[SysCRED] NER analyzer initialized")
155
- print("[SysCRED] E-E-A-T calculator initialized")
156
- except Exception as e:
157
- print(f"[SysCRED] NER/E-E-A-T init failed: {e}")
158
 
159
  print("[SysCRED] System ready!")
160
 
@@ -163,47 +144,40 @@ class CredibilityVerificationSystem:
         print("[SysCRED] Loading ML models (this may take a moment)...")

         try:
-            # Sentiment analysis - ultra-lightweight model
             self.sentiment_pipeline = pipeline(
-                "sentiment-analysis",
-                model="distilbert-base-uncased-finetuned-sst-2-english",
-                device=-1,
-                model_kwargs={"low_cpu_mem_usage": True}
             )
-            print("[SysCRED] ✓ Sentiment model loaded (distilbert-base)")
         except Exception as e:
             print(f"[SysCRED] ✗ Sentiment model failed: {e}")
-
         try:
-            # NER pipeline - lighter model
-            self.ner_pipeline = pipeline(
-                "ner",
-                model="dslim/bert-base-NER",
-                grouped_entities=True,
-                device=-1,
-                model_kwargs={"low_cpu_mem_usage": True}
-            )
-            print("[SysCRED] ✓ NER model loaded (dslim/bert-base-NER)")
         except Exception as e:
             print(f"[SysCRED] ✗ NER model failed: {e}")
-
         try:
-            # Bias detection - lighter model if possible
-            bias_model_name = "typeform/distilbert-base-uncased-mnli"
             self.bias_tokenizer = AutoTokenizer.from_pretrained(bias_model_name)
             self.bias_model = AutoModelForSequenceClassification.from_pretrained(bias_model_name)
-            print("[SysCRED] ✓ Bias model loaded (distilbert-mnli)")
         except Exception as e:
             print(f"[SysCRED] ✗ Bias model failed: {e}. Using heuristics.")

         try:
-            # Semantic Coherence - MiniLM model (already lightweight)
             if HAS_SBERT:
                 self.coherence_model = SentenceTransformer('all-MiniLM-L6-v2')
-            print("[SysCRED] ✓ Coherence model loaded (SBERT MiniLM)")
         except Exception as e:
             print(f"[SysCRED] ✗ Coherence model failed: {e}")
-
         try:
             # LIME explainer
             self.explainer = LimeTextExplainer(class_names=['NEGATIVE', 'POSITIVE'])
@@ -527,26 +501,6 @@ class CredibilityVerificationSystem:
         adjustment_factor = (graph_score - 0.5) * w_graph * confidence
         adjustments += adjustment_factor
         total_weight_used += w_graph * confidence  # Partial weight based on confidence
-
-        # 8. [NEW] Linguistic Markers Analysis (sensationalism penalty)
-        # Penalize sensational language heavily, reward doubt markers (critical thinking)
-        linguistic = rule_results.get('linguistic_markers', {})
-        sensationalism_count = linguistic.get('sensationalism', 0)
-        doubt_count = linguistic.get('doubt', 0)
-        certainty_count = linguistic.get('certainty', 0)
-
-        # Sensationalism is a strong negative signal
-        if sensationalism_count > 0:
-            penalty = min(0.20, sensationalism_count * 0.05)  # Max 20% penalty
-            adjustments -= penalty
-
-        # Excessive certainty without sources is suspicious
-        if certainty_count > 2 and not fact_checks:
-            adjustments -= 0.05
-
-        # Doubt markers indicate critical/questioning tone (slight positive)
-        if doubt_count > 0:
-            adjustments += min(0.05, doubt_count * 0.02)

         # Final calculation
         # Base 0.5 + sum of weighted adjustments
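The removed hunk above caps each linguistic adjustment so no single marker can dominate the score. As a standalone illustration of that clamping logic (hypothetical helper name; not part of the committed code):

```python
def linguistic_adjustment(sensationalism: int, doubt: int, certainty: int,
                          has_fact_checks: bool) -> float:
    """Bounded score adjustment from linguistic marker counts (sketch)."""
    adj = 0.0
    if sensationalism > 0:
        adj -= min(0.20, sensationalism * 0.05)  # penalty capped at 20%
    if certainty > 2 and not has_fact_checks:
        adj -= 0.05  # unsourced overconfidence is suspicious
    if doubt > 0:
        adj += min(0.05, doubt * 0.02)  # mild bonus for a hedged, critical tone
    return adj
```

Five sensationalist markers already hit the 0.20 cap, so stacking more has no further effect.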
@@ -703,24 +657,11 @@ class CredibilityVerificationSystem:
     ) -> Dict[str, Any]:
         """Generate the final evaluation report."""

-        # Determine credibility level
-        if overall_score >= 0.75:
-            niveau = "Élevée"
-        elif overall_score >= 0.55:
-            niveau = "Moyenne-Élevée"
-        elif overall_score >= 0.45:
-            niveau = "Moyenne"
-        elif overall_score >= 0.25:
-            niveau = "Faible-Moyenne"
-        else:
-            niveau = "Faible"
-
         report = {
             'idRapport': f"report_{int(datetime.datetime.now().timestamp())}",
             'informationEntree': input_data,
             'dateGeneration': datetime.datetime.now().isoformat(),
             'scoreCredibilite': round(overall_score, 2),
-            'niveauCredibilite': niveau,
             'resumeAnalyse': "",
             'detailsScore': {
                 'base': 0.5,
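The deleted `niveau` branch bucketed the [0, 1] score into five labels at the thresholds 0.75 / 0.55 / 0.45 / 0.25. An equivalent standalone sketch (hypothetical function name, same labels and thresholds as the removed code):

```python
def credibility_level(score: float) -> str:
    """Map a [0, 1] credibility score to the labels used by the removed branch."""
    if score >= 0.75:
        return "Élevée"
    elif score >= 0.55:
        return "Moyenne-Élevée"
    elif score >= 0.45:
        return "Moyenne"
    elif score >= 0.25:
        return "Faible-Moyenne"
    return "Faible"
```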
@@ -747,6 +688,8 @@ class CredibilityVerificationSystem:
             },
             # [NEW] TREC Evidence section
             'evidences': evidences or [],
             'metadonnees': {}
         }

@@ -813,6 +756,99 @@ class CredibilityVerificationSystem:

         return report

     def _get_score_factors(self, rule_results: Dict, nlp_results: Dict) -> List[Dict]:
         """Get list of factors that influenced the score (For UI)."""
         factors = []
@@ -973,40 +1009,6 @@ class CredibilityVerificationSystem:
         print("[SysCRED] Running NLP analysis...")
         nlp_results = self.nlp_analysis(cleaned_text)

-        # 6.5 [NER] Named Entity Recognition
-        ner_entities = {}
-        if self.ner_analyzer and cleaned_text:
-            try:
-                ner_entities = self.ner_analyzer.extract_entities(cleaned_text)
-                total = sum(len(v) for v in ner_entities.values() if isinstance(v, list))
-                print(f"[SysCRED] NER: {total} entites detectees")
-            except Exception as e:
-                print(f"[SysCRED] NER failed: {e}")
-
-        # 6.6 [E-E-A-T] Experience-Expertise-Authority-Trust scoring
-        eeat_scores = {}
-        if self.eeat_calculator:
-            try:
-                url_for_eeat = input_data if is_url else ""
-                domain_age_years = None
-                if external_data.domain_age_days:
-                    domain_age_years = external_data.domain_age_days / 365.0
-
-                eeat_raw = self.eeat_calculator.calculate(
-                    url=url_for_eeat,
-                    text=cleaned_text,
-                    nlp_analysis=nlp_results,
-                    fact_checks=rule_results.get('fact_checking', []),
-                    domain_age_years=domain_age_years,
-                    has_https=input_data.startswith("https://") if is_url else False
-                )
-                eeat_scores = eeat_raw.to_dict() if hasattr(eeat_raw, 'to_dict') else (
-                    eeat_raw if isinstance(eeat_raw, dict) else vars(eeat_raw)
-                )
-                print(f"[SysCRED] E-E-A-T score: {eeat_scores.get('overall', 'N/A')}")
-            except Exception as e:
-                print(f"[SysCRED] E-E-A-T failed: {e}")
-
         # 7. Calculate score (Now includes GraphRAG context)
         overall_score = self.calculate_overall_score(rule_results, nlp_results)
         print(f"[SysCRED] ✓ Credibility score: {overall_score:.2f}")
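The removed call passes URL, text, domain age, and HTTPS signals into `EEATCalculator.calculate`, whose internals are not shown in this diff. A minimal sketch of how such signals could be combined (equal weighting, field names, and the trust heuristic are illustrative assumptions, not the module's actual logic):

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class EEATSketch:
    """Toy container for the four E-E-A-T sub-scores, each in [0, 1]."""
    experience: float
    expertise: float
    authority: float
    trust: float

    @property
    def overall(self) -> float:
        # Equal weighting is an assumption for illustration only
        return round((self.experience + self.expertise
                      + self.authority + self.trust) / 4, 2)

def score_trust(has_https: bool, domain_age_years: Optional[float]) -> float:
    """Toy trust signal: baseline plus HTTPS bonus and a capped domain-age bonus."""
    score = 0.5 + (0.2 if has_https else 0.0)
    if domain_age_years:
        score += min(0.3, domain_age_years * 0.03)
    return min(1.0, score)
```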
@@ -1018,10 +1020,6 @@ class CredibilityVerificationSystem:
             graph_context=graph_context
         )

-        # [NER + E-E-A-T] Always include in report (even if empty)
-        report['ner_entities'] = ner_entities
-        report['eeat_scores'] = eeat_scores
-
         # Add similar URIs to report for ontology linking
         if similar_uris:
             report['similar_claims_uris'] = similar_uris
 
     HAS_SBERT = False
     print("Warning: sentence-transformers not installed. Semantic coherence will use heuristics.")

+# Local imports
+from syscred.api_clients import ExternalAPIClients, WebContent, ExternalData
+from syscred.ontology_manager import OntologyManager
+from syscred.seo_analyzer import SEOAnalyzer
+from syscred.graph_rag import GraphRAG  # [NEW] GraphRAG
+from syscred.trec_retriever import TRECRetriever, Evidence, RetrievalResult  # [NEW] TREC Integration
+from syscred import config
+
+# [NEW] NER and E-E-A-T modules
 try:
+    from syscred.ner_analyzer import NERAnalyzer, get_ner_analyzer
+    HAS_NER = True
 except ImportError:
+    HAS_NER = False
+    print("[SysCRED] Warning: NER module not available")

 try:
     from syscred.eeat_calculator import EEATCalculator, EEATScore
+    HAS_EEAT = True
 except ImportError:
+    HAS_EEAT = False
+    print("[SysCRED] Warning: E-E-A-T module not available")


 class CredibilityVerificationSystem:
 
         # Weights for score calculation (Loaded from Config)
         self.weights = config.Config.SCORE_WEIGHTS
         print(f"[SysCRED] Using weights: {self.weights}")

         print("[SysCRED] System ready!")

         print("[SysCRED] Loading ML models (this may take a moment)...")

         try:
+            # Sentiment analysis
             self.sentiment_pipeline = pipeline(
+                "sentiment-analysis",
+                model="distilbert-base-uncased-finetuned-sst-2-english"
             )
+            print("[SysCRED] ✓ Sentiment model loaded")
         except Exception as e:
             print(f"[SysCRED] ✗ Sentiment model failed: {e}")
+
         try:
+            # NER pipeline
+            self.ner_pipeline = pipeline("ner", grouped_entities=True)
+            print("[SysCRED] ✓ NER model loaded")
         except Exception as e:
             print(f"[SysCRED] ✗ NER model failed: {e}")
+
         try:
+            # Bias detection - Specialized model
+            # Using 'd4data/bias-detection-model' or fallback to generic
+            bias_model_name = "d4data/bias-detection-model"
             self.bias_tokenizer = AutoTokenizer.from_pretrained(bias_model_name)
             self.bias_model = AutoModelForSequenceClassification.from_pretrained(bias_model_name)
+            print("[SysCRED] ✓ Bias model loaded (d4data)")
         except Exception as e:
             print(f"[SysCRED] ✗ Bias model failed: {e}. Using heuristics.")

         try:
+            # Semantic Coherence
             if HAS_SBERT:
                 self.coherence_model = SentenceTransformer('all-MiniLM-L6-v2')
+            print("[SysCRED] ✓ Coherence model loaded (SBERT)")
         except Exception as e:
             print(f"[SysCRED] ✗ Coherence model failed: {e}")
+
         try:
             # LIME explainer
             self.explainer = LimeTextExplainer(class_names=['NEGATIVE', 'POSITIVE'])
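Each loader above follows the same graceful-degradation pattern: try to construct the model, log the outcome, and leave the attribute unset so later code can fall back to heuristics. The pattern itself, independent of transformers (hypothetical helper name, not part of the commit):

```python
from typing import Any, Callable, Optional

def load_optional(name: str, factory: Callable[[], Any]) -> Optional[Any]:
    """Try a model factory; on failure, log and return None so callers can fall back."""
    try:
        model = factory()
        print(f"[SysCRED] ✓ {name} loaded")
        return model
    except Exception as e:
        print(f"[SysCRED] ✗ {name} failed: {e}")
        return None
```

A caller would then write `self.ner_pipeline = load_optional("NER model", lambda: pipeline("ner", grouped_entities=True))` and check for `None` before use.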
 
         adjustment_factor = (graph_score - 0.5) * w_graph * confidence
         adjustments += adjustment_factor
         total_weight_used += w_graph * confidence  # Partial weight based on confidence

         # Final calculation
         # Base 0.5 + sum of weighted adjustments
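The context above shows the scoring scheme: each signal contributes a signed adjustment (neutral at 0.5, scaled by its weight and confidence), and the final score is base 0.5 plus the summed adjustments. A minimal sketch of that arithmetic (the [0, 1] clipping is an assumption inferred from the score range; function names are hypothetical):

```python
from typing import List

def graph_adjustment(graph_score: float, w_graph: float, confidence: float) -> float:
    """Signed contribution of the graph signal: zero at 0.5, scaled by weight and confidence."""
    return (graph_score - 0.5) * w_graph * confidence

def combine_score(adjustments: List[float], base: float = 0.5) -> float:
    """Base credibility plus summed weighted adjustments, clipped to [0, 1]."""
    return max(0.0, min(1.0, base + sum(adjustments)))
```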
 
     ) -> Dict[str, Any]:
         """Generate the final evaluation report."""

         report = {
             'idRapport': f"report_{int(datetime.datetime.now().timestamp())}",
             'informationEntree': input_data,
             'dateGeneration': datetime.datetime.now().isoformat(),
             'scoreCredibilite': round(overall_score, 2),
             'resumeAnalyse': "",
             'detailsScore': {
                 'base': 0.5,
             },
             # [NEW] TREC Evidence section
             'evidences': evidences or [],
+            # [NEW] TREC IR Metrics for dashboard
+            'trec_metrics': self._calculate_trec_metrics(cleaned_text, evidences),
             'metadonnees': {}
         }

 

         return report

+    def _calculate_trec_metrics(self, text: str, evidences: List[Dict[str, Any]] = None) -> Dict[str, float]:
+        """
+        Calculate TREC-style IR metrics for display on dashboard.
+
+        Computes:
+        - Precision: Ratio of relevant retrieved documents
+        - Recall: Ratio of relevant documents retrieved
+        - MAP: Mean Average Precision
+        - NDCG: Normalized Discounted Cumulative Gain
+        - TF-IDF: Term Frequency-Inverse Document Frequency score
+        - MRR: Mean Reciprocal Rank
+        """
+        import math
+
+        metrics = {
+            'precision': 0.0,
+            'recall': 0.0,
+            'map': 0.0,
+            'ndcg': 0.0,
+            'tfidf': 0.0,
+            'mrr': 0.0
+        }
+
+        if not text:
+            return metrics
+
+        # TF-IDF based on text analysis
+        words = text.lower().split()
+        if words:
+            # Simple TF calculation
+            word_counts = {}
+            for word in words:
+                word_counts[word] = word_counts.get(word, 0) + 1
+
+            # Calculate TF-IDF score (simplified)
+            total_words = len(words)
+            unique_words = len(word_counts)
+
+            # Term frequency normalized
+            tf_scores = [count / total_words for count in word_counts.values()]
+            # IDF approximation based on word distribution
+            idf_approx = math.log((unique_words + 1) / 2)
+
+            tfidf_sum = sum(tf * idf_approx for tf in tf_scores)
+            metrics['tfidf'] = min(1.0, tfidf_sum / max(1, unique_words) * 10)
+
+        # If we have evidences, calculate retrieval metrics
+        if evidences and len(evidences) > 0:
+            k = len(evidences)
+
+            # For now, assume all retrieved evidences have some relevance
+            # based on their retrieval scores
+            scores = [e.get('score', 0) for e in evidences]
+
+            if scores:
+                avg_score = sum(scores) / len(scores)
+                max_score = max(scores)
+
+                # Precision at K (proxy: avg relevance score)
+                metrics['precision'] = min(1.0, avg_score if avg_score <= 1.0 else avg_score / max(1, max_score))
+
+                # Recall (proxy: coverage based on number of evidences)
+                metrics['recall'] = min(1.0, len(evidences) / 10)  # Assuming 10 is target
+
+                # MAP (proxy using score ranking)
+                ap_sum = 0.0
+                for i, score in enumerate(sorted(scores, reverse=True)):
+                    ap_sum += (i + 1) / (i + 2) * score if score <= 1.0 else (i + 1) / (i + 2)
+                metrics['map'] = ap_sum / len(scores) if scores else 0.0
+
+                # NDCG (simplified)
+                dcg = sum(
+                    (2 ** (score if score <= 1.0 else 1.0) - 1) / math.log2(i + 2)
+                    for i, score in enumerate(scores[:k])
+                )
+                ideal_scores = sorted(scores, reverse=True)
+                idcg = sum(
+                    (2 ** (score if score <= 1.0 else 1.0) - 1) / math.log2(i + 2)
+                    for i, score in enumerate(ideal_scores[:k])
+                )
+                metrics['ndcg'] = dcg / idcg if idcg > 0 else 0.0
+
+                # MRR (first relevant result)
+                for i, score in enumerate(scores):
+                    if (score > 0.5 if score <= 1.0 else score > max_score / 2):
+                        metrics['mrr'] = 1.0 / (i + 1)
+                        break
+                if metrics['mrr'] == 0 and len(scores) > 0:
+                    metrics['mrr'] = 1.0  # First result
+
+        # Round all values
+        return {k: round(v, 4) for k, v in metrics.items()}
+
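The metrics added above are proxies computed from retrieval scores alone, without ground-truth relevance judgments. For comparison, the textbook definitions of NDCG@k and MRR over graded relevance labels look like this (hypothetical helpers for illustration, not part of the commit):

```python
import math
from typing import List

def ndcg_at_k(relevances: List[float], k: int) -> float:
    """Standard NDCG@k: DCG of the given ranking divided by DCG of the ideal ranking."""
    def dcg(rels: List[float]) -> float:
        # Gain 2^rel - 1, discounted by log2 of the 1-based rank + 1
        return sum((2 ** r - 1) / math.log2(i + 2) for i, r in enumerate(rels[:k]))
    ideal = dcg(sorted(relevances, reverse=True))
    return dcg(relevances) / ideal if ideal > 0 else 0.0

def mrr(relevant_flags: List[bool]) -> float:
    """Reciprocal rank of the first relevant result; 0.0 if none is relevant."""
    for i, rel in enumerate(relevant_flags):
        if rel:
            return 1.0 / (i + 1)
    return 0.0
```

With real relevance judgments (e.g. TREC qrels), these would replace the score-based proxies; a ranking already sorted by relevance scores NDCG = 1.0.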
     def _get_score_factors(self, rule_results: Dict, nlp_results: Dict) -> List[Dict]:
         """Get list of factors that influenced the score (For UI)."""
         factors = []
 
         print("[SysCRED] Running NLP analysis...")
         nlp_results = self.nlp_analysis(cleaned_text)

         # 7. Calculate score (Now includes GraphRAG context)
         overall_score = self.calculate_overall_score(rule_results, nlp_results)
         print(f"[SysCRED] ✓ Credibility score: {overall_score:.2f}")

             graph_context=graph_context
         )

         # Add similar URIs to report for ontology linking
         if similar_uris:
             report['similar_claims_uris'] = similar_uris