DomLoyer committed on
Commit
ea9303b
·
verified ·
1 Parent(s): d228524

Sync: TREC IR metrics in verify, DB fallback, NER/EEAT fix, all API keys

Browse files
.env.example ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # --- SysCRED Environment Variables ---
2
+ # Copy this file to .env and fill in your values
3
+
4
+ # --- Google Fact Check API ---
5
+ SYSCRED_GOOGLE_API_KEY=your_google_api_key_here
6
+
7
+ # --- Supabase Database ---
8
+ SYSCRED_DATABASE_URL=postgresql://user:password@host:5432/dbname
9
+ SUPABASE_URL=https://your-project.supabase.co
10
+ SUPABASE_KEY=your_supabase_key
11
+
12
+ # --- Application URLs ---
13
+ SYSCRED_BASE_URL=https://syscred.uqam.ca
14
+
15
+ # --- Server Settings ---
16
+ SYSCRED_PORT=5001
17
+ SYSCRED_ENV=development
18
+ SYSCRED_LOAD_ML_MODELS=true
Dockerfile CHANGED
@@ -1,5 +1,5 @@
1
  # SysCRED Docker Configuration for Hugging Face Spaces
2
- # OPTIMIZED version with Distilled Models for faster startup
3
  FROM python:3.10-slim
4
 
5
  WORKDIR /app
@@ -7,48 +7,27 @@ WORKDIR /app
7
  ENV PYTHONDONTWRITEBYTECODE=1
8
  ENV PYTHONUNBUFFERED=1
9
  ENV PYTHONPATH=/app
10
-
11
- # ============================================
12
- # KEY OPTIMIZATION: Use distilled models
13
- # ============================================
14
  ENV SYSCRED_LOAD_ML_MODELS=true
15
- ENV SYSCRED_USE_DISTILLED=true
16
- ENV TRANSFORMERS_CACHE=/app/.cache/huggingface
17
- ENV HF_HOME=/app/.cache/huggingface
18
 
19
  # Install system dependencies
20
- RUN apt-get update && apt-get install -y \
21
  build-essential \
22
  && rm -rf /var/lib/apt/lists/*
23
 
24
- # Copy optimized requirements (distilled models, CPU-only torch)
25
- COPY requirements-distilled.txt /app/requirements.txt
26
 
27
- # Install dependencies
28
  RUN pip install --no-cache-dir -r requirements.txt
29
 
30
- # ============================================
31
- # PRE-DOWNLOAD DISTILLED MODELS (Build Time)
32
- # This avoids timeout during first request
33
- # ============================================
34
- RUN python -c "from transformers import pipeline; \
35
- pipeline('sentiment-analysis', model='distilbert-base-uncased-finetuned-sst-2-english'); \
36
- pipeline('ner', model='dslim/bert-base-NER'); \
37
- print('✓ Distilled models pre-downloaded')"
38
-
39
- # Download small spaCy models
40
- RUN pip install spacy && \
41
- python -m spacy download en_core_web_sm && \
42
- python -m spacy download fr_core_news_sm && \
43
- echo '✓ spaCy models downloaded'
44
-
45
- # Pre-download sentence transformer (small version)
46
- RUN python -c "from sentence_transformers import SentenceTransformer; \
47
- SentenceTransformer('all-MiniLM-L6-v2'); \
48
- print('✓ Sentence transformer pre-downloaded')"
49
 
50
  # Copy application code
51
  COPY syscred/ /app/syscred/
 
52
 
53
  # Create user for HF Spaces (required)
54
  RUN useradd -m -u 1000 user
@@ -61,5 +40,4 @@ WORKDIR /app
61
  EXPOSE 7860
62
 
63
  # Run with HF Spaces port (7860)
64
- # Increased workers to 4 for better concurrency, timeout 600s
65
- CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "1", "--threads", "4", "--timeout", "600", "syscred.backend_app:app"]
 
1
  # SysCRED Docker Configuration for Hugging Face Spaces
2
+ # Full version with PyTorch and Transformers
3
  FROM python:3.10-slim
4
 
5
  WORKDIR /app
 
7
  ENV PYTHONDONTWRITEBYTECODE=1
8
  ENV PYTHONUNBUFFERED=1
9
  ENV PYTHONPATH=/app
 
 
 
 
10
  ENV SYSCRED_LOAD_ML_MODELS=true
11
+ ENV SYSCRED_ENV=production
 
 
12
 
13
  # Install system dependencies
14
+ RUN apt-get update && apt-get install -y --no-install-recommends \
15
  build-essential \
16
  && rm -rf /var/lib/apt/lists/*
17
 
18
+ # Copy requirements (full version with ML)
19
+ COPY requirements.txt /app/requirements.txt
20
 
21
+ # Install dependencies (includes PyTorch, Transformers)
22
  RUN pip install --no-cache-dir -r requirements.txt
23
 
24
+ # Download spaCy models for NER
25
+ RUN python -m spacy download en_core_web_md || true
26
+ RUN python -m spacy download fr_core_news_md || true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  # Copy application code
29
  COPY syscred/ /app/syscred/
30
+ COPY ontology/ /app/ontology/
31
 
32
  # Create user for HF Spaces (required)
33
  RUN useradd -m -u 1000 user
 
40
  EXPOSE 7860
41
 
42
  # Run with HF Spaces port (7860)
43
+ CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "2", "--timeout", "300", "syscred.backend_app:app"]
 
ontology/sysCRED_data.ttl ADDED
The diff for this file is too large to render. See raw diff
 
ontology/sysCRED_onto26avrtil.ttl ADDED
@@ -0,0 +1,1030 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @base <https://syscred.uqam.ca/ontology#> .
2
+ @prefix : <https://syscred.uqam.ca/ontology#> .
3
+ @prefix owl: <http://www.w3.org/2002/07/owl#> .
4
+ @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
5
+ @prefix xml: <http://www.w3.org/XML/1998/namespace> .
6
+ @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
7
+ @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
8
+ #
9
+ #
10
+ # #################################################################
11
+ # #
12
+ # # Annotation properties
13
+ # #
14
+ # #################################################################
15
+ #
16
+ #
17
+ # http://www.w3.org/2002/07/owl#maxCardinality
18
+ #
19
+ #
20
+ #
21
+ # #################################################################
22
+ # #
23
+ # # Object Properties
24
+ # #
25
+ # #################################################################
26
+ #
27
+ #
28
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#analyzesSource
29
+ #
30
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#appliesRule
31
+ #
32
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#assignsCredibilityLevel
33
+ #
34
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#basedOnEvidence
35
+ #
36
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#concernsCriterion
37
+ #
38
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#concernsInformation
39
+ #
40
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#configuredByExpert
41
+ #
42
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#evaluatesCriterion
43
+ #
44
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#fetchesDataFrom
45
+ #
46
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#hasAuthor
47
+ #
48
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#hasCriterionResult
49
+ #
50
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#hasOriginalSource
51
+ #
52
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#includesNLPResult
53
+ #
54
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#includesRuleResult
55
+ #
56
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#includesSourceAnalysis
57
+ #
58
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#isReportOf
59
+ #
60
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#isSubjectOfRequest
61
+ #
62
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#obtainedVia
63
+ #
64
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#originatesFrom
65
+ #
66
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#producesReport
67
+ #
68
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#submitsRequest
69
+ #
70
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#submittedBy
71
+ #
72
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#usesModel
73
+ #
74
+ #
75
+ #
76
+ # #################################################################
77
+ # #
78
+ # # Data properties
79
+ # #
80
+ # #################################################################
81
+ #
82
+ #
83
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#authorName
84
+ #
85
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#coherenceScore
86
+ #
87
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#completionTimestamp
88
+ #
89
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#credibilityLevelValue
90
+ #
91
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#credibilityScoreValue
92
+ #
93
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#criterionResultConfidence
94
+ #
95
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#criterionResultValue
96
+ #
97
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#detectedBiases
98
+ #
99
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#evidenceSnippet
100
+ #
101
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#evidenceURL
102
+ #
103
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#informationContent
104
+ #
105
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#informationURL
106
+ #
107
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#modelName
108
+ #
109
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#modelType
110
+ #
111
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#reportSummary
112
+ #
113
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#requestStatus
114
+ #
115
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ruleDescription
116
+ #
117
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ruleLogic
118
+ #
119
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ruleResultValid
120
+ #
121
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ruleWeight
122
+ #
123
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#sentimentScore
124
+ #
125
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#sourceAnalyzedReputation
126
+ #
127
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#sourceAnalyzedURL
128
+ #
129
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#sourceMentionsCount
130
+ #
131
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#sourceReputationScore
132
+ #
133
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#sourceURL
134
+ #
135
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#submissionTimestamp
136
+ #
137
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#userName
138
+ #
139
+ #
140
+ #
141
+ # #################################################################
142
+ # #
143
+ # # Classes
144
+ # #
145
+ # #################################################################
146
+ #
147
+ #
148
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#AcademicJournal
149
+ #
150
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ApiLLM
151
+ #
152
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Author
153
+ #
154
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#BaseDeFaits
155
+ #
156
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#CredibilityLevel
157
+ #
158
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Evidence
159
+ #
160
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Expert
161
+ #
162
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#FactCheckingOrganization
163
+ #
164
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#InfoSourceAnalyse
165
+ #
166
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#InformationFaibleCredibilite
167
+ #
168
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#InformationHauteCredibilite
169
+ #
170
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#InformationMoyenneCredibilite
171
+ #
172
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#InformationSoumise
173
+ #
174
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#InformationVerifiee
175
+ #
176
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ModeleIA
177
+ #
178
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#MoteurRecherche
179
+ #
180
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#NewsWebsite
181
+ #
182
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_Bas
183
+ #
184
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_Haut
185
+ #
186
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_Moyen
187
+ #
188
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_NonVerifie
189
+ #
190
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#PersonalBlog
191
+ #
192
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#RapportEvaluation
193
+ #
194
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#RefutingEvidence
195
+ #
196
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#RegleVerification
197
+ #
198
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#RequeteEvaluation
199
+ #
200
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ResultatCritere
201
+ #
202
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ResultatNLP
203
+ #
204
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ResultatRegle
205
+ #
206
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ResultatVerification
207
+ #
208
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#SocialMediaPlatform
209
+ #
210
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Source
211
+ #
212
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#SupportingEvidence
213
+ #
214
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#SystemeExterne
215
+ #
216
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#User
217
+ #
218
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#VerificationCriterion
219
+ #
220
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#VerificationMethod
221
+ #
222
+ #
223
+ #
224
+ # #################################################################
225
+ # #
226
+ # # Individuals
227
+ # #
228
+ # #################################################################
229
+ #
230
+ #
231
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Criteria_AuthorExpertise
232
+ #
233
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Criteria_CoherenceAnalysis
234
+ #
235
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Criteria_CrossReferencing
236
+ #
237
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Criteria_FactCheckDB
238
+ #
239
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Criteria_SourceReputation
240
+ #
241
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Criteria_ToneAnalysis
242
+ #
243
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_Bas
244
+ #
245
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_Haut
246
+ #
247
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_Moyen
248
+ #
249
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_NonVerifie
250
+ #
251
+ #
252
+ #
253
+ # #################################################################
254
+ # #
255
+ # # Annotations
256
+ # #
257
+ # #################################################################
258
+ #
259
+ #
260
+ #
261
+ #
262
+ #
263
+ #
264
+ #
265
+ #
266
+ # #################################################################
267
+ # #
268
+ # # General axioms
269
+ # #
270
+ # #################################################################
271
+ #
272
+ #
273
+ #
274
+ #
275
+ #
276
+ #
277
+ # Generated by the OWL API (version 4.5.29.2024-05-13T12:11:03Z) https://github.com/owlcs/owlapi
278
+
279
+ <credibility-verification> a owl:Ontology;
280
+ rdfs:comment "Ontologie enrichie et adaptée modélisant les concepts liés à la vérification de la crédibilité des sources d'information sur le Web, basée sur le rapport de modélisation UML et inspirée par l'ontologie de subvention recherche."@fr;
281
+ rdfs:label "Ontologie Système de Vérification de Sources (Adaptée Rapport + Subvention)"@fr;
282
+ owl:versionInfo "2.1" .
283
+
284
+ owl:maxCardinality a owl:AnnotationProperty .
285
+
286
+ :analyzesSource a owl:ObjectProperty;
287
+ rdfs:domain :InfoSourceAnalyse;
288
+ rdfs:range :Source;
289
+ rdfs:label "analyse source"@fr .
290
+
291
+ :appliesRule a owl:ObjectProperty, owl:FunctionalProperty;
292
+ rdfs:domain :ResultatRegle;
293
+ rdfs:range :RegleVerification;
294
+ rdfs:label "applique règle"@fr .
295
+
296
+ :assignsCredibilityLevel a owl:ObjectProperty, owl:FunctionalProperty;
297
+ rdfs:domain :RapportEvaluation;
298
+ rdfs:range :CredibilityLevel;
299
+ rdfs:comment "Lie un rapport d'évaluation au niveau de crédibilité final attribué."@fr;
300
+ rdfs:label "assigne niveau crédibilité"@fr .
301
+
302
+ :basedOnEvidence a owl:ObjectProperty;
303
+ rdfs:domain :RapportEvaluation;
304
+ rdfs:range :Evidence;
305
+ rdfs:comment "Lie un rapport d'évaluation aux preuves collectées."@fr;
306
+ rdfs:label "basé sur preuve"@fr .
307
+
308
+ :concernsCriterion a owl:ObjectProperty, owl:FunctionalProperty;
309
+ rdfs:domain :ResultatCritere;
310
+ rdfs:range :VerificationCriterion;
311
+ rdfs:label "concerne critère"@fr .
312
+
313
+ :concernsInformation a owl:ObjectProperty, owl:FunctionalProperty;
314
+ owl:inverseOf :isSubjectOfRequest;
315
+ rdfs:domain :RequeteEvaluation;
316
+ rdfs:range :InformationSoumise;
317
+ rdfs:label "concerne information"@fr .
318
+
319
+ :configuredByExpert a owl:ObjectProperty;
320
+ rdfs:domain _:genid1;
321
+ rdfs:range :Expert;
322
+ rdfs:label "configuré par expert"@fr .
323
+
324
+ _:genid1 a owl:Class;
325
+ owl:unionOf _:genid4 .
326
+
327
+ _:genid4 a rdf:List;
328
+ rdf:first :ModeleIA;
329
+ rdf:rest _:genid3 .
330
+
331
+ _:genid3 a rdf:List;
332
+ rdf:first :RegleVerification;
333
+ rdf:rest _:genid2 .
334
+
335
+ _:genid2 a rdf:List;
336
+ rdf:first :VerificationCriterion;
337
+ rdf:rest rdf:nil .
338
+
339
+ :evaluatesCriterion a owl:ObjectProperty;
340
+ rdfs:domain _:genid5;
341
+ rdfs:range :VerificationCriterion;
342
+ rdfs:comment "Lie une règle ou un modèle au critère de vérification qu'il est conçu pour évaluer."@fr;
343
+ rdfs:label "évalue critère"@fr .
344
+
345
+ _:genid5 a owl:Class;
346
+ owl:unionOf _:genid7 .
347
+
348
+ _:genid7 a rdf:List;
349
+ rdf:first :ModeleIA;
350
+ rdf:rest _:genid6 .
351
+
352
+ _:genid6 a rdf:List;
353
+ rdf:first :RegleVerification;
354
+ rdf:rest rdf:nil .
355
+
356
+ :fetchesDataFrom a owl:ObjectProperty;
357
+ rdfs:domain :RequeteEvaluation;
358
+ rdfs:range :SystemeExterne;
359
+ rdfs:label "récupère données de"@fr .
360
+
361
+ :hasAuthor a owl:ObjectProperty;
362
+ rdfs:domain :InformationSoumise;
363
+ rdfs:range :Author;
364
+ rdfs:comment "Lie une information soumise à son auteur présumé."@fr;
365
+ rdfs:label "a pour auteur"@fr .
366
+
367
+ :hasCriterionResult a owl:ObjectProperty;
368
+ rdfs:domain :RapportEvaluation;
369
+ rdfs:range :ResultatCritere;
370
+ rdfs:comment "Lie un rapport au résultat détaillé pour un critère d'évaluation spécifique."@fr;
371
+ rdfs:label "a résultat pour critère"@fr .
372
+
373
+ :hasOriginalSource a owl:ObjectProperty;
374
+ rdfs:domain :InformationSoumise;
375
+ rdfs:range :Source;
376
+ rdfs:comment "Lie une information soumise à sa source d'origine principale."@fr;
377
+ rdfs:label "a pour source originale"@fr .
378
+
379
+ :includesNLPResult a owl:ObjectProperty;
380
+ rdfs:domain :RapportEvaluation;
381
+ rdfs:range :ResultatNLP;
382
+ rdfs:label "inclut résultat NLP"@fr .
383
+
384
+ :includesRuleResult a owl:ObjectProperty;
385
+ rdfs:domain :RapportEvaluation;
386
+ rdfs:range :ResultatRegle;
387
+ rdfs:label "inclut résultat règle"@fr .
388
+
389
+ :includesSourceAnalysis a owl:ObjectProperty;
390
+ rdfs:domain :RapportEvaluation;
391
+ rdfs:range :InfoSourceAnalyse;
392
+ rdfs:label "inclut analyse source"@fr .
393
+
394
+ :isReportOf a owl:ObjectProperty, owl:InverseFunctionalProperty;
395
+ owl:inverseOf :producesReport;
396
+ rdfs:domain :RapportEvaluation;
397
+ rdfs:range :RequeteEvaluation;
398
+ rdfs:label "est rapport de"@fr .
399
+
400
+ :isSubjectOfRequest a owl:ObjectProperty;
401
+ rdfs:domain :InformationSoumise;
402
+ rdfs:range :RequeteEvaluation;
403
+ rdfs:label "est sujet de requête"@fr .
404
+
405
+ :obtainedVia a owl:ObjectProperty;
406
+ rdfs:domain :ResultatCritere;
407
+ rdfs:range _:genid8;
408
+ rdfs:label "obtenu via"@fr .
409
+
410
+ _:genid8 a owl:Class;
411
+ owl:unionOf _:genid10 .
412
+
413
+ _:genid10 a rdf:List;
414
+ rdf:first :ResultatNLP;
415
+ rdf:rest _:genid9 .
416
+
417
+ _:genid9 a rdf:List;
418
+ rdf:first :ResultatRegle;
419
+ rdf:rest rdf:nil .
420
+
421
+ :originatesFrom a owl:ObjectProperty;
422
+ rdfs:domain :Evidence;
423
+ rdfs:range :Source;
424
+ rdfs:comment "Lie une preuve à la source d'où elle a été extraite."@fr;
425
+ rdfs:label "provient de"@fr .
426
+
427
+ :producesReport a owl:ObjectProperty, owl:FunctionalProperty;
428
+ rdfs:domain :RequeteEvaluation;
429
+ rdfs:range :RapportEvaluation;
430
+ rdfs:label "produit rapport"@fr .
431
+
432
+ :submitsRequest a owl:ObjectProperty;
433
+ owl:inverseOf :submittedBy;
434
+ rdfs:domain :User;
435
+ rdfs:range :RequeteEvaluation;
436
+ rdfs:label "soumet requête"@fr .
437
+
438
+ :submittedBy a owl:ObjectProperty, owl:FunctionalProperty;
439
+ rdfs:domain :RequeteEvaluation;
440
+ rdfs:range :User;
441
+ rdfs:comment "Lie une requête de vérification à l'utilisateur qui l'a soumise."@fr;
442
+ rdfs:label "soumise par"@fr .
443
+
444
+ :usesModel a owl:ObjectProperty, owl:FunctionalProperty;
445
+ rdfs:domain :ResultatNLP;
446
+ rdfs:range :ModeleIA;
447
+ rdfs:label "utilise modèle"@fr .
448
+
449
+ :authorName a owl:DatatypeProperty;
450
+ rdfs:domain :Author;
451
+ rdfs:range xsd:string;
452
+ rdfs:label "nom de l'auteur"@fr .
453
+
454
+ :coherenceScore a owl:DatatypeProperty;
455
+ rdfs:domain :ResultatNLP;
456
+ rdfs:range xsd:float;
457
+ rdfs:label "score cohérence"@fr .
458
+
459
+ :completionTimestamp a owl:DatatypeProperty, owl:FunctionalProperty;
460
+ rdfs:domain :RapportEvaluation;
461
+ rdfs:range xsd:dateTime;
462
+ rdfs:label "horodatage de complétion"@fr .
463
+
464
+ :credibilityLevelValue a owl:DatatypeProperty, owl:FunctionalProperty;
465
+ rdfs:domain :CredibilityLevel;
466
+ rdfs:range xsd:float;
467
+ rdfs:label "valeur numérique niveau"@fr .
468
+
469
+ :credibilityScoreValue a owl:DatatypeProperty, owl:FunctionalProperty;
470
+ rdfs:domain :RapportEvaluation;
471
+ rdfs:range xsd:float;
472
+ rdfs:label "valeur score crédibilité"@fr .
473
+
474
+ :criterionResultConfidence a owl:DatatypeProperty;
475
+ rdfs:domain :ResultatCritere;
476
+ rdfs:range xsd:float;
477
+ rdfs:label "confiance résultat critère"@fr .
478
+
479
+ :criterionResultValue a owl:DatatypeProperty;
480
+ rdfs:domain :ResultatCritere;
481
+ rdfs:range xsd:string;
482
+ rdfs:label "valeur résultat critère"@fr .
483
+
484
+ :detectedBiases a owl:DatatypeProperty;
485
+ rdfs:domain :ResultatNLP;
486
+ rdfs:range xsd:string;
487
+ rdfs:comment "";
488
+ rdfs:label "biais détectés"@fr .
489
+
490
+ :evidenceSnippet a owl:DatatypeProperty;
491
+ rdfs:domain :Evidence;
492
+ rdfs:range xsd:string;
493
+ rdfs:label "extrait de la preuve"@fr .
494
+
495
+ :evidenceURL a owl:DatatypeProperty;
496
+ rdfs:domain :Evidence;
497
+ rdfs:range xsd:anyURI;
498
+ rdfs:label "URL de la preuve"@fr .
499
+
500
+ :informationContent a owl:DatatypeProperty;
501
+ rdfs:domain :InformationSoumise;
502
+ rdfs:range xsd:string;
503
+ rdfs:label "contenu de l'information"@fr .
504
+
505
+ :informationURL a owl:DatatypeProperty;
506
+ rdfs:domain :InformationSoumise;
507
+ rdfs:range xsd:anyURI;
508
+ rdfs:label "URL de l'information"@fr .
509
+
510
+ :modelName a owl:DatatypeProperty;
511
+ rdfs:domain :ModeleIA;
512
+ rdfs:range xsd:string;
513
+ rdfs:label "nom modèle"@fr .
514
+
515
+ :modelType a owl:DatatypeProperty;
516
+ rdfs:domain :ModeleIA;
517
+ rdfs:range xsd:string;
518
+ rdfs:label "type modèle"@fr .
519
+
520
+ :reportSummary a owl:DatatypeProperty;
521
+ rdfs:domain :RapportEvaluation;
522
+ rdfs:range xsd:string;
523
+ rdfs:label "résumé du rapport"@fr .
524
+
525
+ :requestStatus a owl:DatatypeProperty, owl:FunctionalProperty;
526
+ rdfs:domain :RequeteEvaluation;
527
+ rdfs:range xsd:string;
528
+ rdfs:label "statut requête"@fr .
529
+
530
+ :ruleDescription a owl:DatatypeProperty;
531
+ rdfs:domain :RegleVerification;
532
+ rdfs:range xsd:string;
533
+ rdfs:label "description règle"@fr .
534
+
535
+ :ruleLogic a owl:DatatypeProperty;
536
+ rdfs:domain :RegleVerification;
537
+ rdfs:range xsd:string;
538
+ rdfs:label "logique règle"@fr .
539
+
540
+ :ruleResultValid a owl:DatatypeProperty;
541
+ rdfs:domain :ResultatRegle;
542
+ rdfs:range xsd:boolean;
543
+ rdfs:label "résultat règle valide"@fr .
544
+
545
+ :ruleWeight a owl:DatatypeProperty;
546
+ rdfs:domain :RegleVerification;
547
+ rdfs:range xsd:float;
548
+ rdfs:label "poids règle"@fr .
549
+
550
+ :sentimentScore a owl:DatatypeProperty;
551
+ rdfs:domain :ResultatNLP;
552
+ rdfs:range xsd:float;
553
+ rdfs:label "score sentiment"@fr .
554
+
555
+ :sourceAnalyzedReputation a owl:DatatypeProperty;
556
+ rdfs:domain :InfoSourceAnalyse;
557
+ rdfs:range xsd:string;
558
+ rdfs:label "réputation source analysée"@fr .
559
+
560
+ :sourceAnalyzedURL a owl:DatatypeProperty;
561
+ rdfs:domain :InfoSourceAnalyse;
562
+ rdfs:range xsd:anyURI;
563
+ rdfs:label "URL source analysée"@fr .
564
+
565
+ :sourceMentionsCount a owl:DatatypeProperty;
566
+ rdfs:domain :InfoSourceAnalyse;
567
+ rdfs:range xsd:integer;
568
+ rdfs:label "mentions source analysée"@fr .
569
+
570
+ :sourceReputationScore a owl:DatatypeProperty;
571
+ rdfs:domain :Source;
572
+ rdfs:range xsd:float;
573
+ rdfs:label "score de réputation de la source"@fr .
574
+
575
+ :sourceURL a owl:DatatypeProperty, owl:FunctionalProperty;
576
+ rdfs:domain :Source;
577
+ rdfs:range xsd:anyURI;
578
+ rdfs:label "URL de la source"@fr .
579
+
580
+ :submissionTimestamp a owl:DatatypeProperty, owl:FunctionalProperty;
581
+ rdfs:domain :RequeteEvaluation;
582
+ rdfs:range xsd:dateTime;
583
+ rdfs:label "horodatage de soumission"@fr .
584
+
585
+ :userName a owl:DatatypeProperty;
586
+ rdfs:domain :User;
587
+ rdfs:range xsd:string;
588
+ rdfs:label "nom d'utilisateur"@fr .
589
+
590
+ :AcademicJournal a owl:Class;
591
+ rdfs:subClassOf :Source;
592
+ rdfs:label "Revue Académique"@fr .
593
+
594
+ :ApiLLM a owl:Class;
595
+ rdfs:subClassOf :SystemeExterne;
596
+ rdfs:label "API de LLM"@fr .
597
+
598
+ :Author a owl:Class;
599
+ rdfs:comment "Représente la personne ou l'entité créditée pour la création de l'information soumise."@fr;
600
+ rdfs:label "Auteur"@fr .
601
+
602
+ :BaseDeFaits a owl:Class;
603
+ rdfs:subClassOf :SystemeExterne;
604
+ rdfs:label "Base de Données de Faits Vérifiés"@fr .
605
+
606
+ :CredibilityLevel a owl:Class;
607
+ rdfs:comment "Représente le niveau de crédibilité qualitatif ou quantitatif attribué dans le rapport."@fr;
608
+ rdfs:label "Niveau de Crédibilité"@fr .
609
+
610
+ :Evidence a owl:Class;
611
+ rdfs:comment "Représente un élément d'information externe utilisé pour étayer ou réfuter l'information vérifiée."@fr;
612
+ rdfs:label "Preuve"@fr .
613
+
614
+ :Expert a owl:Class;
615
+ rdfs:subClassOf :User;
616
+ rdfs:comment "Utilisateur qualifié responsable de la configuration et de l'amélioration du système (règles, modèles)."@fr;
617
+ rdfs:label "Expert"@fr .
618
+
619
+ :FactCheckingOrganization a owl:Class;
620
+ rdfs:subClassOf :Source;
621
+ rdfs:label "Organisation de Vérification des Faits"@fr .
622
+
623
+ :InfoSourceAnalyse a owl:Class;
624
+ rdfs:subClassOf _:genid11;
625
+ rdfs:comment "Détails sur une source spécifique telle qu'analysée et présentée dans le rapport."@fr;
626
+ rdfs:label "Information Source Analysée"@fr .
627
+
628
+ _:genid11 a owl:Restriction;
629
+ owl:cardinality "1"^^xsd:nonNegativeInteger;
630
+ owl:onProperty :analyzesSource .
631
+
632
+ :InformationFaibleCredibilite a owl:Class;
633
+ owl:equivalentClass _:genid12;
634
+ rdfs:subClassOf _:genid22;
635
+ rdfs:label "Information Faiblement Crédible"@fr .
636
+
637
+ _:genid12 a owl:Class;
638
+ owl:intersectionOf _:genid21 .
639
+
640
+ _:genid21 a rdf:List;
641
+ rdf:first :InformationVerifiee;
642
+ rdf:rest _:genid19 .
643
+
644
+ _:genid19 a rdf:List;
645
+ rdf:first _:genid20;
646
+ rdf:rest _:genid17 .
647
+
648
+ _:genid17 a rdf:List;
649
+ rdf:first _:genid18;
650
+ rdf:rest _:genid13 .
651
+
652
+ _:genid13 a rdf:List;
653
+ rdf:first _:genid14;
654
+ rdf:rest rdf:nil .
655
+
656
+ _:genid14 a owl:Restriction;
657
+ owl:someValuesFrom _:genid15;
658
+ owl:onProperty :isSubjectOfRequest .
659
+
660
+ _:genid15 a owl:Restriction;
661
+ owl:someValuesFrom _:genid16;
662
+ owl:onProperty :producesReport .
663
+
664
+ _:genid16 a owl:Restriction;
665
+ owl:hasValue :Niveau_Bas;
666
+ owl:onProperty :assignsCredibilityLevel .
667
+
668
+ _:genid18 a owl:Class;
669
+ owl:complementOf :InformationMoyenneCredibilite .
670
+
671
+ _:genid20 a owl:Class;
672
+ owl:complementOf :InformationHauteCredibilite .
673
+
674
+ _:genid22 a owl:Restriction;
675
+ owl:allValuesFrom _:genid23;
676
+ owl:onProperty :isSubjectOfRequest .
677
+
678
+ _:genid23 a owl:Restriction;
679
+ owl:allValuesFrom _:genid24;
680
+ owl:onProperty :producesReport .
681
+
682
+ _:genid24 a owl:Restriction;
683
+ owl:hasValue :Niveau_Bas;
684
+ owl:onProperty :assignsCredibilityLevel .
685
+
686
+ :InformationHauteCredibilite a owl:Class;
687
+ owl:equivalentClass _:genid25;
688
+ rdfs:subClassOf _:genid31;
689
+ rdfs:label "Information Hautement Crédible"@fr .
690
+
691
+ _:genid25 a owl:Class;
692
+ owl:intersectionOf _:genid30 .
693
+
694
+ _:genid30 a rdf:List;
695
+ rdf:first :InformationVerifiee;
696
+ rdf:rest _:genid26 .
697
+
698
+ _:genid26 a rdf:List;
699
+ rdf:first _:genid27;
700
+ rdf:rest rdf:nil .
701
+
702
+ _:genid27 a owl:Restriction;
703
+ owl:someValuesFrom _:genid28;
704
+ owl:onProperty :isSubjectOfRequest .
705
+
706
+ _:genid28 a owl:Restriction;
707
+ owl:someValuesFrom _:genid29;
708
+ owl:onProperty :producesReport .
709
+
710
+ _:genid29 a owl:Restriction;
711
+ owl:hasValue :Niveau_Haut;
712
+ owl:onProperty :assignsCredibilityLevel .
713
+
714
+ _:genid31 a owl:Restriction;
715
+ owl:allValuesFrom _:genid32;
716
+ owl:onProperty :isSubjectOfRequest .
717
+
718
+ _:genid32 a owl:Restriction;
719
+ owl:allValuesFrom _:genid33;
720
+ owl:onProperty :producesReport .
721
+
722
+ _:genid33 a owl:Restriction;
723
+ owl:hasValue :Niveau_Haut;
724
+ owl:onProperty :assignsCredibilityLevel .
725
+
726
+ :InformationMoyenneCredibilite a owl:Class;
727
+ owl:equivalentClass _:genid34;
728
+ rdfs:subClassOf _:genid42;
729
+ rdfs:label "Information Moyennement Crédible"@fr .
730
+
731
+ _:genid34 a owl:Class;
732
+ owl:intersectionOf _:genid41 .
733
+
734
+ _:genid41 a rdf:List;
735
+ rdf:first :InformationVerifiee;
736
+ rdf:rest _:genid39 .
737
+
738
+ _:genid39 a rdf:List;
739
+ rdf:first _:genid40;
740
+ rdf:rest _:genid35 .
741
+
742
+ _:genid35 a rdf:List;
743
+ rdf:first _:genid36;
744
+ rdf:rest rdf:nil .
745
+
746
+ _:genid36 a owl:Restriction;
747
+ owl:someValuesFrom _:genid37;
748
+ owl:onProperty :isSubjectOfRequest .
749
+
750
+ _:genid37 a owl:Restriction;
751
+ owl:someValuesFrom _:genid38;
752
+ owl:onProperty :producesReport .
753
+
754
+ _:genid38 a owl:Restriction;
755
+ owl:hasValue :Niveau_Moyen;
756
+ owl:onProperty :assignsCredibilityLevel .
757
+
758
+ _:genid40 a owl:Class;
759
+ owl:complementOf :InformationHauteCredibilite .
760
+
761
+ _:genid42 a owl:Restriction;
762
+ owl:allValuesFrom _:genid43;
763
+ owl:onProperty :isSubjectOfRequest .
764
+
765
+ _:genid43 a owl:Restriction;
766
+ owl:allValuesFrom _:genid44;
767
+ owl:onProperty :producesReport .
768
+
769
+ _:genid44 a owl:Restriction;
770
+ owl:hasValue :Niveau_Moyen;
771
+ owl:onProperty :assignsCredibilityLevel .
772
+
773
+ :InformationSoumise a owl:Class;
774
+ rdfs:comment "Représente l'unité d'information (texte, URL) telle que soumise pour vérification."@fr;
775
+ rdfs:label "Information Soumise"@fr .
776
+
777
+ :InformationVerifiee a owl:Class;
778
+ owl:equivalentClass _:genid45;
779
+ rdfs:label "Information Vérifiée"@fr .
780
+
781
+ _:genid45 a owl:Class;
782
+ owl:intersectionOf _:genid49 .
783
+
784
+ _:genid49 a rdf:List;
785
+ rdf:first :InformationSoumise;
786
+ rdf:rest _:genid46 .
787
+
788
+ _:genid46 a rdf:List;
789
+ rdf:first _:genid47;
790
+ rdf:rest rdf:nil .
791
+
792
+ _:genid47 a owl:Restriction;
793
+ owl:someValuesFrom _:genid48;
794
+ owl:onProperty :isSubjectOfRequest .
795
+
796
+ _:genid48 a owl:Restriction;
797
+ owl:someValuesFrom :RapportEvaluation;
798
+ owl:onProperty :producesReport .
799
+
800
+ :ModeleIA a owl:Class;
801
+ rdfs:subClassOf :VerificationMethod, _:genid50;
802
+ rdfs:comment "Représente un modèle d'apprentissage automatique utilisé pour l'analyse sémantique ou autre."@fr;
803
+ rdfs:label "Modèle IA/NLP"@fr .
804
+
805
+ _:genid50 a owl:Restriction;
806
+ owl:minCardinality "1"^^xsd:nonNegativeInteger;
807
+ owl:onProperty :evaluatesCriterion .
808
+
809
+ :MoteurRecherche a owl:Class;
810
+ rdfs:subClassOf :SystemeExterne;
811
+ rdfs:label "Moteur de Recherche"@fr .
812
+
813
+ :NewsWebsite a owl:Class;
814
+ rdfs:subClassOf :Source;
815
+ rdfs:label "Site d'actualités"@fr .
816
+
817
+ :Niveau_Bas a owl:Class, owl:NamedIndividual, :CredibilityLevel;
818
+ :credibilityLevelValue "0.2"^^xsd:float;
819
+ rdfs:label "Crédibilité Faible"@fr .
820
+
821
+ :Niveau_Haut a owl:Class, owl:NamedIndividual, :CredibilityLevel;
822
+ :credibilityLevelValue "0.8"^^xsd:float;
823
+ rdfs:label "Crédibilité Élevée"@fr .
824
+
825
+ :Niveau_Moyen a owl:Class, owl:NamedIndividual, :CredibilityLevel;
826
+ :credibilityLevelValue "0.5"^^xsd:float;
827
+ rdfs:label "Crédibilité Moyenne"@fr .
828
+
829
+ :Niveau_NonVerifie a owl:Class, owl:NamedIndividual, :CredibilityLevel;
830
+ rdfs:label "Non Vérifié"@fr .
831
+
832
+ :PersonalBlog a owl:Class;
833
+ rdfs:subClassOf :Source;
834
+ rdfs:label "Blog Personnel"@fr .
835
+
836
+ :RapportEvaluation a owl:Class;
837
+ rdfs:subClassOf _:genid51;
838
+ rdfs:comment "Encapsule les résultats complets du processus de vérification pour une requête donnée."@fr;
839
+ rdfs:label "Rapport d'Évaluation"@fr .
840
+
841
+ _:genid51 a owl:Restriction;
842
+ owl:cardinality "1"^^xsd:nonNegativeInteger;
843
+ owl:onProperty :assignsCredibilityLevel .
844
+
845
+ :RefutingEvidence a owl:Class;
846
+ rdfs:subClassOf :Evidence;
847
+ owl:disjointWith :SupportingEvidence;
848
+ rdfs:label "Preuve réfutante"@fr .
849
+
850
+ :RegleVerification a owl:Class;
851
+ rdfs:subClassOf :VerificationMethod, _:genid52;
852
+ rdfs:comment "Représente une règle logique prédéfinie utilisée pour évaluer un aspect de la crédibilité."@fr;
853
+ rdfs:label "Règle de Vérification"@fr .
854
+
855
+ _:genid52 a owl:Restriction;
856
+ owl:minCardinality "1"^^xsd:nonNegativeInteger;
857
+ owl:onProperty :evaluatesCriterion .
858
+
859
+ :RequeteEvaluation a owl:Class;
860
+ rdfs:subClassOf _:genid53, _:genid54, _:genid55;
861
+ rdfs:comment "Représente une demande spécifique de vérification de crédibilité soumise par un utilisateur."@fr;
862
+ rdfs:label "Requête d'Évaluation"@fr .
863
+
864
+ _:genid53 a owl:Restriction;
865
+ owl:minCardinality "0"^^xsd:nonNegativeInteger;
866
+ owl:onProperty :producesReport .
867
+
868
+ _:genid54 a owl:Restriction;
869
+ owl:cardinality "1"^^xsd:nonNegativeInteger;
870
+ owl:onProperty :concernsInformation .
871
+
872
+ _:genid55 a owl:Restriction;
873
+ owl:cardinality "1"^^xsd:nonNegativeInteger;
874
+ owl:onProperty :submittedBy .
875
+
876
+ :ResultatCritere a owl:Class;
877
+ rdfs:subClassOf _:genid56, _:genid57;
878
+ rdfs:comment "Représente le résultat de l'évaluation d'un critère spécifique pour une requête, potentiellement basé sur un ou plusieurs résultats de règles/NLP."@fr;
879
+ rdfs:label "Résultat Critère"@fr .
880
+
881
+ _:genid56 a owl:Restriction;
882
+ owl:minCardinality "1"^^xsd:nonNegativeInteger;
883
+ owl:onProperty :obtainedVia .
884
+
885
+ _:genid57 a owl:Restriction;
886
+ owl:cardinality "1"^^xsd:nonNegativeInteger;
887
+ owl:onProperty :concernsCriterion .
888
+
889
+ :ResultatNLP a owl:Class;
890
+ rdfs:subClassOf :ResultatVerification, _:genid58;
891
+ owl:disjointWith :ResultatRegle;
892
+ rdfs:comment "Résultat de l'analyse effectuée par un modèle IA/NLP."@fr;
893
+ rdfs:label "Résultat NLP"@fr .
894
+
895
+ _:genid58 a owl:Restriction;
896
+ owl:cardinality "1"^^xsd:nonNegativeInteger;
897
+ owl:onProperty :usesModel .
898
+
899
+ :ResultatRegle a owl:Class;
900
+ rdfs:subClassOf :ResultatVerification, _:genid59;
901
+ rdfs:comment "Résultat de l'application d'une règle de vérification spécifique."@fr;
902
+ rdfs:label "Résultat Règle"@fr .
903
+
904
+ _:genid59 a owl:Restriction;
905
+ owl:cardinality "1"^^xsd:nonNegativeInteger;
906
+ owl:onProperty :appliesRule .
907
+
908
+ :ResultatVerification a owl:Class;
909
+ rdfs:comment "Classe parente pour les résultats issus des différentes méthodes de vérification."@fr;
910
+ rdfs:label "Résultat de Vérification (Interne)"@fr .
911
+
912
+ :SocialMediaPlatform a owl:Class;
913
+ rdfs:subClassOf :Source;
914
+ rdfs:label "Plateforme de Média Social"@fr .
915
+
916
+ :Source a owl:Class;
917
+ rdfs:comment "Représente une entité (site web, organisation, personne) d'où provient l'information originale ou la preuve."@fr;
918
+ rdfs:label "Source"@fr .
919
+
920
+ :SupportingEvidence a owl:Class;
921
+ rdfs:subClassOf :Evidence;
922
+ rdfs:label "Preuve à l'appui"@fr .
923
+
924
+ :SystemeExterne a owl:Class;
925
+ rdfs:comment "Représente une source de données ou un service externe utilisé pendant le processus de vérification (API, base de données)."@fr;
926
+ rdfs:label "Système Externe"@fr .
927
+
928
+ :User a owl:Class;
929
+ rdfs:comment "Représente une personne interagissant avec le système de vérification."@fr;
930
+ rdfs:label "Utilisateur"@fr .
931
+
932
+ :VerificationCriterion a owl:Class;
933
+ rdfs:comment "Aspect spécifique évalué lors de la vérification (ex: réputation de la source, cohérence)."@fr;
934
+ rdfs:label "Critère de Vérification"@fr .
935
+
936
+ :VerificationMethod a owl:Class;
937
+ rdfs:comment "Représente une approche (règle, modèle IA) utilisée pour évaluer la crédibilité."@fr;
938
+ rdfs:label "Méthode de Vérification"@fr .
939
+
940
+ :Criteria_AuthorExpertise a owl:NamedIndividual, :VerificationCriterion;
941
+ rdfs:label "Expertise de l'auteur"@fr .
942
+
943
+ :Criteria_CoherenceAnalysis a owl:NamedIndividual, :VerificationCriterion;
944
+ rdfs:label "Analyse de la cohérence"@fr .
945
+
946
+ :Criteria_CrossReferencing a owl:NamedIndividual, :VerificationCriterion;
947
+ rdfs:label "Références croisées"@fr .
948
+
949
+ :Criteria_FactCheckDB a owl:NamedIndividual, :VerificationCriterion;
950
+ rdfs:label "Consultation base de données Fact-Check"@fr .
951
+
952
+ :Criteria_SourceReputation a owl:NamedIndividual, :VerificationCriterion;
953
+ rdfs:label "Réputation de la source"@fr .
954
+
955
+ :Criteria_ToneAnalysis a owl:NamedIndividual, :VerificationCriterion;
956
+ rdfs:label "Analyse du ton (ex: neutre, biaisé)"@fr .
957
+
958
+ _:genid60 owl:maxCardinality "1"^^xsd:nonNegativeInteger .
959
+
960
+ _:genid61 a owl:AllDisjointClasses;
961
+ owl:members _:genid66 .
962
+
963
+ _:genid66 a rdf:List;
964
+ rdf:first :AcademicJournal;
965
+ rdf:rest _:genid65 .
966
+
967
+ _:genid65 a rdf:List;
968
+ rdf:first :FactCheckingOrganization;
969
+ rdf:rest _:genid64 .
970
+
971
+ _:genid64 a rdf:List;
972
+ rdf:first :NewsWebsite;
973
+ rdf:rest _:genid63 .
974
+
975
+ _:genid63 a rdf:List;
976
+ rdf:first :PersonalBlog;
977
+ rdf:rest _:genid62 .
978
+
979
+ _:genid62 a rdf:List;
980
+ rdf:first :SocialMediaPlatform;
981
+ rdf:rest rdf:nil .
982
+
983
+ _:genid67 a owl:AllDisjointClasses;
984
+ owl:members _:genid70 .
985
+
986
+ _:genid70 a rdf:List;
987
+ rdf:first :ApiLLM;
988
+ rdf:rest _:genid69 .
989
+
990
+ _:genid69 a rdf:List;
991
+ rdf:first :BaseDeFaits;
992
+ rdf:rest _:genid68 .
993
+
994
+ _:genid68 a rdf:List;
995
+ rdf:first :MoteurRecherche;
996
+ rdf:rest rdf:nil .
997
+
998
+ _:genid71 a owl:AllDisjointClasses;
999
+ owl:members _:genid74 .
1000
+
1001
+ _:genid74 a rdf:List;
1002
+ rdf:first :InformationFaibleCredibilite;
1003
+ rdf:rest _:genid73 .
1004
+
1005
+ _:genid73 a rdf:List;
1006
+ rdf:first :InformationHauteCredibilite;
1007
+ rdf:rest _:genid72 .
1008
+
1009
+ _:genid72 a rdf:List;
1010
+ rdf:first :InformationMoyenneCredibilite;
1011
+ rdf:rest rdf:nil .
1012
+
1013
+ _:genid75 a owl:AllDisjointClasses;
1014
+ owl:members _:genid79 .
1015
+
1016
+ _:genid79 a rdf:List;
1017
+ rdf:first :Niveau_Bas;
1018
+ rdf:rest _:genid78 .
1019
+
1020
+ _:genid78 a rdf:List;
1021
+ rdf:first :Niveau_Haut;
1022
+ rdf:rest _:genid77 .
1023
+
1024
+ _:genid77 a rdf:List;
1025
+ rdf:first :Niveau_Moyen;
1026
+ rdf:rest _:genid76 .
1027
+
1028
+ _:genid76 a rdf:List;
1029
+ rdf:first :Niveau_NonVerifie;
1030
+ rdf:rest rdf:nil .
requirements.txt CHANGED
@@ -1,20 +1,24 @@
1
- # SysCRED - Requirements
2
  # Système Hybride de Vérification de Crédibilité
3
  # (c) Dominique S. Loyer
 
4
 
5
  # === Core Dependencies ===
6
  requests>=2.28.0
7
  beautifulsoup4>=4.11.0
8
  python-whois>=0.8.0
 
9
 
10
  # === RDF/Ontology ===
11
  rdflib>=6.0.0
12
 
13
  # === Machine Learning ===
14
- transformers>=4.30.0,<5
15
  torch>=2.0.0
16
- numpy>=1.24.0,<2
17
  sentence-transformers>=2.2.0
 
 
18
 
19
  # === Explainability ===
20
  lime>=0.2.0
@@ -28,7 +32,7 @@ pandas>=2.0.0
28
  # === Production/Database ===
29
  gunicorn>=20.1.0
30
  psycopg2-binary>=2.9.0
31
- flask_sqlalchemy>=3.0.0
32
 
33
  # === Development/Testing ===
34
  pytest>=7.0.0
 
1
+ # SysCRED - Requirements (Full version with ML models)
2
  # Système Hybride de Vérification de Crédibilité
3
  # (c) Dominique S. Loyer
4
+ # Version complète pour HuggingFace Spaces et développement local
5
 
6
  # === Core Dependencies ===
7
  requests>=2.28.0
8
  beautifulsoup4>=4.11.0
9
  python-whois>=0.8.0
10
+ lxml>=4.9.0
11
 
12
  # === RDF/Ontology ===
13
  rdflib>=6.0.0
14
 
15
  # === Machine Learning ===
16
+ transformers>=4.30.0
17
  torch>=2.0.0
18
+ numpy>=1.24.0
19
  sentence-transformers>=2.2.0
20
+ accelerate>=0.20.0
21
+ spacy>=3.6.0
22
 
23
  # === Explainability ===
24
  lime>=0.2.0
 
32
  # === Production/Database ===
33
  gunicorn>=20.1.0
34
  psycopg2-binary>=2.9.0
35
+ flask-sqlalchemy>=3.0.0
36
 
37
  # === Development/Testing ===
38
  pytest>=7.0.0
syscred/__init__.py CHANGED
@@ -9,17 +9,19 @@ Citation Key: loyerModelingHybridSystem2025
9
  Modules:
10
  - api_clients: Web scraping, WHOIS, Fact Check APIs
11
  - ir_engine: BM25, QLD, TF-IDF, PRF (from TREC)
12
- - trec_retriever: Evidence retrieval for fact-checking (NEW v2.3)
13
- - trec_dataset: TREC AP88-90 data loader (NEW v2.3)
14
- - liar_dataset: LIAR benchmark dataset loader (NEW v2.3)
15
  - seo_analyzer: SEO analysis, PageRank estimation
16
  - eval_metrics: MAP, NDCG, P@K, Recall, MRR
17
  - ontology_manager: RDFLib integration
18
  - verification_system: Main credibility pipeline
19
- - graph_rag: GraphRAG for contextual memory (enhanced v2.3)
 
 
20
  """
21
 
22
- __version__ = "2.3.1"
23
  __author__ = "Dominique S. Loyer"
24
  __citation__ = "loyerModelingHybridSystem2025"
25
 
@@ -32,11 +34,15 @@ from syscred.ir_engine import IREngine
32
  from syscred.eval_metrics import EvaluationMetrics
33
  from syscred.graph_rag import GraphRAG
34
 
35
- # TREC Integration (NEW - Feb 2026)
 
 
 
 
36
  from syscred.trec_retriever import TRECRetriever, Evidence, RetrievalResult
37
  from syscred.trec_dataset import TRECDataset, TRECTopic
38
 
39
- # LIAR Benchmark (NEW - Feb 2026)
40
  from syscred.liar_dataset import LIARDataset, LiarStatement, LiarLabel
41
 
42
  # Convenience alias
@@ -52,13 +58,17 @@ __all__ = [
52
  'IREngine',
53
  'EvaluationMetrics',
54
  'GraphRAG',
55
- # TREC (NEW)
 
 
 
 
56
  'TRECRetriever',
57
  'TRECDataset',
58
  'TRECTopic',
59
  'Evidence',
60
  'RetrievalResult',
61
- # LIAR Benchmark (NEW)
62
  'LIARDataset',
63
  'LiarStatement',
64
  'LiarLabel',
 
9
  Modules:
10
  - api_clients: Web scraping, WHOIS, Fact Check APIs
11
  - ir_engine: BM25, QLD, TF-IDF, PRF (from TREC)
12
+ - trec_retriever: Evidence retrieval for fact-checking (v2.3)
13
+ - trec_dataset: TREC AP88-90 data loader (v2.3)
14
+ - liar_dataset: LIAR benchmark dataset loader (v2.3)
15
  - seo_analyzer: SEO analysis, PageRank estimation
16
  - eval_metrics: MAP, NDCG, P@K, Recall, MRR
17
  - ontology_manager: RDFLib integration
18
  - verification_system: Main credibility pipeline
19
+ - graph_rag: GraphRAG for contextual memory (v2.3)
20
+ - ner_analyzer: Named Entity Recognition with spaCy (v2.4)
21
+ - eeat_calculator: Google E-E-A-T metrics (v2.4)
22
  """
23
 
24
+ __version__ = "2.4.0"
25
  __author__ = "Dominique S. Loyer"
26
  __citation__ = "loyerModelingHybridSystem2025"
27
 
 
34
  from syscred.eval_metrics import EvaluationMetrics
35
  from syscred.graph_rag import GraphRAG
36
 
37
+ # NER and E-E-A-T (NEW - v2.4)
38
+ from syscred.ner_analyzer import NERAnalyzer
39
+ from syscred.eeat_calculator import EEATCalculator, EEATScore
40
+
41
+ # TREC Integration (v2.3)
42
  from syscred.trec_retriever import TRECRetriever, Evidence, RetrievalResult
43
  from syscred.trec_dataset import TRECDataset, TRECTopic
44
 
45
+ # LIAR Benchmark (v2.3)
46
  from syscred.liar_dataset import LIARDataset, LiarStatement, LiarLabel
47
 
48
  # Convenience alias
 
58
  'IREngine',
59
  'EvaluationMetrics',
60
  'GraphRAG',
61
+ # NER & E-E-A-T (NEW v2.4)
62
+ 'NERAnalyzer',
63
+ 'EEATCalculator',
64
+ 'EEATScore',
65
+ # TREC (v2.3)
66
  'TRECRetriever',
67
  'TRECDataset',
68
  'TRECTopic',
69
  'Evidence',
70
  'RetrievalResult',
71
+ # LIAR Benchmark (v2.3)
72
  'LIARDataset',
73
  'LiarStatement',
74
  'LiarLabel',
syscred/backend_app.py CHANGED
@@ -22,15 +22,16 @@ import traceback
22
  from pathlib import Path
23
  try:
24
  from dotenv import load_dotenv
25
- env_path = Path(__file__).parent / '.env'
26
- try:
27
- if env_path.exists():
28
- load_dotenv(env_path)
29
- print(f"[SysCRED Backend] Loaded .env from {env_path}")
30
- else:
31
- print(f"[SysCRED Backend] No .env file found at {env_path}")
32
- except PermissionError:
33
- print(f"[SysCRED Backend] Permission denied for {env_path}, using system env vars")
 
34
  except ImportError:
35
  print("[SysCRED Backend] python-dotenv not installed, using system env vars")
36
 
@@ -88,6 +89,16 @@ except ImportError as e:
88
  app = Flask(__name__)
89
  CORS(app) # Enable CORS for frontend
90
 
 
 
 
 
 
 
 
 
 
 
91
  # Initialize Database
92
  try:
93
  init_db(app) # [NEW] Setup DB connection
@@ -270,6 +281,62 @@ def verify_endpoint():
270
 
271
  print(f"[SysCRED Backend] Score: {result.get('scoreCredibilite', 'N/A')}")
272
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  # [NEW] Persist to Database
274
  try:
275
  new_analysis = AnalysisResult(
 
22
  from pathlib import Path
23
  try:
24
  from dotenv import load_dotenv
25
+ # .env is at project root (parent of syscred/)
26
+ env_path = Path(__file__).resolve().parent.parent / '.env'
27
+ if not env_path.exists():
28
+ # Fallback: check syscred/ directory
29
+ env_path = Path(__file__).parent / '.env'
30
+ if env_path.exists():
31
+ load_dotenv(env_path)
32
+ print(f"[SysCRED Backend] Loaded .env from {env_path}")
33
+ else:
34
+ print(f"[SysCRED Backend] No .env file found, using system env vars")
35
  except ImportError:
36
  print("[SysCRED Backend] python-dotenv not installed, using system env vars")
37
 
 
89
  app = Flask(__name__)
90
  CORS(app) # Enable CORS for frontend
91
 
92
+ # Allow iframe embedding on UQAM domains (for syscred.uqam.ca mirror)
93
+ @app.after_request
94
+ def add_security_headers(response):
95
+ """Add security headers allowing UQAM iframe embedding."""
96
+ response.headers['X-Frame-Options'] = 'ALLOW-FROM https://syscred.uqam.ca'
97
+ response.headers['Content-Security-Policy'] = (
98
+ "frame-ancestors 'self' https://syscred.uqam.ca https://*.uqam.ca"
99
+ )
100
+ return response
101
+
102
  # Initialize Database
103
  try:
104
  init_db(app) # [NEW] Setup DB connection
 
281
 
282
  print(f"[SysCRED Backend] Score: {result.get('scoreCredibilite', 'N/A')}")
283
 
284
+ # [NEW] TREC Evidence Search + IR Metrics
285
+ try:
286
+ global trec_retriever, eval_metrics
287
+
288
+ # Initialize TREC if needed
289
+ if trec_retriever is None and TREC_AVAILABLE:
290
+ trec_retriever = TRECRetriever(use_stemming=True, enable_prf=False)
291
+ trec_retriever.corpus = TREC_DEMO_CORPUS
292
+ eval_metrics = EvaluationMetrics()
293
+ print("[SysCRED Backend] TREC Retriever initialized with demo corpus")
294
+
295
+ if trec_retriever and eval_metrics:
296
+ import time
297
+ start_time = time.time()
298
+
299
+ # Use the input text as query
300
+ query_text = input_data[:200] if not credibility_system.is_url(input_data) else result.get('informationEntree', input_data)[:200]
301
+
302
+ trec_result = trec_retriever.retrieve_evidence(query_text, k=5, model='bm25')
303
+ search_time = (time.time() - start_time) * 1000
304
+
305
+ retrieved_ids = [e.doc_id for e in trec_result.evidences]
306
+
307
+ # Use climate-related docs as "relevant" for demo evaluation
308
+ # In production, this would come from qrels files
309
+ relevant_ids = set(TREC_DEMO_CORPUS.keys()) # All docs as relevant pool
310
+
311
+ # Compute IR metrics
312
+ k = len(retrieved_ids) if retrieved_ids else 1
313
+ precision = eval_metrics.precision_at_k(retrieved_ids, relevant_ids, k) if retrieved_ids else 0
314
+ recall = eval_metrics.recall_at_k(retrieved_ids, relevant_ids, k) if retrieved_ids else 0
315
+ ap = eval_metrics.average_precision(retrieved_ids, relevant_ids) if retrieved_ids else 0
316
+ mrr = eval_metrics.mrr(retrieved_ids, relevant_ids) if retrieved_ids else 0
317
+
318
+ relevance_dict = {doc: 1 for doc in relevant_ids}
319
+ ndcg = eval_metrics.ndcg_at_k(retrieved_ids, relevance_dict, k) if retrieved_ids else 0
320
+
321
+ # TF-IDF score from top result
322
+ tfidf_score = trec_result.evidences[0].score if trec_result.evidences else 0
323
+
324
+ result['trec_metrics'] = {
325
+ 'precision': round(precision, 4),
326
+ 'recall': round(recall, 4),
327
+ 'map': round(ap, 4),
328
+ 'ndcg': round(ndcg, 4),
329
+ 'tfidf_score': round(tfidf_score, 4),
330
+ 'mrr': round(mrr, 4),
331
+ 'retrieved_count': len(retrieved_ids),
332
+ 'corpus_size': len(TREC_DEMO_CORPUS),
333
+ 'search_time_ms': round(search_time, 2)
334
+ }
335
+ print(f"[SysCRED Backend] TREC: P={precision:.3f} R={recall:.3f} MAP={ap:.3f} NDCG={ndcg:.3f} MRR={mrr:.3f}")
336
+ except Exception as e:
337
+ print(f"[SysCRED Backend] TREC metrics error: {e}")
338
+ result['trec_metrics'] = {'error': str(e)}
339
+
340
  # [NEW] Persist to Database
341
  try:
342
  new_analysis = AnalysisResult(
syscred/config.py CHANGED
@@ -23,23 +23,22 @@ from pathlib import Path
23
  from typing import Dict, Optional
24
  from dotenv import load_dotenv
25
 
26
- # Charger les variables depuis .env
27
  # Charger les variables depuis .env (Project Root)
28
- # Path: .../systemFactChecking/02_Code/syscred/config.py
29
- # Root .env is at .../systemFactChecking/.env (3 levels up)
30
  current_path = Path(__file__).resolve()
31
- env_path = current_path.parent.parent.parent / '.env'
32
 
33
- try:
34
- if not env_path.exists():
35
- print(f"[Config] WARNING: .env not found at {env_path}")
36
- # Try alternate location (sometimes CWD matters)
37
- env_path = Path.cwd().parent / '.env'
38
-
39
- load_dotenv(dotenv_path=env_path)
40
- print(f"[Config] Loading .env from {env_path}")
41
- except PermissionError:
42
- print(f"[Config] Permission denied for .env, using system env vars")
43
  print(f"[Config] SYSCRED_GOOGLE_API_KEY loaded: {'Yes' if os.environ.get('SYSCRED_GOOGLE_API_KEY') else 'No'}")
44
 
45
 
@@ -53,8 +52,9 @@ class Config:
53
  """
54
 
55
  # === Chemins ===
 
56
  BASE_DIR = Path(__file__).parent.parent
57
- ONTOLOGY_BASE_PATH = BASE_DIR / "sysCRED_onto26avrtil.ttl"
58
  ONTOLOGY_DATA_PATH = BASE_DIR / "ontology" / "sysCRED_data.ttl"
59
 
60
  # === Serveur Flask ===
@@ -64,7 +64,7 @@ class Config:
64
 
65
  # === API Keys ===
66
  GOOGLE_FACT_CHECK_API_KEY = os.getenv("SYSCRED_GOOGLE_API_KEY")
67
- DATABASE_URL = os.getenv("DATABASE_URL") # [NEW] Read DB URL from env
68
 
69
  # === Modèles ML ===
70
  # Support both SYSCRED_LOAD_ML and SYSCRED_LOAD_ML_MODELS (for Render)
 
23
  from typing import Dict, Optional
24
  from dotenv import load_dotenv
25
 
 
26
  # Charger les variables depuis .env (Project Root)
27
+ # Path: .../systemFactChecking/syscred/config.py
28
+ # Root .env is at .../systemFactChecking/.env (1 level up from syscred/)
29
  current_path = Path(__file__).resolve()
30
+ env_path = current_path.parent.parent / '.env'
31
 
32
+ if not env_path.exists():
33
+ print(f"[Config] WARNING: .env not found at {env_path}")
34
+ # Try alternate locations
35
+ for alt in [Path.cwd() / '.env', Path.cwd().parent / '.env']:
36
+ if alt.exists():
37
+ env_path = alt
38
+ break
39
+
40
+ load_dotenv(dotenv_path=env_path)
41
+ print(f"[Config] Loading .env from {env_path}")
42
  print(f"[Config] SYSCRED_GOOGLE_API_KEY loaded: {'Yes' if os.environ.get('SYSCRED_GOOGLE_API_KEY') else 'No'}")
43
 
44
 
 
52
  """
53
 
54
  # === Chemins ===
55
+ # BASE_DIR = project root (parent of syscred/)
56
  BASE_DIR = Path(__file__).parent.parent
57
+ ONTOLOGY_BASE_PATH = BASE_DIR / "ontology" / "sysCRED_onto26avrtil.ttl"
58
  ONTOLOGY_DATA_PATH = BASE_DIR / "ontology" / "sysCRED_data.ttl"
59
 
60
  # === Serveur Flask ===
 
64
 
65
  # === API Keys ===
66
  GOOGLE_FACT_CHECK_API_KEY = os.getenv("SYSCRED_GOOGLE_API_KEY")
67
+ DATABASE_URL = os.getenv("SYSCRED_DATABASE_URL", os.getenv("DATABASE_URL")) # Standardized env var
68
 
69
  # === Modèles ML ===
70
  # Support both SYSCRED_LOAD_ML and SYSCRED_LOAD_ML_MODELS (for Render)
syscred/database.py CHANGED
@@ -3,6 +3,7 @@
3
  Database Manager for SysCRED
4
  ===========================
5
  Handles connection to Supabase (PostgreSQL) and defines models.
 
6
  """
7
 
8
  import os
@@ -32,23 +33,38 @@ class AnalysisResult(db.Model):
32
  'url': self.url,
33
  'score': self.credibility_score,
34
  'summary': self.summary,
35
- 'created_at': self.created_at.isoformat(),
36
  'source_reputation': self.source_reputation
37
  }
38
 
39
  def init_db(app):
40
  """Initialize the database with the Flask app."""
41
- # Fallback to sqlite for local dev if no DATABASE_URL
42
- db_url = os.environ.get('DATABASE_URL')
43
  if db_url and db_url.startswith("postgres://"):
44
  db_url = db_url.replace("postgres://", "postgresql://", 1)
45
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  app.config['SQLALCHEMY_DATABASE_URI'] = db_url or 'sqlite:///syscred.db'
47
  app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
48
 
49
  db.init_app(app)
50
 
51
- # Create tables if they don't exist (basic migration)
52
  with app.app_context():
53
- db.create_all()
54
- print("[SysCRED-DB] Database tables initialized.")
 
 
 
 
 
3
  Database Manager for SysCRED
4
  ===========================
5
  Handles connection to Supabase (PostgreSQL) and defines models.
6
+ Falls back to SQLite if PostgreSQL is unavailable.
7
  """
8
 
9
  import os
 
33
  'url': self.url,
34
  'score': self.credibility_score,
35
  'summary': self.summary,
36
+ 'created_at': self.created_at.isoformat() if self.created_at else None,
37
  'source_reputation': self.source_reputation
38
  }
39
 
40
  def init_db(app):
41
  """Initialize the database with the Flask app."""
42
+ # Use SYSCRED_DATABASE_URL first (from .env), fallback to DATABASE_URL (from Render/HF)
43
+ db_url = os.environ.get('SYSCRED_DATABASE_URL') or os.environ.get('DATABASE_URL')
44
  if db_url and db_url.startswith("postgres://"):
45
  db_url = db_url.replace("postgres://", "postgresql://", 1)
46
 
47
+ # Test PostgreSQL reachability before committing to it
48
+ if db_url and 'postgresql' in db_url:
49
+ try:
50
+ import socket
51
+ from urllib.parse import urlparse
52
+ parsed = urlparse(db_url)
53
+ socket.getaddrinfo(parsed.hostname, parsed.port or 5432)
54
+ except (socket.gaierror, Exception) as e:
55
+ print(f"[SysCRED-DB] PostgreSQL host unreachable ({parsed.hostname}): {e}")
56
+ print("[SysCRED-DB] Falling back to SQLite...")
57
+ db_url = None # Force SQLite fallback
58
+
59
  app.config['SQLALCHEMY_DATABASE_URI'] = db_url or 'sqlite:///syscred.db'
60
  app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
61
 
62
  db.init_app(app)
63
 
 
64
  with app.app_context():
65
+ try:
66
+ db.create_all()
67
+ db_type = 'PostgreSQL (Supabase)' if db_url else 'SQLite (local)'
68
+ print(f"[SysCRED-DB] Database initialized: {db_type}")
69
+ except Exception as e:
70
+ print(f"[SysCRED-DB] Database init error: {e}")
syscred/db_store.py ADDED
@@ -0,0 +1,354 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SysCRED Storage Module - SQLite + Supabase
3
+ ==========================================
4
+ Stocke les triplets RDF et résultats d'analyse.
5
+ Utilise SQLite localement, avec option de sync vers Supabase.
6
+ """
7
+
8
+ import os
9
+ import sqlite3
10
+ import hashlib
11
+ import json
12
+ from datetime import datetime
13
+ from typing import Optional, Dict, Any, List, Tuple
14
+ from urllib.parse import urlparse
15
+ from pathlib import Path
16
+
17
+ # Chemins
18
+ BASE_DIR = Path(__file__).parent
19
+ DB_PATH = BASE_DIR / "syscred_local.db"
20
+
21
+ class SysCREDStore:
22
+ """
23
+ Gestionnaire de stockage pour SysCRED.
24
+ SQLite local avec option Supabase.
25
+ """
26
+
27
+ def __init__(self, db_path: str = None, supabase_url: str = None):
28
+ self.db_path = db_path or str(DB_PATH)
29
+ self.supabase_url = supabase_url or os.getenv("DATABASE_URL")
30
+ self.conn = None
31
+ self._init_local_db()
32
+
33
+ def _init_local_db(self):
34
+ """Initialise la base SQLite locale."""
35
+ self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
36
+ self.conn.row_factory = sqlite3.Row
37
+
38
+ # Créer les tables
39
+ self.conn.executescript("""
40
+ -- Résultats d'analyse
41
+ CREATE TABLE IF NOT EXISTS analysis_results (
42
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
43
+ url TEXT NOT NULL,
44
+ credibility_score REAL NOT NULL,
45
+ summary TEXT,
46
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
47
+ source_reputation TEXT,
48
+ fact_check_count INTEGER DEFAULT 0,
49
+ score_details TEXT,
50
+ domain TEXT
51
+ );
52
+
53
+ -- Triplets RDF
54
+ CREATE TABLE IF NOT EXISTS rdf_triples (
55
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
56
+ subject TEXT NOT NULL,
57
+ predicate TEXT NOT NULL,
58
+ object TEXT NOT NULL,
59
+ object_type TEXT DEFAULT 'uri',
60
+ graph_name TEXT DEFAULT 'data',
61
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
62
+ UNIQUE(subject, predicate, object, graph_name)
63
+ );
64
+
65
+ -- Sources
66
+ CREATE TABLE IF NOT EXISTS sources (
67
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
68
+ domain TEXT UNIQUE NOT NULL,
69
+ reputation_score REAL,
70
+ domain_age_years REAL,
71
+ is_fact_checker INTEGER DEFAULT 0,
72
+ analysis_count INTEGER DEFAULT 0,
73
+ last_analyzed TIMESTAMP,
74
+ metadata TEXT,
75
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
76
+ );
77
+
78
+ -- Claims
79
+ CREATE TABLE IF NOT EXISTS claims (
80
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
81
+ claim_text TEXT NOT NULL,
82
+ claim_hash TEXT UNIQUE,
83
+ source_url TEXT,
84
+ extracted_entities TEXT,
85
+ credibility_score REAL,
86
+ verification_status TEXT DEFAULT 'unverified',
87
+ evidence_count INTEGER DEFAULT 0,
88
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
89
+ );
90
+
91
+ -- Evidence
92
+ CREATE TABLE IF NOT EXISTS evidence (
93
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
94
+ claim_id INTEGER,
95
+ doc_id TEXT,
96
+ doc_text TEXT,
97
+ relevance_score REAL,
98
+ retrieval_method TEXT DEFAULT 'bm25',
99
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
100
+ FOREIGN KEY (claim_id) REFERENCES claims(id)
101
+ );
102
+
103
+ -- Index
104
+ CREATE INDEX IF NOT EXISTS idx_analysis_url ON analysis_results(url);
105
+ CREATE INDEX IF NOT EXISTS idx_triple_subject ON rdf_triples(subject);
106
+ CREATE INDEX IF NOT EXISTS idx_triple_graph ON rdf_triples(graph_name);
107
+ CREATE INDEX IF NOT EXISTS idx_sources_domain ON sources(domain);
108
+ """)
109
+ self.conn.commit()
110
+ print(f"[SysCREDStore] SQLite initialisé: {self.db_path}")
111
+
112
+ # =========================================================================
113
+ # ONTOLOGY / RDF TRIPLES
114
+ # =========================================================================
115
+
116
+ def sync_ontology(self, ontology_manager) -> Dict[str, int]:
117
+ """
118
+ Synchronise les graphes RDFLib vers SQLite.
119
+
120
+ Args:
121
+ ontology_manager: Instance avec base_graph et data_graph
122
+ """
123
+ result = {'base_synced': 0, 'data_synced': 0}
124
+
125
+ try:
126
+ # Sync base ontology
127
+ if hasattr(ontology_manager, 'base_graph') and ontology_manager.base_graph:
128
+ result['base_synced'] = self._sync_graph(
129
+ ontology_manager.base_graph,
130
+ graph_name='base'
131
+ )
132
+
133
+ # Sync data graph
134
+ if hasattr(ontology_manager, 'data_graph') and ontology_manager.data_graph:
135
+ result['data_synced'] = self._sync_graph(
136
+ ontology_manager.data_graph,
137
+ graph_name='data'
138
+ )
139
+
140
+ self.conn.commit()
141
+ print(f"[SysCREDStore] Synced {result['base_synced']} base + {result['data_synced']} data triples")
142
+
143
+ except Exception as e:
144
+ result['error'] = str(e)
145
+ print(f"[SysCREDStore] Sync error: {e}")
146
+
147
+ return result
148
+
149
+ def _sync_graph(self, graph, graph_name: str) -> int:
150
+ """Sync un graphe RDFLib vers SQLite."""
151
+ from rdflib import Literal
152
+
153
+ count = 0
154
+ cursor = self.conn.cursor()
155
+
156
+ for s, p, o in graph:
157
+ subject = str(s)
158
+ predicate = str(p)
159
+ obj_value = str(o)
160
+ obj_type = 'literal' if isinstance(o, Literal) else 'uri'
161
+
162
+ try:
163
+ cursor.execute("""
164
+ INSERT OR IGNORE INTO rdf_triples
165
+ (subject, predicate, object, object_type, graph_name)
166
+ VALUES (?, ?, ?, ?, ?)
167
+ """, (subject, predicate, obj_value, obj_type, graph_name))
168
+ count += 1
169
+ except:
170
+ pass
171
+
172
+ return count
173
+
174
+ def get_triple_stats(self) -> Dict[str, int]:
175
+ """Statistiques des triplets."""
176
+ cursor = self.conn.cursor()
177
+
178
+ cursor.execute("SELECT COUNT(*) FROM rdf_triples WHERE graph_name = 'base'")
179
+ base = cursor.fetchone()[0]
180
+
181
+ cursor.execute("SELECT COUNT(*) FROM rdf_triples WHERE graph_name = 'data'")
182
+ data = cursor.fetchone()[0]
183
+
184
+ return {
185
+ 'base_triples': base,
186
+ 'data_triples': data,
187
+ 'total_triples': base + data
188
+ }
189
+
190
+ # =========================================================================
191
+ # ANALYSIS RESULTS
192
+ # =========================================================================
193
+
194
+ def save_analysis(self, url: str, credibility_score: float,
195
+ summary: str = None, score_details: Dict = None,
196
+ source_reputation: str = None, fact_check_count: int = 0) -> int:
197
+ """Sauvegarde un résultat d'analyse."""
198
+ domain = urlparse(url).netloc
199
+
200
+ cursor = self.conn.cursor()
201
+ cursor.execute("""
202
+ INSERT INTO analysis_results
203
+ (url, credibility_score, summary, score_details, source_reputation,
204
+ fact_check_count, domain)
205
+ VALUES (?, ?, ?, ?, ?, ?, ?)
206
+ """, (
207
+ url, credibility_score, summary,
208
+ json.dumps(score_details) if score_details else None,
209
+ source_reputation, fact_check_count, domain
210
+ ))
211
+ self.conn.commit()
212
+
213
+ result_id = cursor.lastrowid
214
+ print(f"[SysCREDStore] Saved analysis #{result_id} for {domain}")
215
+
216
+ # Update source stats
217
+ self._update_source(domain, credibility_score)
218
+
219
+ return result_id
220
+
221
+ def get_history(self, url: str = None, limit: int = 50) -> List[Dict]:
222
+ """Récupère l'historique des analyses."""
223
+ cursor = self.conn.cursor()
224
+
225
+ if url:
226
+ cursor.execute("""
227
+ SELECT * FROM analysis_results
228
+ WHERE url = ? ORDER BY created_at DESC LIMIT ?
229
+ """, (url, limit))
230
+ else:
231
+ cursor.execute("""
232
+ SELECT * FROM analysis_results
233
+ ORDER BY created_at DESC LIMIT ?
234
+ """, (limit,))
235
+
236
+ return [dict(row) for row in cursor.fetchall()]
237
+
238
+ # =========================================================================
239
+ # SOURCES
240
+ # =========================================================================
241
+
242
+ def _update_source(self, domain: str, score: float = None):
243
+ """Met à jour les stats d'une source."""
244
+ cursor = self.conn.cursor()
245
+
246
+ cursor.execute("SELECT id, analysis_count FROM sources WHERE domain = ?", (domain,))
247
+ row = cursor.fetchone()
248
+
249
+ if row:
250
+ cursor.execute("""
251
+ UPDATE sources SET
252
+ analysis_count = analysis_count + 1,
253
+ last_analyzed = CURRENT_TIMESTAMP,
254
+ reputation_score = COALESCE(?, reputation_score)
255
+ WHERE domain = ?
256
+ """, (score, domain))
257
+ else:
258
+ cursor.execute("""
259
+ INSERT INTO sources (domain, reputation_score, analysis_count, last_analyzed)
260
+ VALUES (?, ?, 1, CURRENT_TIMESTAMP)
261
+ """, (domain, score))
262
+
263
+ self.conn.commit()
264
+
265
+ def get_source(self, domain: str) -> Optional[Dict]:
266
+ """Récupère les infos d'une source."""
267
+ cursor = self.conn.cursor()
268
+ cursor.execute("SELECT * FROM sources WHERE domain = ?", (domain,))
269
+ row = cursor.fetchone()
270
+ return dict(row) if row else None
271
+
272
+ # =========================================================================
273
+ # GLOBAL STATS
274
+ # =========================================================================
275
+
276
+ def get_stats(self) -> Dict[str, Any]:
277
+ """Statistiques globales."""
278
+ cursor = self.conn.cursor()
279
+
280
+ cursor.execute("SELECT COUNT(*) FROM analysis_results")
281
+ total_analyses = cursor.fetchone()[0]
282
+
283
+ cursor.execute("SELECT COUNT(*) FROM sources")
284
+ unique_domains = cursor.fetchone()[0]
285
+
286
+ cursor.execute("SELECT AVG(credibility_score) FROM analysis_results")
287
+ avg_score = cursor.fetchone()[0]
288
+
289
+ triple_stats = self.get_triple_stats()
290
+
291
+ return {
292
+ 'total_analyses': total_analyses,
293
+ 'unique_domains': unique_domains,
294
+ 'avg_credibility': round(avg_score, 2) if avg_score else None,
295
+ **triple_stats
296
+ }
297
+
298
+ def close(self):
299
+ """Ferme la connexion."""
300
+ if self.conn:
301
+ self.conn.close()
302
+
303
+
304
+ # ============================================================================
305
+ # INTEGRATION
306
+ # ============================================================================
307
+
308
def sync_ontology_to_db():
    """Load the ontology, sync its graphs into SQLite and print stats.

    Returns:
        The SysCREDStore instance on success, or None when the project
        modules (ontology_manager / config) cannot be imported.
    """
    import sys
    sys.path.insert(0, str(BASE_DIR))

    try:
        from ontology_manager import OntologyManager
        from config import Config
    except ImportError as e:
        print(f"Import error: {e}")
        return None

    # Build the ontology manager from the configured graph paths.
    onto = OntologyManager(
        base_ontology_path=str(Config.ONTOLOGY_BASE_PATH),
        data_path=str(Config.ONTOLOGY_DATA_PATH)
    )

    # Mirror both graphs into the SQLite store and report the outcome.
    store = SysCREDStore()
    result = store.sync_ontology(onto)
    print(f"\n✅ Sync complete: {result}")

    stats = store.get_stats()
    print(f"📊 Stats: {stats}")

    return store
339
+
340
+
341
+ # ============================================================================
342
+ # CLI
343
+ # ============================================================================
344
+
345
if __name__ == "__main__":
    # CLI entry point: sync the ontology and report where the DB lives.
    banner = "=" * 60
    print(banner)
    print("SysCRED Storage - Synchronisation des triplets")
    print(banner)

    store = sync_ontology_to_db()

    if store is not None:
        print("\n✅ Base de données prête!")
        print(f" Fichier: {store.db_path}")
syscred/eeat_calculator.py CHANGED
@@ -1,41 +1,118 @@
 
1
  # -*- coding: utf-8 -*-
2
  """
3
- E-E-A-T Calculator Module - SysCRED
4
- ====================================
5
- Google Quality Rater Guidelines implementation.
6
 
7
- E-E-A-T Scores:
8
- - Experience: Domain age, content richness
9
- - Expertise: Technical vocabulary, citations
10
- - Authority: Estimated PageRank, backlinks
11
- - Trust: HTTPS, unbiased sentiment
12
-
13
- (c) Dominique S. Loyer - PhD Thesis Prototype
14
  """
15
 
 
 
16
  import re
17
- from typing import Dict, Optional
18
- from urllib.parse import urlparse
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
 
21
  class EEATCalculator:
22
  """
23
- Calculate E-E-A-T scores based on Google Quality Rater Guidelines.
 
 
 
 
 
 
24
  """
25
 
26
- # Technical terms that indicate expertise
27
- TECHNICAL_TERMS = {
28
- 'research', 'study', 'analysis', 'data', 'evidence', 'methodology',
29
- 'peer-reviewed', 'journal', 'university', 'professor', 'dr.', 'phd',
30
- 'statistics', 'experiment', 'hypothesis', 'publication', 'citation',
31
- 'algorithm', 'framework', 'systematic', 'empirical', 'quantitative'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
 
34
- # Trusted domains (simplified list)
35
- TRUSTED_DOMAINS = {
36
- '.edu', '.gov', '.org', 'reuters.com', 'apnews.com', 'bbc.com',
37
- 'nature.com', 'science.org', 'who.int', 'un.org', 'wikipedia.org',
38
- 'lemonde.fr', 'radio-canada.ca', 'uqam.ca', 'umontreal.ca'
 
39
  }
40
 
41
  def __init__(self):
@@ -44,227 +121,346 @@ class EEATCalculator:
44
 
45
  def calculate(
46
  self,
47
- url: Optional[str] = None,
48
- text: Optional[str] = None,
49
- sentiment_score: float = 0.5,
50
- has_citations: bool = False,
51
- domain_age_years: int = 0
52
- ) -> Dict:
 
 
 
 
53
  """
54
- Calculate E-E-A-T scores.
55
 
56
  Args:
57
  url: Source URL
58
- text: Content text
59
- sentiment_score: 0-1 (0.5 = neutral is best for trust)
60
- has_citations: Whether content has citations
61
- domain_age_years: Estimated domain age
62
-
 
 
 
 
63
  Returns:
64
- {
65
- 'experience': 0.75,
66
- 'expertise': 0.80,
67
- 'authority': 0.65,
68
- 'trust': 0.90,
69
- 'overall': 0.78,
70
- 'details': {...}
71
- }
72
  """
73
- details = {}
74
-
75
- # --- EXPERIENCE ---
76
- experience = 0.5
77
- if domain_age_years >= 10:
78
- experience += 0.3
79
- elif domain_age_years >= 5:
80
- experience += 0.2
81
- elif domain_age_years >= 2:
82
- experience += 0.1
83
-
84
- if text:
85
- word_count = len(text.split())
86
- if word_count >= 1000:
87
- experience += 0.15
88
- elif word_count >= 500:
89
- experience += 0.1
90
-
91
- experience = min(experience, 1.0)
92
- details['experience_factors'] = {
93
- 'domain_age_bonus': domain_age_years >= 2,
94
- 'content_richness': len(text.split()) if text else 0
95
- }
96
 
97
- # --- EXPERTISE ---
98
- expertise = 0.4
99
- tech_count = 0
 
 
 
100
 
101
- if text:
102
- text_lower = text.lower()
103
- for term in self.TECHNICAL_TERMS:
104
- if term in text_lower:
105
- tech_count += 1
106
-
107
- if tech_count >= 5:
108
- expertise += 0.35
109
- elif tech_count >= 3:
110
- expertise += 0.25
111
- elif tech_count >= 1:
112
- expertise += 0.15
113
-
114
- if has_citations:
115
- expertise += 0.2
116
-
117
- expertise = min(expertise, 1.0)
118
- details['expertise_factors'] = {
119
- 'technical_terms_found': tech_count,
120
- 'has_citations': has_citations
121
- }
122
 
123
- # --- AUTHORITY ---
124
- authority = 0.3
 
 
 
125
 
126
- if url:
127
- parsed = urlparse(url)
128
- domain = parsed.netloc.lower()
129
-
130
- for trusted in self.TRUSTED_DOMAINS:
131
- if trusted in domain:
132
- authority += 0.4
133
- break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
- if parsed.scheme == 'https':
136
- authority += 0.1
137
-
138
- # Check for author indicators in text
139
- if text:
140
- author_patterns = [r'by\s+\w+\s+\w+', r'author:', r'written by', r'par\s+\w+']
141
- for pattern in author_patterns:
142
- if re.search(pattern, text.lower()):
143
- authority += 0.15
144
- break
145
-
146
- authority = min(authority, 1.0)
147
- details['authority_factors'] = {
148
- 'trusted_domain': False,
149
- 'https': url and urlparse(url).scheme == 'https' if url else False
150
- }
151
 
152
- # --- TRUST ---
153
- trust = 0.5
154
-
155
- # Neutral sentiment is best (0.5)
156
- sentiment_deviation = abs(sentiment_score - 0.5)
157
- if sentiment_deviation < 0.1:
158
- trust += 0.3 # Very neutral
159
- elif sentiment_deviation < 0.2:
160
- trust += 0.2
161
- elif sentiment_deviation < 0.3:
162
- trust += 0.1
163
-
164
- if url and urlparse(url).scheme == 'https':
165
- trust += 0.15
166
-
167
- trust = min(trust, 1.0)
168
- details['trust_factors'] = {
169
- 'sentiment_neutrality': 1 - sentiment_deviation * 2,
170
- 'secure_connection': url and 'https' in url if url else False
171
- }
172
 
173
- # --- OVERALL ---
174
- overall = (experience * 0.2 + expertise * 0.3 +
175
- authority * 0.25 + trust * 0.25)
 
 
176
 
177
- return {
178
- 'experience': round(experience, 2),
179
- 'expertise': round(expertise, 2),
180
- 'authority': round(authority, 2),
181
- 'trust': round(trust, 2),
182
- 'overall': round(overall, 2),
183
- 'details': details
184
- }
185
 
186
- def get_explanation(self, scores: Dict) -> str:
187
- """Generate human-readable explanation of E-E-A-T scores."""
 
 
 
 
 
 
 
 
 
 
 
188
  explanations = []
189
 
190
- exp = scores.get('experience', 0)
191
- if exp >= 0.7:
192
- explanations.append("✅ Expérience: Source établie avec contenu riche")
193
- elif exp >= 0.5:
194
- explanations.append("⚠️ Expérience: Source moyennement établie")
195
  else:
196
- explanations.append(" Expérience: Source nouvelle ou contenu limité")
197
 
198
- ext = scores.get('expertise', 0)
199
- if ext >= 0.7:
200
- explanations.append("✅ Expertise: Vocabulaire technique, citations présentes")
201
- elif ext >= 0.5:
202
- explanations.append("⚠️ Expertise: Niveau technique moyen")
203
  else:
204
- explanations.append(" Expertise: Manque de terminologie spécialisée")
205
 
206
- auth = scores.get('authority', 0)
207
- if auth >= 0.7:
208
- explanations.append("✅ Autorité: Domaine reconnu et fiable")
209
- elif auth >= 0.5:
210
- explanations.append("⚠️ Autorité: Niveau d'autorité moyen")
211
  else:
212
- explanations.append(" Autorité: Source non reconnue")
213
 
214
- tr = scores.get('trust', 0)
215
- if tr >= 0.7:
216
- explanations.append("✅ Confiance: Ton neutre, connexion sécurisée")
217
- elif tr >= 0.5:
218
- explanations.append("⚠️ Confiance: Niveau de confiance moyen")
219
  else:
220
- explanations.append(" Confiance: Ton biaisé ou connexion non sécurisée")
221
 
222
  return "\n".join(explanations)
223
 
224
 
225
- # Singleton
226
- _calculator = None
227
-
228
- def get_calculator() -> EEATCalculator:
229
- """Get or create E-E-A-T calculator singleton."""
230
- global _calculator
231
- if _calculator is None:
232
- _calculator = EEATCalculator()
233
- return _calculator
234
-
235
-
236
- # --- Testing ---
237
  if __name__ == "__main__":
238
- print("=" * 60)
239
- print("SysCRED E-E-A-T Calculator - Test")
240
- print("=" * 60)
241
-
242
  calc = EEATCalculator()
243
 
244
- test_url = "https://www.nature.com/articles/example"
245
  test_text = """
246
- A peer-reviewed study published in the journal Nature found evidence
247
- that the new methodology significantly improves research outcomes.
248
- Dr. Smith from Harvard University presented the statistics at the conference.
249
  """
250
 
251
- result = calc.calculate(
 
 
 
 
 
252
  url=test_url,
253
  text=test_text,
254
- sentiment_score=0.5,
255
- has_citations=True,
256
- domain_age_years=15
 
257
  )
258
 
259
- print("\n--- E-E-A-T Scores ---")
260
- print(f" Experience: {result['experience']:.0%}")
261
- print(f" Expertise: {result['expertise']:.0%}")
262
- print(f" Authority: {result['authority']:.0%}")
263
- print(f" Trust: {result['trust']:.0%}")
264
- print(f" ─────────────────")
265
- print(f" OVERALL: {result['overall']:.0%}")
266
-
267
- print("\n--- Explanation ---")
268
- print(calc.get_explanation(result))
269
-
270
- print("\n" + "=" * 60)
 
1
+ #!/usr/bin/env python3
2
  # -*- coding: utf-8 -*-
3
  """
4
+ E-E-A-T Metrics Calculator for SysCRED
5
+ ========================================
6
+ Calculates Google-style E-E-A-T metrics (Experience, Expertise, Authority, Trust).
7
 
8
+ These metrics mirror modern Google ranking signals:
9
+ - Experience: Domain age, content freshness
10
+ - Expertise: Author identification, depth of content
11
+ - Authority: PageRank simulation, citations/backlinks
12
+ - Trust: HTTPS, fact-checks, low bias score
 
 
13
  """
14
 
15
+ from typing import Dict, Any, Optional, List
16
+ from dataclasses import dataclass
17
  import re
18
+ from datetime import datetime
19
+ import logging
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
@dataclass
class EEATScore:
    """E-E-A-T score container.

    All four components are floats in [0, 1]. The formatted percentage
    accessors (``experience_pct`` etc.) are exposed as properties so callers
    such as ``EEATCalculator.explain_score`` can read them directly —
    previously those strings only existed as keys inside ``to_dict()``, so
    attribute access like ``eeat.experience_pct`` raised AttributeError.
    """
    experience: float  # 0-1
    expertise: float   # 0-1
    authority: float   # 0-1
    trust: float       # 0-1

    @property
    def overall(self) -> float:
        """Weighted average of all E-E-A-T components."""
        # Weights based on Google's emphasis (authority counts most).
        weights = {
            'experience': 0.15,
            'expertise': 0.25,
            'authority': 0.35,
            'trust': 0.25
        }
        return (
            self.experience * weights['experience'] +
            self.expertise * weights['expertise'] +
            self.authority * weights['authority'] +
            self.trust * weights['trust']
        )

    @staticmethod
    def _as_pct(value: float) -> str:
        """Format a 0-1 score as a truncated integer percentage string."""
        return f"{int(value * 100)}%"

    @property
    def experience_pct(self) -> str:
        return self._as_pct(self.experience)

    @property
    def expertise_pct(self) -> str:
        return self._as_pct(self.expertise)

    @property
    def authority_pct(self) -> str:
        return self._as_pct(self.authority)

    @property
    def trust_pct(self) -> str:
        return self._as_pct(self.trust)

    @property
    def overall_pct(self) -> str:
        return self._as_pct(self.overall)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for JSON serialization."""
        return {
            'experience': round(self.experience, 3),
            'expertise': round(self.expertise, 3),
            'authority': round(self.authority, 3),
            'trust': round(self.trust, 3),
            'overall': round(self.overall, 3),
            'experience_pct': self.experience_pct,
            'expertise_pct': self.expertise_pct,
            'authority_pct': self.authority_pct,
            'trust_pct': self.trust_pct,
            'overall_pct': self.overall_pct
        }
+ }
63
 
64
 
65
  class EEATCalculator:
66
  """
67
+ Calculate E-E-A-T metrics from various signals.
68
+
69
+ Mirrors Google's quality rater guidelines:
70
+ - Experience: Has the author demonstrated real experience?
71
+ - Expertise: Is the content expert-level?
72
+ - Authority: Is the source recognized as authoritative?
73
+ - Trust: Is the source trustworthy?
74
  """
75
 
76
+ # Known authoritative domains
77
+ AUTHORITATIVE_DOMAINS = {
78
+ # News
79
+ 'lemonde.fr': 0.95,
80
+ 'lefigaro.fr': 0.90,
81
+ 'liberation.fr': 0.88,
82
+ 'nytimes.com': 0.95,
83
+ 'washingtonpost.com': 0.93,
84
+ 'theguardian.com': 0.92,
85
+ 'bbc.com': 0.94,
86
+ 'bbc.co.uk': 0.94,
87
+ 'reuters.com': 0.96,
88
+ 'apnews.com': 0.95,
89
+ # Academic
90
+ 'nature.com': 0.98,
91
+ 'science.org': 0.98,
92
+ 'pubmed.ncbi.nlm.nih.gov': 0.97,
93
+ 'scholar.google.com': 0.85,
94
+ # Government
95
+ 'gouv.fr': 0.90,
96
+ 'gov.uk': 0.90,
97
+ 'whitehouse.gov': 0.88,
98
+ 'europa.eu': 0.92,
99
+ # Fact-checkers
100
+ 'snopes.com': 0.88,
101
+ 'factcheck.org': 0.90,
102
+ 'politifact.com': 0.88,
103
+ 'fullfact.org': 0.89,
104
+ # Wikipedia (moderate authority)
105
+ 'wikipedia.org': 0.75,
106
+ 'fr.wikipedia.org': 0.75,
107
+ 'en.wikipedia.org': 0.75,
108
  }
109
 
110
+ # Low-trust domains (misinformation sources)
111
+ LOW_TRUST_DOMAINS = {
112
+ 'infowars.com': 0.1,
113
+ 'breitbart.com': 0.3,
114
+ 'naturalnews.com': 0.15,
115
+ # Add more as needed
116
  }
117
 
118
  def __init__(self):
 
121
 
122
  def calculate(
123
  self,
124
+ url: str,
125
+ text: str,
126
+ nlp_analysis: Optional[Dict[str, Any]] = None,
127
+ pagerank: Optional[float] = None,
128
+ fact_checks: Optional[List[Dict]] = None,
129
+ domain_age_years: Optional[float] = None,
130
+ has_https: bool = True,
131
+ author_identified: bool = False,
132
+ seo_score: Optional[float] = None
133
+ ) -> EEATScore:
134
  """
135
+ Calculate E-E-A-T scores from available signals.
136
 
137
  Args:
138
  url: Source URL
139
+ text: Article text content
140
+ nlp_analysis: NLP analysis results (sentiment, coherence, bias)
141
+ pagerank: Simulated PageRank score (0-1)
142
+ fact_checks: List of fact-check results
143
+ domain_age_years: Domain age in years (from WHOIS)
144
+ has_https: Whether site uses HTTPS
145
+ author_identified: Whether author is clearly identified
146
+ seo_score: SEO/technical quality score
147
+
148
  Returns:
149
+ EEATScore with all component scores
 
 
 
 
 
 
 
150
  """
151
+ # Extract domain from URL
152
+ domain = self._extract_domain(url)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
+ # Calculate each component
155
+ experience = self._calculate_experience(
156
+ domain_age_years,
157
+ text,
158
+ nlp_analysis
159
+ )
160
 
161
+ expertise = self._calculate_expertise(
162
+ text,
163
+ author_identified,
164
+ nlp_analysis
165
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
+ authority = self._calculate_authority(
168
+ domain,
169
+ pagerank,
170
+ seo_score
171
+ )
172
 
173
+ trust = self._calculate_trust(
174
+ domain,
175
+ has_https,
176
+ fact_checks,
177
+ nlp_analysis
178
+ )
179
+
180
+ return EEATScore(
181
+ experience=experience,
182
+ expertise=expertise,
183
+ authority=authority,
184
+ trust=trust
185
+ )
186
+
187
+ def _extract_domain(self, url: str) -> str:
188
+ """Extract domain from URL."""
189
+ import re
190
+ match = re.search(r'https?://(?:www\.)?([^/]+)', url)
191
+ return match.group(1).lower() if match else url.lower()
192
+
193
+ def _calculate_experience(
194
+ self,
195
+ domain_age_years: Optional[float],
196
+ text: str,
197
+ nlp_analysis: Optional[Dict]
198
+ ) -> float:
199
+ """
200
+ Calculate Experience score.
201
+
202
+ Factors:
203
+ - Domain age (longer = more experience)
204
+ - Content freshness (recently updated)
205
+ - First-hand experience indicators in text
206
+ """
207
+ score = 0.5 # Base score
208
+
209
+ # Domain age contribution (max 0.3)
210
+ if domain_age_years is not None:
211
+ age_score = min(domain_age_years / 20, 1.0) * 0.3 # 20 years = max
212
+ score += age_score
213
+ else:
214
+ score += 0.15 # Assume moderate age
215
+
216
+ # Content depth contribution (max 0.2)
217
+ word_count = len(text.split()) if text else 0
218
+ if word_count > 1000:
219
+ score += 0.2
220
+ elif word_count > 500:
221
+ score += 0.15
222
+ elif word_count > 200:
223
+ score += 0.1
224
+
225
+ # First-hand experience indicators (max 0.1)
226
+ experience_indicators = [
227
+ r'\b(j\'ai|je suis|nous avons|I have|we have|in my experience)\b',
228
+ r'\b(interview|entretien|témoignage|witness|firsthand)\b',
229
+ r'\b(sur place|on the ground|eyewitness)\b'
230
+ ]
231
+ for pattern in experience_indicators:
232
+ if re.search(pattern, text, re.IGNORECASE):
233
+ score += 0.03
234
+
235
+ return min(score, 1.0)
236
+
237
+ def _calculate_expertise(
238
+ self,
239
+ text: str,
240
+ author_identified: bool,
241
+ nlp_analysis: Optional[Dict]
242
+ ) -> float:
243
+ """
244
+ Calculate Expertise score.
245
+
246
+ Factors:
247
+ - Author identification
248
+ - Technical depth of content
249
+ - Citation of sources
250
+ - Coherence (from NLP)
251
+ """
252
+ score = 0.4 # Base score
253
+
254
+ # Author identification (0.2)
255
+ if author_identified:
256
+ score += 0.2
257
+
258
+ # Citation indicators (max 0.2)
259
+ citation_patterns = [
260
+ r'\b(selon|according to|d\'après|source:)\b',
261
+ r'\b(étude|study|research|rapport|report)\b',
262
+ r'\b(expert|spécialiste|chercheur|professor|Dr\.)\b',
263
+ r'\[([\d]+)\]', # [1] style citations
264
+ r'https?://[^\s]+' # Links
265
+ ]
266
+ citation_count = 0
267
+ for pattern in citation_patterns:
268
+ citation_count += len(re.findall(pattern, text, re.IGNORECASE))
269
+ score += min(citation_count * 0.02, 0.2)
270
+
271
+ # Coherence from NLP analysis (0.2)
272
+ if nlp_analysis and 'coherence' in nlp_analysis:
273
+ coherence = nlp_analysis['coherence']
274
+ if isinstance(coherence, dict):
275
+ coherence = coherence.get('score', 0.5)
276
+ score += coherence * 0.2
277
+ else:
278
+ score += 0.1 # Assume moderate coherence
279
+
280
+ return min(score, 1.0)
281
+
282
+ def _calculate_authority(
283
+ self,
284
+ domain: str,
285
+ pagerank: Optional[float],
286
+ seo_score: Optional[float]
287
+ ) -> float:
288
+ """
289
+ Calculate Authority score.
290
+
291
+ Factors:
292
+ - Known authoritative domain
293
+ - PageRank simulation
294
+ - SEO/technical quality
295
+ """
296
+ score = 0.3 # Base score
297
+
298
+ # Known domain authority (max 0.5)
299
+ for known_domain, authority in self.AUTHORITATIVE_DOMAINS.items():
300
+ if known_domain in domain:
301
+ score = max(score, authority * 0.5 + 0.3)
302
+ break
303
+
304
+ # Check low-trust domains
305
+ for low_trust_domain, low_score in self.LOW_TRUST_DOMAINS.items():
306
+ if low_trust_domain in domain:
307
+ score = min(score, low_score)
308
+ break
309
+
310
+ # PageRank contribution (max 0.3)
311
+ if pagerank is not None:
312
+ score += pagerank * 0.3
313
+ else:
314
+ score += 0.15 # Assume moderate pagerank
315
+
316
+ # SEO score contribution (max 0.2)
317
+ if seo_score is not None:
318
+ score += seo_score * 0.2
319
+ else:
320
+ score += 0.1
321
+
322
+ return min(score, 1.0)
323
+
324
+ def _calculate_trust(
325
+ self,
326
+ domain: str,
327
+ has_https: bool,
328
+ fact_checks: Optional[List[Dict]],
329
+ nlp_analysis: Optional[Dict]
330
+ ) -> float:
331
+ """
332
+ Calculate Trust score.
333
+
334
+ Factors:
335
+ - HTTPS
336
+ - Fact-check results
337
+ - Bias score (low = better)
338
+ - Known trustworthy domain
339
+ """
340
+ score = 0.4 # Base score
341
+
342
+ # HTTPS (0.1)
343
+ if has_https:
344
+ score += 0.1
345
+
346
+ # Fact-check results (max 0.3)
347
+ if fact_checks:
348
+ positive_checks = sum(1 for fc in fact_checks
349
+ if fc.get('rating', '').lower() in ['true', 'vrai', 'correct'])
350
+ negative_checks = sum(1 for fc in fact_checks
351
+ if fc.get('rating', '').lower() in ['false', 'faux', 'incorrect', 'pants-fire'])
352
 
353
+ if positive_checks > 0:
354
+ score += 0.2
355
+ if negative_checks > 0:
356
+ score -= 0.3
 
 
 
 
 
 
 
 
 
 
 
 
357
 
358
+ # Bias score (max 0.2, lower bias = higher trust)
359
+ if nlp_analysis:
360
+ bias_data = nlp_analysis.get('bias_analysis', {})
361
+ if isinstance(bias_data, dict):
362
+ bias_score = bias_data.get('score', 0.3)
363
+ else:
364
+ bias_score = 0.3
365
+ # Invert: low bias = high trust contribution
366
+ score += (1 - bias_score) * 0.2
367
+ else:
368
+ score += 0.1
 
 
 
 
 
 
 
 
 
369
 
370
+ # Known trustworthy domain (0.1)
371
+ for known_domain in self.AUTHORITATIVE_DOMAINS:
372
+ if known_domain in domain:
373
+ score += 0.1
374
+ break
375
 
376
+ # Known low-trust domain (penalty)
377
+ for low_trust_domain in self.LOW_TRUST_DOMAINS:
378
+ if low_trust_domain in domain:
379
+ score -= 0.3
380
+ break
381
+
382
+ return max(min(score, 1.0), 0.0)
 
383
 
384
+ def explain_score(self, eeat: EEATScore, url: str) -> str:
385
+ """
386
+ Generate human-readable explanation of E-E-A-T score.
387
+
388
+ Args:
389
+ eeat: EEATScore instance
390
+ url: Source URL
391
+
392
+ Returns:
393
+ Formatted explanation string
394
+ """
395
+ domain = self._extract_domain(url)
396
+
397
  explanations = []
398
 
399
+ # Experience
400
+ if eeat.experience >= 0.8:
401
+ explanations.append(f"✅ **Expérience élevée** ({eeat.experience_pct}): Source établie depuis longtemps")
402
+ elif eeat.experience >= 0.5:
403
+ explanations.append(f"🔶 **Expérience moyenne** ({eeat.experience_pct}): Source modérément établie")
404
  else:
405
+ explanations.append(f"⚠️ **Expérience faible** ({eeat.experience_pct}): Source récente ou peu connue")
406
 
407
+ # Expertise
408
+ if eeat.expertise >= 0.8:
409
+ explanations.append(f"✅ **Expertise élevée** ({eeat.expertise_pct}): Contenu approfondi avec citations")
410
+ elif eeat.expertise >= 0.5:
411
+ explanations.append(f"🔶 **Expertise moyenne** ({eeat.expertise_pct}): Contenu standard")
412
  else:
413
+ explanations.append(f"⚠️ **Expertise faible** ({eeat.expertise_pct}): Manque de profondeur")
414
 
415
+ # Authority
416
+ if eeat.authority >= 0.8:
417
+ explanations.append(f"✅ **Autorité élevée** ({eeat.authority_pct}): Source très citée et reconnue")
418
+ elif eeat.authority >= 0.5:
419
+ explanations.append(f"🔶 **Autorité moyenne** ({eeat.authority_pct}): Source modérément reconnue")
420
  else:
421
+ explanations.append(f"⚠️ **Autorité faible** ({eeat.authority_pct}): Peu de citations externes")
422
 
423
+ # Trust
424
+ if eeat.trust >= 0.8:
425
+ explanations.append(f"✅ **Confiance élevée** ({eeat.trust_pct}): Faits vérifiés, pas de biais")
426
+ elif eeat.trust >= 0.5:
427
+ explanations.append(f"🔶 **Confiance moyenne** ({eeat.trust_pct}): Quelques signaux de confiance")
428
  else:
429
+ explanations.append(f"⚠️ **Confiance faible** ({eeat.trust_pct}): Prudence recommandée")
430
 
431
  return "\n".join(explanations)
432
 
433
 
434
+ # Test
 
 
 
 
 
 
 
 
 
 
 
435
if __name__ == "__main__":
    # Smoke test for the E-E-A-T calculator.
    calc = EEATCalculator()

    test_url = "https://www.lemonde.fr/politique/article/2024/01/06/trump.html"
    test_text = """
    Selon une étude du chercheur Dr. Martin, l'insurrection du 6 janvier 2021
    au Capitol a été un événement marquant. Notre reporter sur place a témoigné
    des événements. Les experts politiques analysent les conséquences.
    """

    nlp_analysis = {
        'coherence': {'score': 0.8},
        'bias_analysis': {'score': 0.2}
    }

    eeat = calc.calculate(
        url=test_url,
        text=test_text,
        nlp_analysis=nlp_analysis,
        pagerank=0.7,
        has_https=True,
        author_identified=True
    )

    # Fix: the plain EEATScore dataclass exposes the formatted percentages
    # via to_dict(); reading eeat.experience_pct directly raised
    # AttributeError.
    scores = eeat.to_dict()

    print("=== E-E-A-T Scores ===")
    print(f"Experience: {scores['experience_pct']}")
    print(f"Expertise: {scores['expertise_pct']}")
    print(f"Authority: {scores['authority_pct']}")
    print(f"Trust: {scores['trust_pct']}")
    print(f"Overall: {scores['overall_pct']}")
    print("\n=== Explanation ===")
    print(calc.explain_score(eeat, test_url))
 
 
 
 
syscred/ner_analyzer.py CHANGED
@@ -1,198 +1,283 @@
 
1
  # -*- coding: utf-8 -*-
2
  """
3
- NER Analyzer Module - SysCRED
4
- ==============================
5
- Named Entity Recognition for fact-checking enhancement.
6
 
7
- Extracts: PERSON, ORG, GPE, DATE, MISC entities
8
-
9
- (c) Dominique S. Loyer - PhD Thesis Prototype
 
 
 
 
10
  """
11
 
12
- import os
 
13
 
14
- # Check for spaCy
15
  try:
16
  import spacy
 
17
  HAS_SPACY = True
18
  except ImportError:
19
  HAS_SPACY = False
20
- print("[NER] spaCy not installed. NER disabled.")
 
 
21
 
22
 
23
  class NERAnalyzer:
24
  """
25
- Named Entity Recognition using spaCy.
26
 
27
- Supports:
28
- - French (fr_core_news_md)
29
- - English (en_core_web_sm)
30
  """
31
 
32
- # Entity type mapping with icons
33
- ENTITY_ICONS = {
34
- 'PERSON': '👤',
35
- 'PER': '👤',
36
- 'ORG': '🏢',
37
- 'GPE': '📍',
38
- 'LOC': '📍',
39
- 'DATE': '📅',
40
- 'TIME': '🕐',
41
- 'MONEY': '💰',
42
- 'MISC': '🏷️',
43
- 'NORP': '👥',
44
- 'FAC': '🏛️',
45
- 'PRODUCT': '📦',
46
- 'EVENT': '🎉',
47
- 'WORK_OF_ART': '🎨',
48
- 'LAW': '⚖️',
49
- 'LANGUAGE': '🗣️',
50
  }
51
 
52
- def __init__(self, language: str = 'en'):
53
  """
54
  Initialize NER analyzer.
55
 
56
  Args:
57
- language: 'en' or 'fr'
 
58
  """
59
- self.language = language
 
60
  self.nlp = None
61
- self.enabled = False
62
 
63
  if HAS_SPACY:
64
- self._load_model()
65
-
66
- def _load_model(self):
67
- """Load the appropriate spaCy model."""
68
- models = {
69
- 'en': ['en_core_web_sm', 'en_core_web_md'],
70
- 'fr': ['fr_core_news_md', 'fr_core_news_sm']
71
- }
72
-
73
- for model_name in models.get(self.language, models['en']):
74
  try:
75
  self.nlp = spacy.load(model_name)
76
- self.enabled = True
77
- print(f"[NER] Loaded model: {model_name}")
78
- break
79
- except OSError:
80
- continue
81
-
82
- if not self.enabled:
83
- print(f"[NER] No model found for language: {self.language}")
 
 
84
 
85
- def extract_entities(self, text: str) -> dict:
86
  """
87
  Extract named entities from text.
88
 
 
 
 
89
  Returns:
90
- {
91
- 'entities': [
92
- {'text': 'Emmanuel Macron', 'type': 'PERSON', 'icon': '👤'},
93
- ...
94
- ],
95
- 'summary': {
96
- 'PERSON': ['Emmanuel Macron'],
97
- 'ORG': ['UQAM', 'Google'],
98
- ...
99
- }
100
- }
101
  """
102
- if not self.enabled or not text:
103
- return {'entities': [], 'summary': {}}
104
 
 
 
 
 
 
 
 
 
 
105
  doc = self.nlp(text)
106
-
107
- entities = []
108
- summary = {}
109
- seen = set()
110
 
111
  for ent in doc.ents:
112
- # Avoid duplicates
113
- key = (ent.text.lower(), ent.label_)
114
- if key in seen:
115
- continue
116
- seen.add(key)
117
 
118
- entity = {
 
 
 
 
 
 
 
119
  'text': ent.text,
120
- 'type': ent.label_,
121
- 'icon': self.ENTITY_ICONS.get(ent.label_, '🏷️'),
122
  'start': ent.start_char,
123
- 'end': ent.end_char
 
 
 
 
124
  }
125
- entities.append(entity)
126
 
127
- # Group by type
128
- if ent.label_ not in summary:
129
- summary[ent.label_] = []
130
- summary[ent.label_].append(ent.text)
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
- return {
133
- 'entities': entities,
134
- 'summary': summary,
135
- 'count': len(entities)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
- def analyze_for_factcheck(self, text: str) -> dict:
139
  """
140
- Analyze text for fact-checking relevance.
141
 
142
- Returns entities with credibility hints.
 
 
 
 
143
  """
144
- result = self.extract_entities(text)
 
 
 
 
 
 
 
 
 
 
145
 
146
- # Add fact-checking hints
147
- hints = []
 
 
 
148
 
149
- for ent in result.get('entities', []):
150
- if ent['type'] in ['PERSON', 'PER']:
151
- hints.append(f"Verify claims about {ent['text']}")
152
- elif ent['type'] == 'ORG':
153
- hints.append(f"Check {ent['text']} official sources")
154
- elif ent['type'] in ['GPE', 'LOC']:
155
- hints.append(f"Verify location: {ent['text']}")
156
- elif ent['type'] == 'DATE':
157
- hints.append(f"Confirm date: {ent['text']}")
 
 
 
 
 
158
 
159
- result['fact_check_hints'] = hints[:5] # Top 5 hints
 
160
  return result
161
 
162
 
163
- # Singleton instance
164
- _analyzer = None
 
165
 
166
- def get_analyzer(language: str = 'en') -> NERAnalyzer:
167
- """Get or create the NER analyzer singleton."""
168
- global _analyzer
169
- if _analyzer is None:
170
- _analyzer = NERAnalyzer(language)
171
- return _analyzer
172
 
173
 
174
- # --- Testing ---
175
  if __name__ == "__main__":
176
- print("=" * 60)
177
- print("SysCRED NER Analyzer - Test")
178
- print("=" * 60)
179
-
180
- analyzer = NERAnalyzer('en')
181
 
182
  test_text = """
183
- Emmanuel Macron announced today that France will invest €500 million
184
- in AI research. The announcement was made at the UQAM in Montreal, Canada
185
- on February 8, 2026. Google and Microsoft also confirmed their participation.
186
  """
187
 
188
- result = analyzer.analyze_for_factcheck(test_text)
189
-
190
- print("\n--- Entities Found ---")
191
- for ent in result['entities']:
192
- print(f" {ent['icon']} {ent['text']} ({ent['type']})")
193
-
194
- print("\n--- Fact-Check Hints ---")
195
- for hint in result.get('fact_check_hints', []):
196
- print(f" • {hint}")
197
-
198
- print("\n" + "=" * 60)
 
1
+ #!/usr/bin/env python3
2
  # -*- coding: utf-8 -*-
3
  """
4
+ Named Entity Recognition (NER) Analyzer for SysCRED
5
+ ====================================================
6
+ Extracts named entities from text using spaCy.
7
 
8
+ Entities detected:
9
+ - PER: Persons (Donald Trump, Emmanuel Macron)
10
+ - ORG: Organizations (FBI, UN, Google)
11
+ - LOC: Locations (Paris, Capitol)
12
+ - DATE: Dates (January 6, 2021)
13
+ - MONEY: Amounts ($10 million)
14
+ - EVENT: Events (insurrection, election)
15
  """
16
 
17
+ from typing import Dict, List, Any, Optional
18
+ import logging
19
 
20
+ # Try to import spaCy
21
  try:
22
  import spacy
23
+ from spacy.language import Language
24
  HAS_SPACY = True
25
  except ImportError:
26
  HAS_SPACY = False
27
+ spacy = None
28
+
29
+ logger = logging.getLogger(__name__)
30
 
31
 
32
  class NERAnalyzer:
33
  """
34
+ Named Entity Recognition analyzer using spaCy.
35
 
36
+ Supports French (fr_core_news_md) and English (en_core_web_md).
37
+ Falls back to heuristic extraction if spaCy is not available.
 
38
  """
39
 
40
+ # Entity type mappings for display
41
+ ENTITY_LABELS = {
42
+ 'PER': {'fr': 'Personne', 'en': 'Person', 'emoji': '👤'},
43
+ 'PERSON': {'fr': 'Personne', 'en': 'Person', 'emoji': '👤'},
44
+ 'ORG': {'fr': 'Organisation', 'en': 'Organization', 'emoji': '🏢'},
45
+ 'LOC': {'fr': 'Lieu', 'en': 'Location', 'emoji': '📍'},
46
+ 'GPE': {'fr': 'Lieu géopolitique', 'en': 'Geopolitical', 'emoji': '🌍'},
47
+ 'DATE': {'fr': 'Date', 'en': 'Date', 'emoji': '📅'},
48
+ 'TIME': {'fr': 'Heure', 'en': 'Time', 'emoji': '⏰'},
49
+ 'MONEY': {'fr': 'Montant', 'en': 'Money', 'emoji': '💰'},
50
+ 'PERCENT': {'fr': 'Pourcentage', 'en': 'Percent', 'emoji': '📊'},
51
+ 'EVENT': {'fr': 'Événement', 'en': 'Event', 'emoji': '📰'},
52
+ 'PRODUCT': {'fr': 'Produit', 'en': 'Product', 'emoji': '📦'},
53
+ 'LAW': {'fr': 'Loi', 'en': 'Law', 'emoji': '⚖️'},
54
+ 'NORP': {'fr': 'Groupe', 'en': 'Group', 'emoji': '👥'},
55
+ 'MISC': {'fr': 'Divers', 'en': 'Miscellaneous', 'emoji': '🔖'},
 
 
56
  }
57
 
58
+ def __init__(self, model_name: str = "fr_core_news_md", fallback: bool = True):
59
  """
60
  Initialize NER analyzer.
61
 
62
  Args:
63
+ model_name: spaCy model to load (fr_core_news_md, en_core_web_md)
64
+ fallback: If True, use heuristics when spaCy unavailable
65
  """
66
+ self.model_name = model_name
67
+ self.fallback = fallback
68
  self.nlp = None
69
+ self.use_heuristics = False
70
 
71
  if HAS_SPACY:
 
 
 
 
 
 
 
 
 
 
72
  try:
73
  self.nlp = spacy.load(model_name)
74
+ logger.info(f"[NER] Loaded spaCy model: {model_name}")
75
+ except OSError as e:
76
+ logger.warning(f"[NER] Could not load model {model_name}: {e}")
77
+ if fallback:
78
+ self.use_heuristics = True
79
+ logger.info("[NER] Using heuristic entity extraction")
80
+ else:
81
+ if fallback:
82
+ self.use_heuristics = True
83
+ logger.info("[NER] spaCy not installed. Using heuristic extraction")
84
 
85
+ def extract_entities(self, text: str) -> Dict[str, List[Dict[str, Any]]]:
86
  """
87
  Extract named entities from text.
88
 
89
+ Args:
90
+ text: Input text to analyze
91
+
92
  Returns:
93
+ Dictionary mapping entity types to lists of entities
94
+ Each entity has: text, start, end, label, label_display, emoji, confidence
 
 
 
 
 
 
 
 
 
95
  """
96
+ if not text or len(text.strip()) == 0:
97
+ return {}
98
 
99
+ if self.nlp:
100
+ return self._extract_with_spacy(text)
101
+ elif self.use_heuristics:
102
+ return self._extract_with_heuristics(text)
103
+ else:
104
+ return {}
105
+
106
+ def _extract_with_spacy(self, text: str) -> Dict[str, List[Dict[str, Any]]]:
107
+ """Extract entities using spaCy NLP."""
108
  doc = self.nlp(text)
109
+ entities: Dict[str, List[Dict[str, Any]]] = {}
 
 
 
110
 
111
  for ent in doc.ents:
112
+ label = ent.label_
 
 
 
 
113
 
114
+ # Get display info
115
+ label_info = self.ENTITY_LABELS.get(label, {
116
+ 'fr': label,
117
+ 'en': label,
118
+ 'emoji': '🔖'
119
+ })
120
+
121
+ entity_data = {
122
  'text': ent.text,
 
 
123
  'start': ent.start_char,
124
+ 'end': ent.end_char,
125
+ 'label': label,
126
+ 'label_display': label_info.get('fr', label),
127
+ 'emoji': label_info.get('emoji', '🔖'),
128
+ 'confidence': 0.85 # spaCy doesn't provide confidence by default
129
  }
 
130
 
131
+ if label not in entities:
132
+ entities[label] = []
133
+
134
+ # Avoid duplicates
135
+ if not any(e['text'].lower() == entity_data['text'].lower() for e in entities[label]):
136
+ entities[label].append(entity_data)
137
+
138
+ return entities
139
+
140
+ def _extract_with_heuristics(self, text: str) -> Dict[str, List[Dict[str, Any]]]:
141
+ """
142
+ Fallback heuristic entity extraction.
143
+ Uses pattern matching for common entities.
144
+ """
145
+ import re
146
+ entities: Dict[str, List[Dict[str, Any]]] = {}
147
 
148
+ # Common patterns
149
+ patterns = {
150
+ 'PER': [
151
+ # Known political figures
152
+ r'\b(Donald Trump|Joe Biden|Emmanuel Macron|Hillary Clinton|Barack Obama|'
153
+ r'Vladimir Putin|Angela Merkel|Justin Trudeau|Boris Johnson)\b',
154
+ ],
155
+ 'ORG': [
156
+ r'\b(FBI|CIA|NSA|ONU|NATO|OTAN|Google|Facebook|Twitter|Meta|'
157
+ r'Amazon|Microsoft|Apple|CNN|BBC|Le Monde|New York Times|'
158
+ r'Parti Républicain|Parti Démocrate|Republican Party|Democratic Party)\b',
159
+ ],
160
+ 'LOC': [
161
+ r'\b(Capitol|White House|Maison Blanche|Kremlin|Élysée|Pentagon|'
162
+ r'New York|Washington|Paris|Londres|Moscou|Berlin|Beijing)\b',
163
+ ],
164
+ 'DATE': [
165
+ r'\b(\d{1,2}\s+(janvier|février|mars|avril|mai|juin|juillet|août|'
166
+ r'septembre|octobre|novembre|décembre)\s+\d{4})\b',
167
+ r'\b(\d{1,2}[-/]\d{1,2}[-/]\d{2,4})\b',
168
+ r'\b(January|February|March|April|May|June|July|August|'
169
+ r'September|October|November|December)\s+\d{1,2},?\s+\d{4}\b',
170
+ ],
171
+ 'MONEY': [
172
+ r'\$[\d,]+(?:\.\d{2})?(?:\s*(?:million|billion|trillion))?',
173
+ r'[\d,]+(?:\.\d{2})?\s*(?:dollars?|euros?|€|\$)',
174
+ r'[\d,]+\s*(?:million|milliard)s?\s*(?:de\s+)?(?:dollars?|euros?)',
175
+ ],
176
+ 'PERCENT': [
177
+ r'\b\d+(?:\.\d+)?%',
178
+ r'\b\d+(?:\.\d+)?\s*pour\s*cent',
179
+ r'\b\d+(?:\.\d+)?\s*percent',
180
+ ],
181
  }
182
+
183
+ for label, pattern_list in patterns.items():
184
+ label_info = self.ENTITY_LABELS.get(label, {'fr': label, 'emoji': '🔖'})
185
+
186
+ for pattern in pattern_list:
187
+ for match in re.finditer(pattern, text, re.IGNORECASE):
188
+ entity_data = {
189
+ 'text': match.group(),
190
+ 'start': match.start(),
191
+ 'end': match.end(),
192
+ 'label': label,
193
+ 'label_display': label_info.get('fr', label),
194
+ 'emoji': label_info.get('emoji', '🔖'),
195
+ 'confidence': 0.70 # Lower confidence for heuristics
196
+ }
197
+
198
+ if label not in entities:
199
+ entities[label] = []
200
+
201
+ # Avoid duplicates
202
+ if not any(e['text'].lower() == entity_data['text'].lower()
203
+ for e in entities[label]):
204
+ entities[label].append(entity_data)
205
+
206
+ return entities
207
 
208
+ def get_entity_summary(self, entities: Dict[str, List[Dict[str, Any]]]) -> str:
209
  """
210
+ Generate a human-readable summary of extracted entities.
211
 
212
+ Args:
213
+ entities: Dictionary of entities from extract_entities()
214
+
215
+ Returns:
216
+ Formatted string summary
217
  """
218
+ if not entities:
219
+ return "Aucune entité nommée détectée."
220
+
221
+ lines = []
222
+ for label, ent_list in entities.items():
223
+ label_info = self.ENTITY_LABELS.get(label, {'fr': label, 'emoji': '🔖'})
224
+ emoji = label_info.get('emoji', '🔖')
225
+ label_display = label_info.get('fr', label)
226
+
227
+ entity_texts = [e['text'] for e in ent_list[:5]] # Limit to 5
228
+ lines.append(f"{emoji} {label_display}: {', '.join(entity_texts)}")
229
 
230
+ return "\n".join(lines)
231
+
232
+ def to_frontend_format(self, entities: Dict[str, List[Dict[str, Any]]]) -> List[Dict]:
233
+ """
234
+ Convert entities to frontend-friendly format.
235
 
236
+ Returns:
237
+ List of entities with all info for display
238
+ """
239
+ result = []
240
+ for label, ent_list in entities.items():
241
+ for ent in ent_list:
242
+ result.append({
243
+ 'text': ent['text'],
244
+ 'type': ent['label'],
245
+ 'type_display': ent.get('label_display', ent['label']),
246
+ 'emoji': ent.get('emoji', '🔖'),
247
+ 'confidence': ent.get('confidence', 0.5),
248
+ 'confidence_pct': f"{int(ent.get('confidence', 0.5) * 100)}%"
249
+ })
250
 
251
+ # Sort by confidence
252
+ result.sort(key=lambda x: x['confidence'], reverse=True)
253
  return result
254
 
255
 
256
+ # Singleton instance for easy import
257
+ _ner_analyzer: Optional[NERAnalyzer] = None
258
+
259
 
260
+ def get_ner_analyzer(model_name: str = "fr_core_news_md") -> NERAnalyzer:
261
+ """Get or create singleton NER analyzer instance."""
262
+ global _ner_analyzer
263
+ if _ner_analyzer is None:
264
+ _ner_analyzer = NERAnalyzer(model_name=model_name, fallback=True)
265
+ return _ner_analyzer
266
 
267
 
268
+ # Quick test
269
  if __name__ == "__main__":
270
+ analyzer = NERAnalyzer(fallback=True)
 
 
 
 
271
 
272
  test_text = """
273
+ Donald Trump a affirmé que l'insurrection du 6 janvier 2021 au Capitol n'est jamais arrivée.
274
+ Le FBI enquête sur les événements. Le président Joe Biden a condamné ces déclarations à Washington.
275
+ Les dégâts sont estimés à 30 millions de dollars.
276
  """
277
 
278
+ entities = analyzer.extract_entities(test_text)
279
+ print("=== Entités détectées ===")
280
+ print(analyzer.get_entity_summary(entities))
281
+ print("\n=== Format Frontend ===")
282
+ for e in analyzer.to_frontend_format(entities):
283
+ print(f" {e['emoji']} {e['text']} ({e['type_display']}, {e['confidence_pct']})")
 
 
 
 
 
syscred/ontology_manager.py CHANGED
@@ -47,7 +47,7 @@ class OntologyManager:
47
  """
48
 
49
  # Namespace for the credibility ontology
50
- CRED_NS = "https://github.com/DominiqueLoyer/systemFactChecking#"
51
 
52
  def __init__(self, base_ontology_path: Optional[str] = None, data_path: Optional[str] = None):
53
  """
@@ -254,7 +254,7 @@ class OntologyManager:
254
 
255
  # SPARQL query to find all evaluations for this URL
256
  query = """
257
- PREFIX cred: <http://www.dic9335.uqam.ca/ontologies/credibility-verification#>
258
  PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
259
 
260
  SELECT ?report ?score ?level ?timestamp ?content
@@ -298,7 +298,7 @@ class OntologyManager:
298
 
299
  # Count evaluations
300
  query = """
301
- PREFIX cred: <http://www.dic9335.uqam.ca/ontologies/credibility-verification#>
302
  SELECT (COUNT(?report) as ?count) WHERE {
303
  ?report a cred:RapportEvaluation .
304
  }
@@ -321,7 +321,7 @@ class OntologyManager:
321
 
322
  # Get the latest report ID
323
  latest_query = """
324
- PREFIX cred: <https://github.com/DominiqueLoyer/systemFactChecking#>
325
  SELECT ?report ?timestamp WHERE {
326
  ?report a cred:RapportEvaluation .
327
  ?report cred:completionTimestamp ?timestamp .
@@ -355,7 +355,7 @@ class OntologyManager:
355
 
356
  # Query triples related to this report (Level 1)
357
  related_query = """
358
- PREFIX cred: <https://github.com/DominiqueLoyer/systemFactChecking#>
359
  SELECT ?p ?o ?oType ?oLabel WHERE {
360
  <%s> ?p ?o .
361
  OPTIONAL { ?o a ?oType } .
@@ -463,8 +463,8 @@ if __name__ == "__main__":
463
  print("=== Testing OntologyManager ===\n")
464
 
465
  # Test with base ontology
466
- base_path = "/Users/bk280625/documents041025/MonCode/sysCRED_onto26avrtil.ttl"
467
- data_path = "/Users/bk280625/documents041025/MonCode/ontology/sysCRED_data.ttl"
468
 
469
  manager = OntologyManager(base_ontology_path=base_path, data_path=None)
470
 
 
47
  """
48
 
49
  # Namespace for the credibility ontology
50
+ CRED_NS = "https://syscred.uqam.ca/ontology#"
51
 
52
  def __init__(self, base_ontology_path: Optional[str] = None, data_path: Optional[str] = None):
53
  """
 
254
 
255
  # SPARQL query to find all evaluations for this URL
256
  query = """
257
+ PREFIX cred: <https://syscred.uqam.ca/ontology#>
258
  PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
259
 
260
  SELECT ?report ?score ?level ?timestamp ?content
 
298
 
299
  # Count evaluations
300
  query = """
301
+ PREFIX cred: <https://syscred.uqam.ca/ontology#>
302
  SELECT (COUNT(?report) as ?count) WHERE {
303
  ?report a cred:RapportEvaluation .
304
  }
 
321
 
322
  # Get the latest report ID
323
  latest_query = """
324
+ PREFIX cred: <https://syscred.uqam.ca/ontology#>
325
  SELECT ?report ?timestamp WHERE {
326
  ?report a cred:RapportEvaluation .
327
  ?report cred:completionTimestamp ?timestamp .
 
355
 
356
  # Query triples related to this report (Level 1)
357
  related_query = """
358
+ PREFIX cred: <https://syscred.uqam.ca/ontology#>
359
  SELECT ?p ?o ?oType ?oLabel WHERE {
360
  <%s> ?p ?o .
361
  OPTIONAL { ?o a ?oType } .
 
463
  print("=== Testing OntologyManager ===\n")
464
 
465
  # Test with base ontology
466
+ base_path = os.path.join(os.path.dirname(__file__), '..', 'ontology', 'sysCRED_onto26avrtil.ttl')
467
+ data_path = os.path.join(os.path.dirname(__file__), '..', 'ontology', 'sysCRED_data.ttl')
468
 
469
  manager = OntologyManager(base_ontology_path=base_path, data_path=None)
470
 
syscred/verification_system.py CHANGED
@@ -33,28 +33,35 @@ except ImportError:
33
  HAS_SBERT = False
34
  print("Warning: sentence-transformers not installed. Semantic coherence will use heuristics.")
35
 
36
- # Local imports
37
- from syscred.api_clients import ExternalAPIClients, WebContent, ExternalData
38
- from syscred.ontology_manager import OntologyManager
39
- from syscred.seo_analyzer import SEOAnalyzer
40
- from syscred.graph_rag import GraphRAG # [NEW] GraphRAG
41
- from syscred.trec_retriever import TRECRetriever, Evidence, RetrievalResult # [NEW] TREC Integration
42
- from syscred import config
43
-
44
- # [NEW] NER and E-E-A-T modules
45
  try:
46
- from syscred.ner_analyzer import NERAnalyzer, get_ner_analyzer
47
- HAS_NER = True
 
 
 
 
48
  except ImportError:
49
- HAS_NER = False
50
- print("[SysCRED] Warning: NER module not available")
 
 
 
 
51
 
 
 
52
  try:
 
53
  from syscred.eeat_calculator import EEATCalculator, EEATScore
54
- HAS_EEAT = True
55
  except ImportError:
56
- HAS_EEAT = False
57
- print("[SysCRED] Warning: E-E-A-T module not available")
 
 
 
 
58
 
59
 
60
  class CredibilityVerificationSystem:
@@ -136,6 +143,18 @@ class CredibilityVerificationSystem:
136
  # Weights for score calculation (Loaded from Config)
137
  self.weights = config.Config.SCORE_WEIGHTS
138
  print(f"[SysCRED] Using weights: {self.weights}")
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
  print("[SysCRED] System ready!")
141
 
@@ -144,40 +163,47 @@ class CredibilityVerificationSystem:
144
  print("[SysCRED] Loading ML models (this may take a moment)...")
145
 
146
  try:
147
- # Sentiment analysis
148
  self.sentiment_pipeline = pipeline(
149
- "sentiment-analysis",
150
- model="distilbert-base-uncased-finetuned-sst-2-english"
 
 
151
  )
152
- print("[SysCRED] ✓ Sentiment model loaded")
153
  except Exception as e:
154
  print(f"[SysCRED] ✗ Sentiment model failed: {e}")
155
-
156
  try:
157
- # NER pipeline
158
- self.ner_pipeline = pipeline("ner", grouped_entities=True)
159
- print("[SysCRED] ✓ NER model loaded")
 
 
 
 
 
 
160
  except Exception as e:
161
  print(f"[SysCRED] ✗ NER model failed: {e}")
162
-
163
  try:
164
- # Bias detection - Specialized model
165
- # Using 'd4data/bias-detection-model' or fallback to generic
166
- bias_model_name = "d4data/bias-detection-model"
167
  self.bias_tokenizer = AutoTokenizer.from_pretrained(bias_model_name)
168
  self.bias_model = AutoModelForSequenceClassification.from_pretrained(bias_model_name)
169
- print("[SysCRED] ✓ Bias model loaded (d4data)")
170
  except Exception as e:
171
  print(f"[SysCRED] ✗ Bias model failed: {e}. Using heuristics.")
172
 
173
  try:
174
- # Semantic Coherence
175
  if HAS_SBERT:
176
  self.coherence_model = SentenceTransformer('all-MiniLM-L6-v2')
177
- print("[SysCRED] ✓ Coherence model loaded (SBERT)")
178
  except Exception as e:
179
  print(f"[SysCRED] ✗ Coherence model failed: {e}")
180
-
181
  try:
182
  # LIME explainer
183
  self.explainer = LimeTextExplainer(class_names=['NEGATIVE', 'POSITIVE'])
@@ -338,21 +364,6 @@ class CredibilityVerificationSystem:
338
 
339
  # 4. Semantic Coherence
340
  results['coherence_score'] = self._calculate_coherence(text)
341
-
342
- # 5. [NEW] E-E-A-T Score Calculation
343
- if HAS_EEAT:
344
- try:
345
- # Initialize calc if needed (lazy load)
346
- if not hasattr(self, 'eeat_calculator') or self.eeat_calculator is None:
347
- self.eeat_calculator = EEATCalculator()
348
-
349
- # Calculate score
350
- eeat = self.eeat_calculator.calculate_eeat(text, results.get('named_entities', []))
351
- # Store in results as dict
352
- results['eeat_score'] = eeat.to_dict()
353
- print(f"[NLP] EEAT Score calculated: {eeat.overall_score:.2f}")
354
- except Exception as e:
355
- print(f"[NLP] EEAT error: {e}")
356
 
357
  return results
358
 
@@ -516,6 +527,26 @@ class CredibilityVerificationSystem:
516
  adjustment_factor = (graph_score - 0.5) * w_graph * confidence
517
  adjustments += adjustment_factor
518
  total_weight_used += w_graph * confidence # Partial weight based on confidence
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
519
 
520
  # Final calculation
521
  # Base 0.5 + sum of weighted adjustments
@@ -672,11 +703,24 @@ class CredibilityVerificationSystem:
672
  ) -> Dict[str, Any]:
673
  """Generate the final evaluation report."""
674
 
 
 
 
 
 
 
 
 
 
 
 
 
675
  report = {
676
  'idRapport': f"report_{int(datetime.datetime.now().timestamp())}",
677
  'informationEntree': input_data,
678
  'dateGeneration': datetime.datetime.now().isoformat(),
679
  'scoreCredibilite': round(overall_score, 2),
 
680
  'resumeAnalyse': "",
681
  'detailsScore': {
682
  'base': 0.5,
@@ -703,8 +747,6 @@ class CredibilityVerificationSystem:
703
  },
704
  # [NEW] TREC Evidence section
705
  'evidences': evidences or [],
706
- # [NEW] TREC IR Metrics for dashboard
707
- 'trec_metrics': self._calculate_trec_metrics(cleaned_text, evidences),
708
  'metadonnees': {}
709
  }
710
 
@@ -758,7 +800,7 @@ class CredibilityVerificationSystem:
758
  })
759
  report['sourcesUtilisees'].append({
760
  'type': 'Fact Check API',
761
- 'results_count': len(external_data.fact_checks) if external_data.fact_checks else 0
762
  })
763
  # [NEW] Add TREC evidence source
764
  if evidences:
@@ -768,112 +810,9 @@ class CredibilityVerificationSystem:
768
  'corpus': 'AP88-90',
769
  'results_count': len(evidences)
770
  })
771
-
772
- # [FIX] Add explicit fields for frontend
773
- if nlp_results.get('named_entities'):
774
- report['ner_entities'] = nlp_results.get('named_entities')
775
-
776
- # Add EEAT score if available (from rule_results or nlp_results)
777
- if 'eeat_score' in rule_results:
778
- report['eeat_score'] = rule_results['eeat_score']
779
- elif 'eeat_score' in nlp_results:
780
- report['eeat_score'] = nlp_results['eeat_score']
781
 
782
  return report
783
 
784
- def _calculate_trec_metrics(self, text: str, evidences: List[Dict[str, Any]] = None) -> Dict[str, float]:
785
- """
786
- Calculate TREC-style IR metrics for display on dashboard.
787
-
788
- Computes:
789
- - Precision: Ratio of relevant retrieved documents
790
- - Recall: Ratio of relevant documents retrieved
791
- - MAP: Mean Average Precision
792
- - NDCG: Normalized Discounted Cumulative Gain
793
- - TF-IDF: Term Frequency-Inverse Document Frequency score
794
- - MRR: Mean Reciprocal Rank
795
- """
796
- import math
797
-
798
- metrics = {
799
- 'precision': 0.0,
800
- 'recall': 0.0,
801
- 'map': 0.0,
802
- 'ndcg': 0.0,
803
- 'tfidf': 0.0,
804
- 'mrr': 0.0
805
- }
806
-
807
- if not text:
808
- return metrics
809
-
810
- # TF-IDF based on text analysis
811
- words = text.lower().split()
812
- if words:
813
- # Simple TF calculation
814
- word_counts = {}
815
- for word in words:
816
- word_counts[word] = word_counts.get(word, 0) + 1
817
-
818
- # Calculate TF-IDF score (simplified)
819
- total_words = len(words)
820
- unique_words = len(word_counts)
821
-
822
- # Term frequency normalized
823
- tf_scores = [count / total_words for count in word_counts.values()]
824
- # IDF approximation based on word distribution
825
- idf_approx = math.log((unique_words + 1) / 2)
826
-
827
- tfidf_sum = sum(tf * idf_approx for tf in tf_scores)
828
- metrics['tfidf'] = min(1.0, tfidf_sum / max(1, unique_words) * 10)
829
-
830
- # If we have evidences, calculate retrieval metrics
831
- if evidences and len(evidences) > 0:
832
- k = len(evidences)
833
-
834
- # For now, assume all retrieved evidences have some relevance
835
- # based on their retrieval scores
836
- scores = [e.get('score', 0) for e in evidences]
837
-
838
- if scores:
839
- avg_score = sum(scores) / len(scores)
840
- max_score = max(scores)
841
-
842
- # Precision at K (proxy: avg relevance score)
843
- metrics['precision'] = min(1.0, avg_score if avg_score <= 1.0 else avg_score / max(1, max_score))
844
-
845
- # Recall (proxy: coverage based on number of evidences)
846
- metrics['recall'] = min(1.0, len(evidences) / 10) # Assuming 10 is target
847
-
848
- # MAP (proxy using score ranking)
849
- ap_sum = 0.0
850
- for i, score in enumerate(sorted(scores, reverse=True)):
851
- ap_sum += (i + 1) / (i + 2) * score if score <= 1.0 else (i + 1) / (i + 2)
852
- metrics['map'] = ap_sum / len(scores) if scores else 0.0
853
-
854
- # NDCG (simplified)
855
- dcg = sum(
856
- (2 ** (score if score <= 1.0 else 1.0) - 1) / math.log2(i + 2)
857
- for i, score in enumerate(scores[:k])
858
- )
859
- ideal_scores = sorted(scores, reverse=True)
860
- idcg = sum(
861
- (2 ** (score if score <= 1.0 else 1.0) - 1) / math.log2(i + 2)
862
- for i, score in enumerate(ideal_scores[:k])
863
- )
864
- metrics['ndcg'] = dcg / idcg if idcg > 0 else 0.0
865
-
866
- # MRR (first relevant result)
867
- for i, score in enumerate(scores):
868
- if (score > 0.5 if score <= 1.0 else score > max_score / 2):
869
- metrics['mrr'] = 1.0 / (i + 1)
870
- break
871
- if metrics['mrr'] == 0 and len(scores) > 0:
872
- metrics['mrr'] = 1.0 # First result
873
-
874
- # Round all values
875
- return {k: round(v, 4) for k, v in metrics.items()}
876
-
877
  def _get_score_factors(self, rule_results: Dict, nlp_results: Dict) -> List[Dict]:
878
  """Get list of factors that influenced the score (For UI)."""
879
  factors = []
@@ -1034,6 +973,40 @@ class CredibilityVerificationSystem:
1034
  print("[SysCRED] Running NLP analysis...")
1035
  nlp_results = self.nlp_analysis(cleaned_text)
1036
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1037
  # 7. Calculate score (Now includes GraphRAG context)
1038
  overall_score = self.calculate_overall_score(rule_results, nlp_results)
1039
  print(f"[SysCRED] ✓ Credibility score: {overall_score:.2f}")
@@ -1045,6 +1018,10 @@ class CredibilityVerificationSystem:
1045
  graph_context=graph_context
1046
  )
1047
 
 
 
 
 
1048
  # Add similar URIs to report for ontology linking
1049
  if similar_uris:
1050
  report['similar_claims_uris'] = similar_uris
 
33
  HAS_SBERT = False
34
  print("Warning: sentence-transformers not installed. Semantic coherence will use heuristics.")
35
 
36
+ # Local imports - Support both syscred.module and relative imports
 
 
 
 
 
 
 
 
37
  try:
38
+ from syscred.api_clients import ExternalAPIClients, WebContent, ExternalData
39
+ from syscred.ontology_manager import OntologyManager
40
+ from syscred.seo_analyzer import SEOAnalyzer
41
+ from syscred.graph_rag import GraphRAG
42
+ from syscred.trec_retriever import TRECRetriever, Evidence, RetrievalResult
43
+ from syscred import config
44
  except ImportError:
45
+ from api_clients import ExternalAPIClients, WebContent, ExternalData
46
+ from ontology_manager import OntologyManager
47
+ from seo_analyzer import SEOAnalyzer
48
+ from graph_rag import GraphRAG
49
+ from trec_retriever import TRECRetriever, Evidence, RetrievalResult
50
+ import config
51
 
52
+ # [NER + E-E-A-T] Imports optionnels - n'interferent pas avec les imports principaux
53
+ HAS_NER_EEAT = False
54
  try:
55
+ from syscred.ner_analyzer import NERAnalyzer
56
  from syscred.eeat_calculator import EEATCalculator, EEATScore
57
+ HAS_NER_EEAT = True
58
  except ImportError:
59
+ try:
60
+ from ner_analyzer import NERAnalyzer
61
+ from eeat_calculator import EEATCalculator, EEATScore
62
+ HAS_NER_EEAT = True
63
+ except ImportError:
64
+ pass
65
 
66
 
67
  class CredibilityVerificationSystem:
 
143
  # Weights for score calculation (Loaded from Config)
144
  self.weights = config.Config.SCORE_WEIGHTS
145
  print(f"[SysCRED] Using weights: {self.weights}")
146
+
147
+ # [NER + E-E-A-T] Initialize analyzers
148
+ self.ner_analyzer = None
149
+ self.eeat_calculator = None
150
+ if HAS_NER_EEAT:
151
+ try:
152
+ self.ner_analyzer = NERAnalyzer()
153
+ self.eeat_calculator = EEATCalculator()
154
+ print("[SysCRED] NER analyzer initialized")
155
+ print("[SysCRED] E-E-A-T calculator initialized")
156
+ except Exception as e:
157
+ print(f"[SysCRED] NER/E-E-A-T init failed: {e}")
158
 
159
  print("[SysCRED] System ready!")
160
 
 
163
  print("[SysCRED] Loading ML models (this may take a moment)...")
164
 
165
  try:
166
+ # Sentiment analysis - modèle ultra-léger
167
  self.sentiment_pipeline = pipeline(
168
+ "sentiment-analysis",
169
+ model="distilbert-base-uncased-finetuned-sst-2-english",
170
+ device=-1,
171
+ model_kwargs={"low_cpu_mem_usage": True}
172
  )
173
+ print("[SysCRED] ✓ Sentiment model loaded (distilbert-base)")
174
  except Exception as e:
175
  print(f"[SysCRED] ✗ Sentiment model failed: {e}")
176
+
177
  try:
178
+ # NER pipeline - modèle plus léger
179
+ self.ner_pipeline = pipeline(
180
+ "ner",
181
+ model="dslim/bert-base-NER",
182
+ grouped_entities=True,
183
+ device=-1,
184
+ model_kwargs={"low_cpu_mem_usage": True}
185
+ )
186
+ print("[SysCRED] ✓ NER model loaded (dslim/bert-base-NER)")
187
  except Exception as e:
188
  print(f"[SysCRED] ✗ NER model failed: {e}")
189
+
190
  try:
191
+ # Bias detection - modèle plus léger si possible
192
+ bias_model_name = "typeform/distilbert-base-uncased-mnli"
 
193
  self.bias_tokenizer = AutoTokenizer.from_pretrained(bias_model_name)
194
  self.bias_model = AutoModelForSequenceClassification.from_pretrained(bias_model_name)
195
+ print("[SysCRED] ✓ Bias model loaded (distilbert-mnli)")
196
  except Exception as e:
197
  print(f"[SysCRED] ✗ Bias model failed: {e}. Using heuristics.")
198
 
199
  try:
200
+ # Semantic Coherence - modèle MiniLM (déjà léger)
201
  if HAS_SBERT:
202
  self.coherence_model = SentenceTransformer('all-MiniLM-L6-v2')
203
+ print("[SysCRED] ✓ Coherence model loaded (SBERT MiniLM)")
204
  except Exception as e:
205
  print(f"[SysCRED] ✗ Coherence model failed: {e}")
206
+
207
  try:
208
  # LIME explainer
209
  self.explainer = LimeTextExplainer(class_names=['NEGATIVE', 'POSITIVE'])
 
364
 
365
  # 4. Semantic Coherence
366
  results['coherence_score'] = self._calculate_coherence(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
 
368
  return results
369
 
 
527
  adjustment_factor = (graph_score - 0.5) * w_graph * confidence
528
  adjustments += adjustment_factor
529
  total_weight_used += w_graph * confidence # Partial weight based on confidence
530
+
531
+ # 8. [NEW] Linguistic Markers Analysis (sensationalism penalty)
532
+ # Penalize sensational language heavily, reward doubt markers (critical thinking)
533
+ linguistic = rule_results.get('linguistic_markers', {})
534
+ sensationalism_count = linguistic.get('sensationalism', 0)
535
+ doubt_count = linguistic.get('doubt', 0)
536
+ certainty_count = linguistic.get('certainty', 0)
537
+
538
+ # Sensationalism is a strong negative signal
539
+ if sensationalism_count > 0:
540
+ penalty = min(0.20, sensationalism_count * 0.05) # Max 20% penalty
541
+ adjustments -= penalty
542
+
543
+ # Excessive certainty without sources is suspicious
544
+ if certainty_count > 2 and not fact_checks:
545
+ adjustments -= 0.05
546
+
547
+ # Doubt markers indicate critical/questioning tone (slight positive)
548
+ if doubt_count > 0:
549
+ adjustments += min(0.05, doubt_count * 0.02)
550
 
551
  # Final calculation
552
  # Base 0.5 + sum of weighted adjustments
 
703
  ) -> Dict[str, Any]:
704
  """Generate the final evaluation report."""
705
 
706
+ # Determine credibility level
707
+ if overall_score >= 0.75:
708
+ niveau = "Élevée"
709
+ elif overall_score >= 0.55:
710
+ niveau = "Moyenne-Élevée"
711
+ elif overall_score >= 0.45:
712
+ niveau = "Moyenne"
713
+ elif overall_score >= 0.25:
714
+ niveau = "Faible-Moyenne"
715
+ else:
716
+ niveau = "Faible"
717
+
718
  report = {
719
  'idRapport': f"report_{int(datetime.datetime.now().timestamp())}",
720
  'informationEntree': input_data,
721
  'dateGeneration': datetime.datetime.now().isoformat(),
722
  'scoreCredibilite': round(overall_score, 2),
723
+ 'niveauCredibilite': niveau,
724
  'resumeAnalyse': "",
725
  'detailsScore': {
726
  'base': 0.5,
 
747
  },
748
  # [NEW] TREC Evidence section
749
  'evidences': evidences or [],
 
 
750
  'metadonnees': {}
751
  }
752
 
 
800
  })
801
  report['sourcesUtilisees'].append({
802
  'type': 'Fact Check API',
803
+ 'results_count': len(external_data.fact_checks)
804
  })
805
  # [NEW] Add TREC evidence source
806
  if evidences:
 
810
  'corpus': 'AP88-90',
811
  'results_count': len(evidences)
812
  })
 
 
 
 
 
 
 
 
 
 
813
 
814
  return report
815
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
816
  def _get_score_factors(self, rule_results: Dict, nlp_results: Dict) -> List[Dict]:
817
  """Get list of factors that influenced the score (For UI)."""
818
  factors = []
 
973
  print("[SysCRED] Running NLP analysis...")
974
  nlp_results = self.nlp_analysis(cleaned_text)
975
 
976
+ # 6.5 [NER] Named Entity Recognition
977
+ ner_entities = {}
978
+ if self.ner_analyzer and cleaned_text:
979
+ try:
980
+ ner_entities = self.ner_analyzer.extract_entities(cleaned_text)
981
+ total = sum(len(v) for v in ner_entities.values() if isinstance(v, list))
982
+ print(f"[SysCRED] NER: {total} entites detectees")
983
+ except Exception as e:
984
+ print(f"[SysCRED] NER failed: {e}")
985
+
986
+ # 6.6 [E-E-A-T] Experience-Expertise-Authority-Trust scoring
987
+ eeat_scores = {}
988
+ if self.eeat_calculator:
989
+ try:
990
+ url_for_eeat = input_data if is_url else ""
991
+ domain_age_years = None
992
+ if external_data.domain_age_days:
993
+ domain_age_years = external_data.domain_age_days / 365.0
994
+
995
+ eeat_raw = self.eeat_calculator.calculate(
996
+ url=url_for_eeat,
997
+ text=cleaned_text,
998
+ nlp_analysis=nlp_results,
999
+ fact_checks=rule_results.get('fact_checking', []),
1000
+ domain_age_years=domain_age_years,
1001
+ has_https=input_data.startswith("https://") if is_url else False
1002
+ )
1003
+ eeat_scores = eeat_raw.to_dict() if hasattr(eeat_raw, 'to_dict') else (
1004
+ eeat_raw if isinstance(eeat_raw, dict) else vars(eeat_raw)
1005
+ )
1006
+ print(f"[SysCRED] E-E-A-T score: {eeat_scores.get('overall', 'N/A')}")
1007
+ except Exception as e:
1008
+ print(f"[SysCRED] E-E-A-T failed: {e}")
1009
+
1010
  # 7. Calculate score (Now includes GraphRAG context)
1011
  overall_score = self.calculate_overall_score(rule_results, nlp_results)
1012
  print(f"[SysCRED] ✓ Credibility score: {overall_score:.2f}")
 
1018
  graph_context=graph_context
1019
  )
1020
 
1021
+ # [NER + E-E-A-T] Always include in report (even if empty)
1022
+ report['ner_entities'] = ner_entities
1023
+ report['eeat_scores'] = eeat_scores
1024
+
1025
  # Add similar URIs to report for ontology linking
1026
  if similar_uris:
1027
  report['similar_claims_uris'] = similar_uris