DomLoyer committed on
Commit
ea9303b
·
verified ·
1 Parent(s): d228524

Sync: TREC IR metrics in verify, DB fallback, NER/EEAT fix, all API keys

Browse files
.env.example ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # --- SysCRED Environment Variables ---
2
+ # Copy this file to .env and fill in your values
3
+
4
+ # --- Google Fact Check API ---
5
+ SYSCRED_GOOGLE_API_KEY=your_google_api_key_here
6
+
7
+ # --- Supabase Database ---
8
+ SYSCRED_DATABASE_URL=postgresql://user:password@host:5432/dbname
9
+ SUPABASE_URL=https://your-project.supabase.co
10
+ SUPABASE_KEY=your_supabase_key
11
+
12
+ # --- Application URLs ---
13
+ SYSCRED_BASE_URL=https://syscred.uqam.ca
14
+
15
+ # --- Server Settings ---
16
+ SYSCRED_PORT=5001
17
+ SYSCRED_ENV=development
18
+ SYSCRED_LOAD_ML_MODELS=true
Dockerfile CHANGED
@@ -1,5 +1,5 @@
1
  # SysCRED Docker Configuration for Hugging Face Spaces
2
- # OPTIMIZED version with Distilled Models for faster startup
3
  FROM python:3.10-slim
4
 
5
  WORKDIR /app
@@ -7,48 +7,27 @@ WORKDIR /app
7
  ENV PYTHONDONTWRITEBYTECODE=1
8
  ENV PYTHONUNBUFFERED=1
9
  ENV PYTHONPATH=/app
10
-
11
- # ============================================
12
- # KEY OPTIMIZATION: Use distilled models
13
- # ============================================
14
  ENV SYSCRED_LOAD_ML_MODELS=true
15
- ENV SYSCRED_USE_DISTILLED=true
16
- ENV TRANSFORMERS_CACHE=/app/.cache/huggingface
17
- ENV HF_HOME=/app/.cache/huggingface
18
 
19
  # Install system dependencies
20
- RUN apt-get update && apt-get install -y \
21
  build-essential \
22
  && rm -rf /var/lib/apt/lists/*
23
 
24
- # Copy optimized requirements (distilled models, CPU-only torch)
25
- COPY requirements-distilled.txt /app/requirements.txt
26
 
27
- # Install dependencies
28
  RUN pip install --no-cache-dir -r requirements.txt
29
 
30
- # ============================================
31
- # PRE-DOWNLOAD DISTILLED MODELS (Build Time)
32
- # This avoids timeout during first request
33
- # ============================================
34
- RUN python -c "from transformers import pipeline; \
35
- pipeline('sentiment-analysis', model='distilbert-base-uncased-finetuned-sst-2-english'); \
36
- pipeline('ner', model='dslim/bert-base-NER'); \
37
- print('✓ Distilled models pre-downloaded')"
38
-
39
- # Download small spaCy models
40
- RUN pip install spacy && \
41
- python -m spacy download en_core_web_sm && \
42
- python -m spacy download fr_core_news_sm && \
43
- echo '✓ spaCy models downloaded'
44
-
45
- # Pre-download sentence transformer (small version)
46
- RUN python -c "from sentence_transformers import SentenceTransformer; \
47
- SentenceTransformer('all-MiniLM-L6-v2'); \
48
- print('✓ Sentence transformer pre-downloaded')"
49
 
50
  # Copy application code
51
  COPY syscred/ /app/syscred/
 
52
 
53
  # Create user for HF Spaces (required)
54
  RUN useradd -m -u 1000 user
@@ -61,5 +40,4 @@ WORKDIR /app
61
  EXPOSE 7860
62
 
63
  # Run with HF Spaces port (7860)
64
- # Increased workers to 4 for better concurrency, timeout 600s
65
- CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "1", "--threads", "4", "--timeout", "600", "syscred.backend_app:app"]
 
1
  # SysCRED Docker Configuration for Hugging Face Spaces
2
+ # Full version with PyTorch and Transformers
3
  FROM python:3.10-slim
4
 
5
  WORKDIR /app
 
7
  ENV PYTHONDONTWRITEBYTECODE=1
8
  ENV PYTHONUNBUFFERED=1
9
  ENV PYTHONPATH=/app
 
 
 
 
10
  ENV SYSCRED_LOAD_ML_MODELS=true
11
+ ENV SYSCRED_ENV=production
 
 
12
 
13
  # Install system dependencies
14
+ RUN apt-get update && apt-get install -y --no-install-recommends \
15
  build-essential \
16
  && rm -rf /var/lib/apt/lists/*
17
 
18
+ # Copy requirements (full version with ML)
19
+ COPY requirements.txt /app/requirements.txt
20
 
21
+ # Install dependencies (includes PyTorch, Transformers)
22
  RUN pip install --no-cache-dir -r requirements.txt
23
 
24
+ # Download spaCy models for NER
25
+ RUN python -m spacy download en_core_web_md || true
26
+ RUN python -m spacy download fr_core_news_md || true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  # Copy application code
29
  COPY syscred/ /app/syscred/
30
+ COPY ontology/ /app/ontology/
31
 
32
  # Create user for HF Spaces (required)
33
  RUN useradd -m -u 1000 user
 
40
  EXPOSE 7860
41
 
42
  # Run with HF Spaces port (7860)
43
+ CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "2", "--timeout", "300", "syscred.backend_app:app"]
 
ontology/sysCRED_data.ttl ADDED
The diff for this file is too large to render. See raw diff
 
ontology/sysCRED_onto26avrtil.ttl ADDED
@@ -0,0 +1,1030 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @base <https://syscred.uqam.ca/ontology#> .
2
+ @prefix : <https://syscred.uqam.ca/ontology#> .
3
+ @prefix owl: <http://www.w3.org/2002/07/owl#> .
4
+ @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
5
+ @prefix xml: <http://www.w3.org/XML/1998/namespace> .
6
+ @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
7
+ @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
8
+ #
9
+ #
10
+ # #################################################################
11
+ # #
12
+ # # Annotation properties
13
+ # #
14
+ # #################################################################
15
+ #
16
+ #
17
+ # http://www.w3.org/2002/07/owl#maxCardinality
18
+ #
19
+ #
20
+ #
21
+ # #################################################################
22
+ # #
23
+ # # Object Properties
24
+ # #
25
+ # #################################################################
26
+ #
27
+ #
28
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#analyzesSource
29
+ #
30
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#appliesRule
31
+ #
32
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#assignsCredibilityLevel
33
+ #
34
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#basedOnEvidence
35
+ #
36
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#concernsCriterion
37
+ #
38
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#concernsInformation
39
+ #
40
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#configuredByExpert
41
+ #
42
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#evaluatesCriterion
43
+ #
44
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#fetchesDataFrom
45
+ #
46
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#hasAuthor
47
+ #
48
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#hasCriterionResult
49
+ #
50
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#hasOriginalSource
51
+ #
52
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#includesNLPResult
53
+ #
54
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#includesRuleResult
55
+ #
56
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#includesSourceAnalysis
57
+ #
58
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#isReportOf
59
+ #
60
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#isSubjectOfRequest
61
+ #
62
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#obtainedVia
63
+ #
64
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#originatesFrom
65
+ #
66
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#producesReport
67
+ #
68
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#submitsRequest
69
+ #
70
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#submittedBy
71
+ #
72
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#usesModel
73
+ #
74
+ #
75
+ #
76
+ # #################################################################
77
+ # #
78
+ # # Data properties
79
+ # #
80
+ # #################################################################
81
+ #
82
+ #
83
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#authorName
84
+ #
85
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#coherenceScore
86
+ #
87
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#completionTimestamp
88
+ #
89
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#credibilityLevelValue
90
+ #
91
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#credibilityScoreValue
92
+ #
93
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#criterionResultConfidence
94
+ #
95
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#criterionResultValue
96
+ #
97
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#detectedBiases
98
+ #
99
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#evidenceSnippet
100
+ #
101
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#evidenceURL
102
+ #
103
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#informationContent
104
+ #
105
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#informationURL
106
+ #
107
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#modelName
108
+ #
109
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#modelType
110
+ #
111
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#reportSummary
112
+ #
113
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#requestStatus
114
+ #
115
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ruleDescription
116
+ #
117
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ruleLogic
118
+ #
119
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ruleResultValid
120
+ #
121
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ruleWeight
122
+ #
123
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#sentimentScore
124
+ #
125
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#sourceAnalyzedReputation
126
+ #
127
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#sourceAnalyzedURL
128
+ #
129
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#sourceMentionsCount
130
+ #
131
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#sourceReputationScore
132
+ #
133
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#sourceURL
134
+ #
135
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#submissionTimestamp
136
+ #
137
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#userName
138
+ #
139
+ #
140
+ #
141
+ # #################################################################
142
+ # #
143
+ # # Classes
144
+ # #
145
+ # #################################################################
146
+ #
147
+ #
148
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#AcademicJournal
149
+ #
150
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ApiLLM
151
+ #
152
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Author
153
+ #
154
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#BaseDeFaits
155
+ #
156
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#CredibilityLevel
157
+ #
158
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Evidence
159
+ #
160
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Expert
161
+ #
162
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#FactCheckingOrganization
163
+ #
164
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#InfoSourceAnalyse
165
+ #
166
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#InformationFaibleCredibilite
167
+ #
168
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#InformationHauteCredibilite
169
+ #
170
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#InformationMoyenneCredibilite
171
+ #
172
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#InformationSoumise
173
+ #
174
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#InformationVerifiee
175
+ #
176
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ModeleIA
177
+ #
178
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#MoteurRecherche
179
+ #
180
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#NewsWebsite
181
+ #
182
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_Bas
183
+ #
184
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_Haut
185
+ #
186
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_Moyen
187
+ #
188
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_NonVerifie
189
+ #
190
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#PersonalBlog
191
+ #
192
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#RapportEvaluation
193
+ #
194
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#RefutingEvidence
195
+ #
196
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#RegleVerification
197
+ #
198
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#RequeteEvaluation
199
+ #
200
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ResultatCritere
201
+ #
202
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ResultatNLP
203
+ #
204
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ResultatRegle
205
+ #
206
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#ResultatVerification
207
+ #
208
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#SocialMediaPlatform
209
+ #
210
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Source
211
+ #
212
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#SupportingEvidence
213
+ #
214
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#SystemeExterne
215
+ #
216
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#User
217
+ #
218
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#VerificationCriterion
219
+ #
220
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#VerificationMethod
221
+ #
222
+ #
223
+ #
224
+ # #################################################################
225
+ # #
226
+ # # Individuals
227
+ # #
228
+ # #################################################################
229
+ #
230
+ #
231
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Criteria_AuthorExpertise
232
+ #
233
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Criteria_CoherenceAnalysis
234
+ #
235
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Criteria_CrossReferencing
236
+ #
237
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Criteria_FactCheckDB
238
+ #
239
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Criteria_SourceReputation
240
+ #
241
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Criteria_ToneAnalysis
242
+ #
243
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_Bas
244
+ #
245
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_Haut
246
+ #
247
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_Moyen
248
+ #
249
+ # http://www.dic9335.uqam.ca/ontologies/credibility-verification#Niveau_NonVerifie
250
+ #
251
+ #
252
+ #
253
+ # #################################################################
254
+ # #
255
+ # # Annotations
256
+ # #
257
+ # #################################################################
258
+ #
259
+ #
260
+ #
261
+ #
262
+ #
263
+ #
264
+ #
265
+ #
266
+ # #################################################################
267
+ # #
268
+ # # General axioms
269
+ # #
270
+ # #################################################################
271
+ #
272
+ #
273
+ #
274
+ #
275
+ #
276
+ #
277
+ # Generated by the OWL API (version 4.5.29.2024-05-13T12:11:03Z) https://github.com/owlcs/owlapi
278
+
279
+ <credibility-verification> a owl:Ontology;
280
+ rdfs:comment "Ontologie enrichie et adaptée modélisant les concepts liés à la vérification de la crédibilité des sources d'information sur le Web, basée sur le rapport de modélisation UML et inspirée par l'ontologie de subvention recherche."@fr;
281
+ rdfs:label "Ontologie Système de Vérification de Sources (Adaptée Rapport + Subvention)"@fr;
282
+ owl:versionInfo "2.1" .
283
+
284
+ owl:maxCardinality a owl:AnnotationProperty .
285
+
286
+ :analyzesSource a owl:ObjectProperty;
287
+ rdfs:domain :InfoSourceAnalyse;
288
+ rdfs:range :Source;
289
+ rdfs:label "analyse source"@fr .
290
+
291
+ :appliesRule a owl:ObjectProperty, owl:FunctionalProperty;
292
+ rdfs:domain :ResultatRegle;
293
+ rdfs:range :RegleVerification;
294
+ rdfs:label "applique règle"@fr .
295
+
296
+ :assignsCredibilityLevel a owl:ObjectProperty, owl:FunctionalProperty;
297
+ rdfs:domain :RapportEvaluation;
298
+ rdfs:range :CredibilityLevel;
299
+ rdfs:comment "Lie un rapport d'évaluation au niveau de crédibilité final attribué."@fr;
300
+ rdfs:label "assigne niveau crédibilité"@fr .
301
+
302
+ :basedOnEvidence a owl:ObjectProperty;
303
+ rdfs:domain :RapportEvaluation;
304
+ rdfs:range :Evidence;
305
+ rdfs:comment "Lie un rapport d'évaluation aux preuves collectées."@fr;
306
+ rdfs:label "basé sur preuve"@fr .
307
+
308
+ :concernsCriterion a owl:ObjectProperty, owl:FunctionalProperty;
309
+ rdfs:domain :ResultatCritere;
310
+ rdfs:range :VerificationCriterion;
311
+ rdfs:label "concerne critère"@fr .
312
+
313
+ :concernsInformation a owl:ObjectProperty, owl:FunctionalProperty;
314
+ owl:inverseOf :isSubjectOfRequest;
315
+ rdfs:domain :RequeteEvaluation;
316
+ rdfs:range :InformationSoumise;
317
+ rdfs:label "concerne information"@fr .
318
+
319
+ :configuredByExpert a owl:ObjectProperty;
320
+ rdfs:domain _:genid1;
321
+ rdfs:range :Expert;
322
+ rdfs:label "configuré par expert"@fr .
323
+
324
+ _:genid1 a owl:Class;
325
+ owl:unionOf _:genid4 .
326
+
327
+ _:genid4 a rdf:List;
328
+ rdf:first :ModeleIA;
329
+ rdf:rest _:genid3 .
330
+
331
+ _:genid3 a rdf:List;
332
+ rdf:first :RegleVerification;
333
+ rdf:rest _:genid2 .
334
+
335
+ _:genid2 a rdf:List;
336
+ rdf:first :VerificationCriterion;
337
+ rdf:rest rdf:nil .
338
+
339
+ :evaluatesCriterion a owl:ObjectProperty;
340
+ rdfs:domain _:genid5;
341
+ rdfs:range :VerificationCriterion;
342
+ rdfs:comment "Lie une règle ou un modèle au critère de vérification qu'il est conçu pour évaluer."@fr;
343
+ rdfs:label "évalue critère"@fr .
344
+
345
+ _:genid5 a owl:Class;
346
+ owl:unionOf _:genid7 .
347
+
348
+ _:genid7 a rdf:List;
349
+ rdf:first :ModeleIA;
350
+ rdf:rest _:genid6 .
351
+
352
+ _:genid6 a rdf:List;
353
+ rdf:first :RegleVerification;
354
+ rdf:rest rdf:nil .
355
+
356
+ :fetchesDataFrom a owl:ObjectProperty;
357
+ rdfs:domain :RequeteEvaluation;
358
+ rdfs:range :SystemeExterne;
359
+ rdfs:label "récupère données de"@fr .
360
+
361
+ :hasAuthor a owl:ObjectProperty;
362
+ rdfs:domain :InformationSoumise;
363
+ rdfs:range :Author;
364
+ rdfs:comment "Lie une information soumise à son auteur présumé."@fr;
365
+ rdfs:label "a pour auteur"@fr .
366
+
367
+ :hasCriterionResult a owl:ObjectProperty;
368
+ rdfs:domain :RapportEvaluation;
369
+ rdfs:range :ResultatCritere;
370
+ rdfs:comment "Lie un rapport au résultat détaillé pour un critère d'évaluation spécifique."@fr;
371
+ rdfs:label "a résultat pour critère"@fr .
372
+
373
+ :hasOriginalSource a owl:ObjectProperty;
374
+ rdfs:domain :InformationSoumise;
375
+ rdfs:range :Source;
376
+ rdfs:comment "Lie une information soumise à sa source d'origine principale."@fr;
377
+ rdfs:label "a pour source originale"@fr .
378
+
379
+ :includesNLPResult a owl:ObjectProperty;
380
+ rdfs:domain :RapportEvaluation;
381
+ rdfs:range :ResultatNLP;
382
+ rdfs:label "inclut résultat NLP"@fr .
383
+
384
+ :includesRuleResult a owl:ObjectProperty;
385
+ rdfs:domain :RapportEvaluation;
386
+ rdfs:range :ResultatRegle;
387
+ rdfs:label "inclut résultat règle"@fr .
388
+
389
+ :includesSourceAnalysis a owl:ObjectProperty;
390
+ rdfs:domain :RapportEvaluation;
391
+ rdfs:range :InfoSourceAnalyse;
392
+ rdfs:label "inclut analyse source"@fr .
393
+
394
+ :isReportOf a owl:ObjectProperty, owl:InverseFunctionalProperty;
395
+ owl:inverseOf :producesReport;
396
+ rdfs:domain :RapportEvaluation;
397
+ rdfs:range :RequeteEvaluation;
398
+ rdfs:label "est rapport de"@fr .
399
+
400
+ :isSubjectOfRequest a owl:ObjectProperty;
401
+ rdfs:domain :InformationSoumise;
402
+ rdfs:range :RequeteEvaluation;
403
+ rdfs:label "est sujet de requête"@fr .
404
+
405
+ :obtainedVia a owl:ObjectProperty;
406
+ rdfs:domain :ResultatCritere;
407
+ rdfs:range _:genid8;
408
+ rdfs:label "obtenu via"@fr .
409
+
410
+ _:genid8 a owl:Class;
411
+ owl:unionOf _:genid10 .
412
+
413
+ _:genid10 a rdf:List;
414
+ rdf:first :ResultatNLP;
415
+ rdf:rest _:genid9 .
416
+
417
+ _:genid9 a rdf:List;
418
+ rdf:first :ResultatRegle;
419
+ rdf:rest rdf:nil .
420
+
421
+ :originatesFrom a owl:ObjectProperty;
422
+ rdfs:domain :Evidence;
423
+ rdfs:range :Source;
424
+ rdfs:comment "Lie une preuve à la source d'où elle a été extraite."@fr;
425
+ rdfs:label "provient de"@fr .
426
+
427
+ :producesReport a owl:ObjectProperty, owl:FunctionalProperty;
428
+ rdfs:domain :RequeteEvaluation;
429
+ rdfs:range :RapportEvaluation;
430
+ rdfs:label "produit rapport"@fr .
431
+
432
+ :submitsRequest a owl:ObjectProperty;
433
+ owl:inverseOf :submittedBy;
434
+ rdfs:domain :User;
435
+ rdfs:range :RequeteEvaluation;
436
+ rdfs:label "soumet requête"@fr .
437
+
438
+ :submittedBy a owl:ObjectProperty, owl:FunctionalProperty;
439
+ rdfs:domain :RequeteEvaluation;
440
+ rdfs:range :User;
441
+ rdfs:comment "Lie une requête de vérification à l'utilisateur qui l'a soumise."@fr;
442
+ rdfs:label "soumise par"@fr .
443
+
444
+ :usesModel a owl:ObjectProperty, owl:FunctionalProperty;
445
+ rdfs:domain :ResultatNLP;
446
+ rdfs:range :ModeleIA;
447
+ rdfs:label "utilise modèle"@fr .
448
+
449
+ :authorName a owl:DatatypeProperty;
450
+ rdfs:domain :Author;
451
+ rdfs:range xsd:string;
452
+ rdfs:label "nom de l'auteur"@fr .
453
+
454
+ :coherenceScore a owl:DatatypeProperty;
455
+ rdfs:domain :ResultatNLP;
456
+ rdfs:range xsd:float;
457
+ rdfs:label "score cohérence"@fr .
458
+
459
+ :completionTimestamp a owl:DatatypeProperty, owl:FunctionalProperty;
460
+ rdfs:domain :RapportEvaluation;
461
+ rdfs:range xsd:dateTime;
462
+ rdfs:label "horodatage de complétion"@fr .
463
+
464
+ :credibilityLevelValue a owl:DatatypeProperty, owl:FunctionalProperty;
465
+ rdfs:domain :CredibilityLevel;
466
+ rdfs:range xsd:float;
467
+ rdfs:label "valeur numérique niveau"@fr .
468
+
469
+ :credibilityScoreValue a owl:DatatypeProperty, owl:FunctionalProperty;
470
+ rdfs:domain :RapportEvaluation;
471
+ rdfs:range xsd:float;
472
+ rdfs:label "valeur score crédibilité"@fr .
473
+
474
+ :criterionResultConfidence a owl:DatatypeProperty;
475
+ rdfs:domain :ResultatCritere;
476
+ rdfs:range xsd:float;
477
+ rdfs:label "confiance résultat critère"@fr .
478
+
479
+ :criterionResultValue a owl:DatatypeProperty;
480
+ rdfs:domain :ResultatCritere;
481
+ rdfs:range xsd:string;
482
+ rdfs:label "valeur résultat critère"@fr .
483
+
484
+ :detectedBiases a owl:DatatypeProperty;
485
+ rdfs:domain :ResultatNLP;
486
+ rdfs:range xsd:string;
487
+ rdfs:comment "";
488
+ rdfs:label "biais détectés"@fr .
489
+
490
+ :evidenceSnippet a owl:DatatypeProperty;
491
+ rdfs:domain :Evidence;
492
+ rdfs:range xsd:string;
493
+ rdfs:label "extrait de la preuve"@fr .
494
+
495
+ :evidenceURL a owl:DatatypeProperty;
496
+ rdfs:domain :Evidence;
497
+ rdfs:range xsd:anyURI;
498
+ rdfs:label "URL de la preuve"@fr .
499
+
500
+ :informationContent a owl:DatatypeProperty;
501
+ rdfs:domain :InformationSoumise;
502
+ rdfs:range xsd:string;
503
+ rdfs:label "contenu de l'information"@fr .
504
+
505
+ :informationURL a owl:DatatypeProperty;
506
+ rdfs:domain :InformationSoumise;
507
+ rdfs:range xsd:anyURI;
508
+ rdfs:label "URL de l'information"@fr .
509
+
510
+ :modelName a owl:DatatypeProperty;
511
+ rdfs:domain :ModeleIA;
512
+ rdfs:range xsd:string;
513
+ rdfs:label "nom modèle"@fr .
514
+
515
+ :modelType a owl:DatatypeProperty;
516
+ rdfs:domain :ModeleIA;
517
+ rdfs:range xsd:string;
518
+ rdfs:label "type modèle"@fr .
519
+
520
+ :reportSummary a owl:DatatypeProperty;
521
+ rdfs:domain :RapportEvaluation;
522
+ rdfs:range xsd:string;
523
+ rdfs:label "résumé du rapport"@fr .
524
+
525
+ :requestStatus a owl:DatatypeProperty, owl:FunctionalProperty;
526
+ rdfs:domain :RequeteEvaluation;
527
+ rdfs:range xsd:string;
528
+ rdfs:label "statut requête"@fr .
529
+
530
+ :ruleDescription a owl:DatatypeProperty;
531
+ rdfs:domain :RegleVerification;
532
+ rdfs:range xsd:string;
533
+ rdfs:label "description règle"@fr .
534
+
535
+ :ruleLogic a owl:DatatypeProperty;
536
+ rdfs:domain :RegleVerification;
537
+ rdfs:range xsd:string;
538
+ rdfs:label "logique règle"@fr .
539
+
540
+ :ruleResultValid a owl:DatatypeProperty;
541
+ rdfs:domain :ResultatRegle;
542
+ rdfs:range xsd:boolean;
543
+ rdfs:label "résultat règle valide"@fr .
544
+
545
+ :ruleWeight a owl:DatatypeProperty;
546
+ rdfs:domain :RegleVerification;
547
+ rdfs:range xsd:float;
548
+ rdfs:label "poids règle"@fr .
549
+
550
+ :sentimentScore a owl:DatatypeProperty;
551
+ rdfs:domain :ResultatNLP;
552
+ rdfs:range xsd:float;
553
+ rdfs:label "score sentiment"@fr .
554
+
555
+ :sourceAnalyzedReputation a owl:DatatypeProperty;
556
+ rdfs:domain :InfoSourceAnalyse;
557
+ rdfs:range xsd:string;
558
+ rdfs:label "réputation source analysée"@fr .
559
+
560
+ :sourceAnalyzedURL a owl:DatatypeProperty;
561
+ rdfs:domain :InfoSourceAnalyse;
562
+ rdfs:range xsd:anyURI;
563
+ rdfs:label "URL source analysée"@fr .
564
+
565
+ :sourceMentionsCount a owl:DatatypeProperty;
566
+ rdfs:domain :InfoSourceAnalyse;
567
+ rdfs:range xsd:integer;
568
+ rdfs:label "mentions source analysée"@fr .
569
+
570
+ :sourceReputationScore a owl:DatatypeProperty;
571
+ rdfs:domain :Source;
572
+ rdfs:range xsd:float;
573
+ rdfs:label "score de réputation de la source"@fr .
574
+
575
+ :sourceURL a owl:DatatypeProperty, owl:FunctionalProperty;
576
+ rdfs:domain :Source;
577
+ rdfs:range xsd:anyURI;
578
+ rdfs:label "URL de la source"@fr .
579
+
580
+ :submissionTimestamp a owl:DatatypeProperty, owl:FunctionalProperty;
581
+ rdfs:domain :RequeteEvaluation;
582
+ rdfs:range xsd:dateTime;
583
+ rdfs:label "horodatage de soumission"@fr .
584
+
585
+ :userName a owl:DatatypeProperty;
586
+ rdfs:domain :User;
587
+ rdfs:range xsd:string;
588
+ rdfs:label "nom d'utilisateur"@fr .
589
+
590
+ :AcademicJournal a owl:Class;
591
+ rdfs:subClassOf :Source;
592
+ rdfs:label "Revue Académique"@fr .
593
+
594
+ :ApiLLM a owl:Class;
595
+ rdfs:subClassOf :SystemeExterne;
596
+ rdfs:label "API de LLM"@fr .
597
+
598
+ :Author a owl:Class;
599
+ rdfs:comment "Représente la personne ou l'entité créditée pour la création de l'information soumise."@fr;
600
+ rdfs:label "Auteur"@fr .
601
+
602
+ :BaseDeFaits a owl:Class;
603
+ rdfs:subClassOf :SystemeExterne;
604
+ rdfs:label "Base de Données de Faits Vérifiés"@fr .
605
+
606
+ :CredibilityLevel a owl:Class;
607
+ rdfs:comment "Représente le niveau de crédibilité qualitatif ou quantitatif attribué dans le rapport."@fr;
608
+ rdfs:label "Niveau de Crédibilité"@fr .
609
+
610
+ :Evidence a owl:Class;
611
+ rdfs:comment "Représente un élément d'information externe utilisé pour étayer ou réfuter l'information vérifiée."@fr;
612
+ rdfs:label "Preuve"@fr .
613
+
614
+ :Expert a owl:Class;
615
+ rdfs:subClassOf :User;
616
+ rdfs:comment "Utilisateur qualifié responsable de la configuration et de l'amélioration du système (règles, modèles)."@fr;
617
+ rdfs:label "Expert"@fr .
618
+
619
+ :FactCheckingOrganization a owl:Class;
620
+ rdfs:subClassOf :Source;
621
+ rdfs:label "Organisation de Vérification des Faits"@fr .
622
+
623
+ :InfoSourceAnalyse a owl:Class;
624
+ rdfs:subClassOf _:genid11;
625
+ rdfs:comment "Détails sur une source spécifique telle qu'analysée et présentée dans le rapport."@fr;
626
+ rdfs:label "Information Source Analysée"@fr .
627
+
628
+ _:genid11 a owl:Restriction;
629
+ owl:cardinality "1"^^xsd:nonNegativeInteger;
630
+ owl:onProperty :analyzesSource .
631
+
632
+ :InformationFaibleCredibilite a owl:Class;
633
+ owl:equivalentClass _:genid12;
634
+ rdfs:subClassOf _:genid22;
635
+ rdfs:label "Information Faiblement Crédible"@fr .
636
+
637
+ _:genid12 a owl:Class;
638
+ owl:intersectionOf _:genid21 .
639
+
640
+ _:genid21 a rdf:List;
641
+ rdf:first :InformationVerifiee;
642
+ rdf:rest _:genid19 .
643
+
644
+ _:genid19 a rdf:List;
645
+ rdf:first _:genid20;
646
+ rdf:rest _:genid17 .
647
+
648
+ _:genid17 a rdf:List;
649
+ rdf:first _:genid18;
650
+ rdf:rest _:genid13 .
651
+
652
+ _:genid13 a rdf:List;
653
+ rdf:first _:genid14;
654
+ rdf:rest rdf:nil .
655
+
656
+ _:genid14 a owl:Restriction;
657
+ owl:someValuesFrom _:genid15;
658
+ owl:onProperty :isSubjectOfRequest .
659
+
660
+ _:genid15 a owl:Restriction;
661
+ owl:someValuesFrom _:genid16;
662
+ owl:onProperty :producesReport .
663
+
664
+ _:genid16 a owl:Restriction;
665
+ owl:hasValue :Niveau_Bas;
666
+ owl:onProperty :assignsCredibilityLevel .
667
+
668
+ _:genid18 a owl:Class;
669
+ owl:complementOf :InformationMoyenneCredibilite .
670
+
671
+ _:genid20 a owl:Class;
672
+ owl:complementOf :InformationHauteCredibilite .
673
+
674
+ _:genid22 a owl:Restriction;
675
+ owl:allValuesFrom _:genid23;
676
+ owl:onProperty :isSubjectOfRequest .
677
+
678
+ _:genid23 a owl:Restriction;
679
+ owl:allValuesFrom _:genid24;
680
+ owl:onProperty :producesReport .
681
+
682
+ _:genid24 a owl:Restriction;
683
+ owl:hasValue :Niveau_Bas;
684
+ owl:onProperty :assignsCredibilityLevel .
685
+
686
+ :InformationHauteCredibilite a owl:Class;
687
+ owl:equivalentClass _:genid25;
688
+ rdfs:subClassOf _:genid31;
689
+ rdfs:label "Information Hautement Crédible"@fr .
690
+
691
+ _:genid25 a owl:Class;
692
+ owl:intersectionOf _:genid30 .
693
+
694
+ _:genid30 a rdf:List;
695
+ rdf:first :InformationVerifiee;
696
+ rdf:rest _:genid26 .
697
+
698
+ _:genid26 a rdf:List;
699
+ rdf:first _:genid27;
700
+ rdf:rest rdf:nil .
701
+
702
+ _:genid27 a owl:Restriction;
703
+ owl:someValuesFrom _:genid28;
704
+ owl:onProperty :isSubjectOfRequest .
705
+
706
+ _:genid28 a owl:Restriction;
707
+ owl:someValuesFrom _:genid29;
708
+ owl:onProperty :producesReport .
709
+
710
+ _:genid29 a owl:Restriction;
711
+ owl:hasValue :Niveau_Haut;
712
+ owl:onProperty :assignsCredibilityLevel .
713
+
714
+ _:genid31 a owl:Restriction;
715
+ owl:allValuesFrom _:genid32;
716
+ owl:onProperty :isSubjectOfRequest .
717
+
718
+ _:genid32 a owl:Restriction;
719
+ owl:allValuesFrom _:genid33;
720
+ owl:onProperty :producesReport .
721
+
722
+ _:genid33 a owl:Restriction;
723
+ owl:hasValue :Niveau_Haut;
724
+ owl:onProperty :assignsCredibilityLevel .
725
+
726
+ :InformationMoyenneCredibilite a owl:Class;
727
+ owl:equivalentClass _:genid34;
728
+ rdfs:subClassOf _:genid42;
729
+ rdfs:label "Information Moyennement Crédible"@fr .
730
+
731
+ _:genid34 a owl:Class;
732
+ owl:intersectionOf _:genid41 .
733
+
734
+ _:genid41 a rdf:List;
735
+ rdf:first :InformationVerifiee;
736
+ rdf:rest _:genid39 .
737
+
738
+ _:genid39 a rdf:List;
739
+ rdf:first _:genid40;
740
+ rdf:rest _:genid35 .
741
+
742
+ _:genid35 a rdf:List;
743
+ rdf:first _:genid36;
744
+ rdf:rest rdf:nil .
745
+
746
+ _:genid36 a owl:Restriction;
747
+ owl:someValuesFrom _:genid37;
748
+ owl:onProperty :isSubjectOfRequest .
749
+
750
+ _:genid37 a owl:Restriction;
751
+ owl:someValuesFrom _:genid38;
752
+ owl:onProperty :producesReport .
753
+
754
+ _:genid38 a owl:Restriction;
755
+ owl:hasValue :Niveau_Moyen;
756
+ owl:onProperty :assignsCredibilityLevel .
757
+
758
+ _:genid40 a owl:Class;
759
+ owl:complementOf :InformationHauteCredibilite .
760
+
761
+ _:genid42 a owl:Restriction;
762
+ owl:allValuesFrom _:genid43;
763
+ owl:onProperty :isSubjectOfRequest .
764
+
765
+ _:genid43 a owl:Restriction;
766
+ owl:allValuesFrom _:genid44;
767
+ owl:onProperty :producesReport .
768
+
769
+ _:genid44 a owl:Restriction;
770
+ owl:hasValue :Niveau_Moyen;
771
+ owl:onProperty :assignsCredibilityLevel .
772
+
773
+ :InformationSoumise a owl:Class;
774
+ rdfs:comment "Représente l'unité d'information (texte, URL) telle que soumise pour vérification."@fr;
775
+ rdfs:label "Information Soumise"@fr .
776
+
777
+ :InformationVerifiee a owl:Class;
778
+ owl:equivalentClass _:genid45;
779
+ rdfs:label "Information Vérifiée"@fr .
780
+
781
+ _:genid45 a owl:Class;
782
+ owl:intersectionOf _:genid49 .
783
+
784
+ _:genid49 a rdf:List;
785
+ rdf:first :InformationSoumise;
786
+ rdf:rest _:genid46 .
787
+
788
+ _:genid46 a rdf:List;
789
+ rdf:first _:genid47;
790
+ rdf:rest rdf:nil .
791
+
792
+ _:genid47 a owl:Restriction;
793
+ owl:someValuesFrom _:genid48;
794
+ owl:onProperty :isSubjectOfRequest .
795
+
796
+ _:genid48 a owl:Restriction;
797
+ owl:someValuesFrom :RapportEvaluation;
798
+ owl:onProperty :producesReport .
799
+
800
+ :ModeleIA a owl:Class;
801
+ rdfs:subClassOf :VerificationMethod, _:genid50;
802
+ rdfs:comment "Représente un modèle d'apprentissage automatique utilisé pour l'analyse sémantique ou autre."@fr;
803
+ rdfs:label "Modèle IA/NLP"@fr .
804
+
805
+ _:genid50 a owl:Restriction;
806
+ owl:minCardinality "1"^^xsd:nonNegativeInteger;
807
+ owl:onProperty :evaluatesCriterion .
808
+
809
+ :MoteurRecherche a owl:Class;
810
+ rdfs:subClassOf :SystemeExterne;
811
+ rdfs:label "Moteur de Recherche"@fr .
812
+
813
+ :NewsWebsite a owl:Class;
814
+ rdfs:subClassOf :Source;
815
+ rdfs:label "Site d'actualités"@fr .
816
+
817
+ :Niveau_Bas a owl:Class, owl:NamedIndividual, :CredibilityLevel;
818
+ :credibilityLevelValue "0.2"^^xsd:float;
819
+ rdfs:label "Crédibilité Faible"@fr .
820
+
821
+ :Niveau_Haut a owl:Class, owl:NamedIndividual, :CredibilityLevel;
822
+ :credibilityLevelValue "0.8"^^xsd:float;
823
+ rdfs:label "Crédibilité Élevée"@fr .
824
+
825
+ :Niveau_Moyen a owl:Class, owl:NamedIndividual, :CredibilityLevel;
826
+ :credibilityLevelValue "0.5"^^xsd:float;
827
+ rdfs:label "Crédibilité Moyenne"@fr .
828
+
829
+ :Niveau_NonVerifie a owl:Class, owl:NamedIndividual, :CredibilityLevel;
830
+ rdfs:label "Non Vérifié"@fr .
831
+
832
+ :PersonalBlog a owl:Class;
833
+ rdfs:subClassOf :Source;
834
+ rdfs:label "Blog Personnel"@fr .
835
+
836
+ :RapportEvaluation a owl:Class;
837
+ rdfs:subClassOf _:genid51;
838
+ rdfs:comment "Encapsule les résultats complets du processus de vérification pour une requête donnée."@fr;
839
+ rdfs:label "Rapport d'Évaluation"@fr .
840
+
841
+ _:genid51 a owl:Restriction;
842
+ owl:cardinality "1"^^xsd:nonNegativeInteger;
843
+ owl:onProperty :assignsCredibilityLevel .
844
+
845
+ :RefutingEvidence a owl:Class;
846
+ rdfs:subClassOf :Evidence;
847
+ owl:disjointWith :SupportingEvidence;
848
+ rdfs:label "Preuve réfutante"@fr .
849
+
850
+ :RegleVerification a owl:Class;
851
+ rdfs:subClassOf :VerificationMethod, _:genid52;
852
+ rdfs:comment "Représente une règle logique prédéfinie utilisée pour évaluer un aspect de la crédibilité."@fr;
853
+ rdfs:label "Règle de Vérification"@fr .
854
+
855
+ _:genid52 a owl:Restriction;
856
+ owl:minCardinality "1"^^xsd:nonNegativeInteger;
857
+ owl:onProperty :evaluatesCriterion .
858
+
859
+ :RequeteEvaluation a owl:Class;
860
+ rdfs:subClassOf _:genid53, _:genid54, _:genid55;
861
+ rdfs:comment "Représente une demande spécifique de vérification de crédibilité soumise par un utilisateur."@fr;
862
+ rdfs:label "Requête d'Évaluation"@fr .
863
+
864
+ _:genid53 a owl:Restriction;
865
+ owl:minCardinality "0"^^xsd:nonNegativeInteger;
866
+ owl:onProperty :producesReport .
867
+
868
+ _:genid54 a owl:Restriction;
869
+ owl:cardinality "1"^^xsd:nonNegativeInteger;
870
+ owl:onProperty :concernsInformation .
871
+
872
+ _:genid55 a owl:Restriction;
873
+ owl:cardinality "1"^^xsd:nonNegativeInteger;
874
+ owl:onProperty :submittedBy .
875
+
876
+ :ResultatCritere a owl:Class;
877
+ rdfs:subClassOf _:genid56, _:genid57;
878
+ rdfs:comment "Représente le résultat de l'évaluation d'un critère spécifique pour une requête, potentiellement basé sur un ou plusieurs résultats de règles/NLP."@fr;
879
+ rdfs:label "Résultat Critère"@fr .
880
+
881
+ _:genid56 a owl:Restriction;
882
+ owl:minCardinality "1"^^xsd:nonNegativeInteger;
883
+ owl:onProperty :obtainedVia .
884
+
885
+ _:genid57 a owl:Restriction;
886
+ owl:cardinality "1"^^xsd:nonNegativeInteger;
887
+ owl:onProperty :concernsCriterion .
888
+
889
+ :ResultatNLP a owl:Class;
890
+ rdfs:subClassOf :ResultatVerification, _:genid58;
891
+ owl:disjointWith :ResultatRegle;
892
+ rdfs:comment "Résultat de l'analyse effectuée par un modèle IA/NLP."@fr;
893
+ rdfs:label "Résultat NLP"@fr .
894
+
895
+ _:genid58 a owl:Restriction;
896
+ owl:cardinality "1"^^xsd:nonNegativeInteger;
897
+ owl:onProperty :usesModel .
898
+
899
+ :ResultatRegle a owl:Class;
900
+ rdfs:subClassOf :ResultatVerification, _:genid59;
901
+ rdfs:comment "Résultat de l'application d'une règle de vérification spécifique."@fr;
902
+ rdfs:label "Résultat Règle"@fr .
903
+
904
+ _:genid59 a owl:Restriction;
905
+ owl:cardinality "1"^^xsd:nonNegativeInteger;
906
+ owl:onProperty :appliesRule .
907
+
908
+ :ResultatVerification a owl:Class;
909
+ rdfs:comment "Classe parente pour les résultats issus des différentes méthodes de vérification."@fr;
910
+ rdfs:label "Résultat de Vérification (Interne)"@fr .
911
+
912
+ :SocialMediaPlatform a owl:Class;
913
+ rdfs:subClassOf :Source;
914
+ rdfs:label "Plateforme de Média Social"@fr .
915
+
916
+ :Source a owl:Class;
917
+ rdfs:comment "Représente une entité (site web, organisation, personne) d'où provient l'information originale ou la preuve."@fr;
918
+ rdfs:label "Source"@fr .
919
+
920
+ :SupportingEvidence a owl:Class;
921
+ rdfs:subClassOf :Evidence;
922
+ rdfs:label "Preuve à l'appui"@fr .
923
+
924
+ :SystemeExterne a owl:Class;
925
+ rdfs:comment "Représente une source de données ou un service externe utilisé pendant le processus de vérification (API, base de données)."@fr;
926
+ rdfs:label "Système Externe"@fr .
927
+
928
+ :User a owl:Class;
929
+ rdfs:comment "Représente une personne interagissant avec le système de vérification."@fr;
930
+ rdfs:label "Utilisateur"@fr .
931
+
932
+ :VerificationCriterion a owl:Class;
933
+ rdfs:comment "Aspect spécifique évalué lors de la vérification (ex: réputation de la source, cohérence)."@fr;
934
+ rdfs:label "Critère de Vérification"@fr .
935
+
936
+ :VerificationMethod a owl:Class;
937
+ rdfs:comment "Représente une approche (règle, modèle IA) utilisée pour évaluer la crédibilité."@fr;
938
+ rdfs:label "Méthode de Vérification"@fr .
939
+
940
+ :Criteria_AuthorExpertise a owl:NamedIndividual, :VerificationCriterion;
941
+ rdfs:label "Expertise de l'auteur"@fr .
942
+
943
+ :Criteria_CoherenceAnalysis a owl:NamedIndividual, :VerificationCriterion;
944
+ rdfs:label "Analyse de la cohérence"@fr .
945
+
946
+ :Criteria_CrossReferencing a owl:NamedIndividual, :VerificationCriterion;
947
+ rdfs:label "Références croisées"@fr .
948
+
949
+ :Criteria_FactCheckDB a owl:NamedIndividual, :VerificationCriterion;
950
+ rdfs:label "Consultation base de données Fact-Check"@fr .
951
+
952
+ :Criteria_SourceReputation a owl:NamedIndividual, :VerificationCriterion;
953
+ rdfs:label "Réputation de la source"@fr .
954
+
955
+ :Criteria_ToneAnalysis a owl:NamedIndividual, :VerificationCriterion;
956
+ rdfs:label "Analyse du ton (ex: neutre, biaisé)"@fr .
957
+
958
+ _:genid60 owl:maxCardinality "1"^^xsd:nonNegativeInteger .
959
+
960
+ _:genid61 a owl:AllDisjointClasses;
961
+ owl:members _:genid66 .
962
+
963
+ _:genid66 a rdf:List;
964
+ rdf:first :AcademicJournal;
965
+ rdf:rest _:genid65 .
966
+
967
+ _:genid65 a rdf:List;
968
+ rdf:first :FactCheckingOrganization;
969
+ rdf:rest _:genid64 .
970
+
971
+ _:genid64 a rdf:List;
972
+ rdf:first :NewsWebsite;
973
+ rdf:rest _:genid63 .
974
+
975
+ _:genid63 a rdf:List;
976
+ rdf:first :PersonalBlog;
977
+ rdf:rest _:genid62 .
978
+
979
+ _:genid62 a rdf:List;
980
+ rdf:first :SocialMediaPlatform;
981
+ rdf:rest rdf:nil .
982
+
983
+ _:genid67 a owl:AllDisjointClasses;
984
+ owl:members _:genid70 .
985
+
986
+ _:genid70 a rdf:List;
987
+ rdf:first :ApiLLM;
988
+ rdf:rest _:genid69 .
989
+
990
+ _:genid69 a rdf:List;
991
+ rdf:first :BaseDeFaits;
992
+ rdf:rest _:genid68 .
993
+
994
+ _:genid68 a rdf:List;
995
+ rdf:first :MoteurRecherche;
996
+ rdf:rest rdf:nil .
997
+
998
+ _:genid71 a owl:AllDisjointClasses;
999
+ owl:members _:genid74 .
1000
+
1001
+ _:genid74 a rdf:List;
1002
+ rdf:first :InformationFaibleCredibilite;
1003
+ rdf:rest _:genid73 .
1004
+
1005
+ _:genid73 a rdf:List;
1006
+ rdf:first :InformationHauteCredibilite;
1007
+ rdf:rest _:genid72 .
1008
+
1009
+ _:genid72 a rdf:List;
1010
+ rdf:first :InformationMoyenneCredibilite;
1011
+ rdf:rest rdf:nil .
1012
+
1013
+ _:genid75 a owl:AllDisjointClasses;
1014
+ owl:members _:genid79 .
1015
+
1016
+ _:genid79 a rdf:List;
1017
+ rdf:first :Niveau_Bas;
1018
+ rdf:rest _:genid78 .
1019
+
1020
+ _:genid78 a rdf:List;
1021
+ rdf:first :Niveau_Haut;
1022
+ rdf:rest _:genid77 .
1023
+
1024
+ _:genid77 a rdf:List;
1025
+ rdf:first :Niveau_Moyen;
1026
+ rdf:rest _:genid76 .
1027
+
1028
+ _:genid76 a rdf:List;
1029
+ rdf:first :Niveau_NonVerifie;
1030
+ rdf:rest rdf:nil .
requirements.txt CHANGED
@@ -1,20 +1,24 @@
1
- # SysCRED - Requirements
2
  # Système Hybride de Vérification de Crédibilité
3
  # (c) Dominique S. Loyer
 
4
 
5
  # === Core Dependencies ===
6
  requests>=2.28.0
7
  beautifulsoup4>=4.11.0
8
  python-whois>=0.8.0
 
9
 
10
  # === RDF/Ontology ===
11
  rdflib>=6.0.0
12
 
13
  # === Machine Learning ===
14
- transformers>=4.30.0,<5
15
  torch>=2.0.0
16
- numpy>=1.24.0,<2
17
  sentence-transformers>=2.2.0
 
 
18
 
19
  # === Explainability ===
20
  lime>=0.2.0
@@ -28,7 +32,7 @@ pandas>=2.0.0
28
  # === Production/Database ===
29
  gunicorn>=20.1.0
30
  psycopg2-binary>=2.9.0
31
- flask_sqlalchemy>=3.0.0
32
 
33
  # === Development/Testing ===
34
  pytest>=7.0.0
 
1
+ # SysCRED - Requirements (Full version with ML models)
2
  # Système Hybride de Vérification de Crédibilité
3
  # (c) Dominique S. Loyer
4
+ # Version complète pour HuggingFace Spaces et développement local
5
 
6
  # === Core Dependencies ===
7
  requests>=2.28.0
8
  beautifulsoup4>=4.11.0
9
  python-whois>=0.8.0
10
+ lxml>=4.9.0
11
 
12
  # === RDF/Ontology ===
13
  rdflib>=6.0.0
14
 
15
  # === Machine Learning ===
16
+ transformers>=4.30.0
17
  torch>=2.0.0
18
+ numpy>=1.24.0
19
  sentence-transformers>=2.2.0
20
+ accelerate>=0.20.0
21
+ spacy>=3.6.0
22
 
23
  # === Explainability ===
24
  lime>=0.2.0
 
32
  # === Production/Database ===
33
  gunicorn>=20.1.0
34
  psycopg2-binary>=2.9.0
35
+ flask-sqlalchemy>=3.0.0
36
 
37
  # === Development/Testing ===
38
  pytest>=7.0.0
syscred/__init__.py CHANGED
@@ -9,17 +9,19 @@ Citation Key: loyerModelingHybridSystem2025
9
  Modules:
10
  - api_clients: Web scraping, WHOIS, Fact Check APIs
11
  - ir_engine: BM25, QLD, TF-IDF, PRF (from TREC)
12
- - trec_retriever: Evidence retrieval for fact-checking (NEW v2.3)
13
- - trec_dataset: TREC AP88-90 data loader (NEW v2.3)
14
- - liar_dataset: LIAR benchmark dataset loader (NEW v2.3)
15
  - seo_analyzer: SEO analysis, PageRank estimation
16
  - eval_metrics: MAP, NDCG, P@K, Recall, MRR
17
  - ontology_manager: RDFLib integration
18
  - verification_system: Main credibility pipeline
19
- - graph_rag: GraphRAG for contextual memory (enhanced v2.3)
 
 
20
  """
21
 
22
- __version__ = "2.3.1"
23
  __author__ = "Dominique S. Loyer"
24
  __citation__ = "loyerModelingHybridSystem2025"
25
 
@@ -32,11 +34,15 @@ from syscred.ir_engine import IREngine
32
  from syscred.eval_metrics import EvaluationMetrics
33
  from syscred.graph_rag import GraphRAG
34
 
35
- # TREC Integration (NEW - Feb 2026)
 
 
 
 
36
  from syscred.trec_retriever import TRECRetriever, Evidence, RetrievalResult
37
  from syscred.trec_dataset import TRECDataset, TRECTopic
38
 
39
- # LIAR Benchmark (NEW - Feb 2026)
40
  from syscred.liar_dataset import LIARDataset, LiarStatement, LiarLabel
41
 
42
  # Convenience alias
@@ -52,13 +58,17 @@ __all__ = [
52
  'IREngine',
53
  'EvaluationMetrics',
54
  'GraphRAG',
55
- # TREC (NEW)
 
 
 
 
56
  'TRECRetriever',
57
  'TRECDataset',
58
  'TRECTopic',
59
  'Evidence',
60
  'RetrievalResult',
61
- # LIAR Benchmark (NEW)
62
  'LIARDataset',
63
  'LiarStatement',
64
  'LiarLabel',
 
9
  Modules:
10
  - api_clients: Web scraping, WHOIS, Fact Check APIs
11
  - ir_engine: BM25, QLD, TF-IDF, PRF (from TREC)
12
+ - trec_retriever: Evidence retrieval for fact-checking (v2.3)
13
+ - trec_dataset: TREC AP88-90 data loader (v2.3)
14
+ - liar_dataset: LIAR benchmark dataset loader (v2.3)
15
  - seo_analyzer: SEO analysis, PageRank estimation
16
  - eval_metrics: MAP, NDCG, P@K, Recall, MRR
17
  - ontology_manager: RDFLib integration
18
  - verification_system: Main credibility pipeline
19
+ - graph_rag: GraphRAG for contextual memory (v2.3)
20
+ - ner_analyzer: Named Entity Recognition with spaCy (v2.4)
21
+ - eeat_calculator: Google E-E-A-T metrics (v2.4)
22
  """
23
 
24
+ __version__ = "2.4.0"
25
  __author__ = "Dominique S. Loyer"
26
  __citation__ = "loyerModelingHybridSystem2025"
27
 
 
34
  from syscred.eval_metrics import EvaluationMetrics
35
  from syscred.graph_rag import GraphRAG
36
 
37
+ # NER and E-E-A-T (NEW - v2.4)
38
+ from syscred.ner_analyzer import NERAnalyzer
39
+ from syscred.eeat_calculator import EEATCalculator, EEATScore
40
+
41
+ # TREC Integration (v2.3)
42
  from syscred.trec_retriever import TRECRetriever, Evidence, RetrievalResult
43
  from syscred.trec_dataset import TRECDataset, TRECTopic
44
 
45
+ # LIAR Benchmark (v2.3)
46
  from syscred.liar_dataset import LIARDataset, LiarStatement, LiarLabel
47
 
48
  # Convenience alias
 
58
  'IREngine',
59
  'EvaluationMetrics',
60
  'GraphRAG',
61
+ # NER & E-E-A-T (NEW v2.4)
62
+ 'NERAnalyzer',
63
+ 'EEATCalculator',
64
+ 'EEATScore',
65
+ # TREC (v2.3)
66
  'TRECRetriever',
67
  'TRECDataset',
68
  'TRECTopic',
69
  'Evidence',
70
  'RetrievalResult',
71
+ # LIAR Benchmark (v2.3)
72
  'LIARDataset',
73
  'LiarStatement',
74
  'LiarLabel',
syscred/backend_app.py CHANGED
@@ -22,15 +22,16 @@ import traceback
22
  from pathlib import Path
23
  try:
24
  from dotenv import load_dotenv
25
- env_path = Path(__file__).parent / '.env'
26
- try:
27
- if env_path.exists():
28
- load_dotenv(env_path)
29
- print(f"[SysCRED Backend] Loaded .env from {env_path}")
30
- else:
31
- print(f"[SysCRED Backend] No .env file found at {env_path}")
32
- except PermissionError:
33
- print(f"[SysCRED Backend] Permission denied for {env_path}, using system env vars")
 
34
  except ImportError:
35
  print("[SysCRED Backend] python-dotenv not installed, using system env vars")
36
 
@@ -88,6 +89,16 @@ except ImportError as e:
88
  app = Flask(__name__)
89
  CORS(app) # Enable CORS for frontend
90
 
 
 
 
 
 
 
 
 
 
 
91
  # Initialize Database
92
  try:
93
  init_db(app) # [NEW] Setup DB connection
@@ -270,6 +281,62 @@ def verify_endpoint():
270
 
271
  print(f"[SysCRED Backend] Score: {result.get('scoreCredibilite', 'N/A')}")
272
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  # [NEW] Persist to Database
274
  try:
275
  new_analysis = AnalysisResult(
 
22
  from pathlib import Path
23
  try:
24
  from dotenv import load_dotenv
25
+ # .env is at project root (parent of syscred/)
26
+ env_path = Path(__file__).resolve().parent.parent / '.env'
27
+ if not env_path.exists():
28
+ # Fallback: check syscred/ directory
29
+ env_path = Path(__file__).parent / '.env'
30
+ if env_path.exists():
31
+ load_dotenv(env_path)
32
+ print(f"[SysCRED Backend] Loaded .env from {env_path}")
33
+ else:
34
+ print(f"[SysCRED Backend] No .env file found, using system env vars")
35
  except ImportError:
36
  print("[SysCRED Backend] python-dotenv not installed, using system env vars")
37
 
 
89
  app = Flask(__name__)
90
  CORS(app) # Enable CORS for frontend
91
 
92
+ # Allow iframe embedding on UQAM domains (for syscred.uqam.ca mirror)
93
+ @app.after_request
94
+ def add_security_headers(response):
95
+ """Add security headers allowing UQAM iframe embedding."""
96
+ response.headers['X-Frame-Options'] = 'ALLOW-FROM https://syscred.uqam.ca'
97
+ response.headers['Content-Security-Policy'] = (
98
+ "frame-ancestors 'self' https://syscred.uqam.ca https://*.uqam.ca"
99
+ )
100
+ return response
101
+
102
  # Initialize Database
103
  try:
104
  init_db(app) # [NEW] Setup DB connection
 
281
 
282
  print(f"[SysCRED Backend] Score: {result.get('scoreCredibilite', 'N/A')}")
283
 
284
+ # [NEW] TREC Evidence Search + IR Metrics
285
+ try:
286
+ global trec_retriever, eval_metrics
287
+
288
+ # Initialize TREC if needed
289
+ if trec_retriever is None and TREC_AVAILABLE:
290
+ trec_retriever = TRECRetriever(use_stemming=True, enable_prf=False)
291
+ trec_retriever.corpus = TREC_DEMO_CORPUS
292
+ eval_metrics = EvaluationMetrics()
293
+ print("[SysCRED Backend] TREC Retriever initialized with demo corpus")
294
+
295
+ if trec_retriever and eval_metrics:
296
+ import time
297
+ start_time = time.time()
298
+
299
+ # Use the input text as query
300
+ query_text = input_data[:200] if not credibility_system.is_url(input_data) else result.get('informationEntree', input_data)[:200]
301
+
302
+ trec_result = trec_retriever.retrieve_evidence(query_text, k=5, model='bm25')
303
+ search_time = (time.time() - start_time) * 1000
304
+
305
+ retrieved_ids = [e.doc_id for e in trec_result.evidences]
306
+
307
+ # Use climate-related docs as "relevant" for demo evaluation
308
+ # In production, this would come from qrels files
309
+ relevant_ids = set(TREC_DEMO_CORPUS.keys()) # All docs as relevant pool
310
+
311
+ # Compute IR metrics
312
+ k = len(retrieved_ids) if retrieved_ids else 1
313
+ precision = eval_metrics.precision_at_k(retrieved_ids, relevant_ids, k) if retrieved_ids else 0
314
+ recall = eval_metrics.recall_at_k(retrieved_ids, relevant_ids, k) if retrieved_ids else 0
315
+ ap = eval_metrics.average_precision(retrieved_ids, relevant_ids) if retrieved_ids else 0
316
+ mrr = eval_metrics.mrr(retrieved_ids, relevant_ids) if retrieved_ids else 0
317
+
318
+ relevance_dict = {doc: 1 for doc in relevant_ids}
319
+ ndcg = eval_metrics.ndcg_at_k(retrieved_ids, relevance_dict, k) if retrieved_ids else 0
320
+
321
+ # TF-IDF score from top result
322
+ tfidf_score = trec_result.evidences[0].score if trec_result.evidences else 0
323
+
324
+ result['trec_metrics'] = {
325
+ 'precision': round(precision, 4),
326
+ 'recall': round(recall, 4),
327
+ 'map': round(ap, 4),
328
+ 'ndcg': round(ndcg, 4),
329
+ 'tfidf_score': round(tfidf_score, 4),
330
+ 'mrr': round(mrr, 4),
331
+ 'retrieved_count': len(retrieved_ids),
332
+ 'corpus_size': len(TREC_DEMO_CORPUS),
333
+ 'search_time_ms': round(search_time, 2)
334
+ }
335
+ print(f"[SysCRED Backend] TREC: P={precision:.3f} R={recall:.3f} MAP={ap:.3f} NDCG={ndcg:.3f} MRR={mrr:.3f}")
336
+ except Exception as e:
337
+ print(f"[SysCRED Backend] TREC metrics error: {e}")
338
+ result['trec_metrics'] = {'error': str(e)}
339
+
340
  # [NEW] Persist to Database
341
  try:
342
  new_analysis = AnalysisResult(
syscred/config.py CHANGED
@@ -23,23 +23,22 @@ from pathlib import Path
23
  from typing import Dict, Optional
24
  from dotenv import load_dotenv
25
 
26
- # Charger les variables depuis .env
27
  # Charger les variables depuis .env (Project Root)
28
- # Path: .../systemFactChecking/02_Code/syscred/config.py
29
- # Root .env is at .../systemFactChecking/.env (3 levels up)
30
  current_path = Path(__file__).resolve()
31
- env_path = current_path.parent.parent.parent / '.env'
32
 
33
- try:
34
- if not env_path.exists():
35
- print(f"[Config] WARNING: .env not found at {env_path}")
36
- # Try alternate location (sometimes CWD matters)
37
- env_path = Path.cwd().parent / '.env'
38
-
39
- load_dotenv(dotenv_path=env_path)
40
- print(f"[Config] Loading .env from {env_path}")
41
- except PermissionError:
42
- print(f"[Config] Permission denied for .env, using system env vars")
43
  print(f"[Config] SYSCRED_GOOGLE_API_KEY loaded: {'Yes' if os.environ.get('SYSCRED_GOOGLE_API_KEY') else 'No'}")
44
 
45
 
@@ -53,8 +52,9 @@ class Config:
53
  """
54
 
55
  # === Chemins ===
 
56
  BASE_DIR = Path(__file__).parent.parent
57
- ONTOLOGY_BASE_PATH = BASE_DIR / "sysCRED_onto26avrtil.ttl"
58
  ONTOLOGY_DATA_PATH = BASE_DIR / "ontology" / "sysCRED_data.ttl"
59
 
60
  # === Serveur Flask ===
@@ -64,7 +64,7 @@ class Config:
64
 
65
  # === API Keys ===
66
  GOOGLE_FACT_CHECK_API_KEY = os.getenv("SYSCRED_GOOGLE_API_KEY")
67
- DATABASE_URL = os.getenv("DATABASE_URL") # [NEW] Read DB URL from env
68
 
69
  # === Modèles ML ===
70
  # Support both SYSCRED_LOAD_ML and SYSCRED_LOAD_ML_MODELS (for Render)
 
23
  from typing import Dict, Optional
24
  from dotenv import load_dotenv
25
 
 
26
  # Charger les variables depuis .env (Project Root)
27
+ # Path: .../systemFactChecking/syscred/config.py
28
+ # Root .env is at .../systemFactChecking/.env (1 level up from syscred/)
29
  current_path = Path(__file__).resolve()
30
+ env_path = current_path.parent.parent / '.env'
31
 
32
+ if not env_path.exists():
33
+ print(f"[Config] WARNING: .env not found at {env_path}")
34
+ # Try alternate locations
35
+ for alt in [Path.cwd() / '.env', Path.cwd().parent / '.env']:
36
+ if alt.exists():
37
+ env_path = alt
38
+ break
39
+
40
+ load_dotenv(dotenv_path=env_path)
41
+ print(f"[Config] Loading .env from {env_path}")
42
  print(f"[Config] SYSCRED_GOOGLE_API_KEY loaded: {'Yes' if os.environ.get('SYSCRED_GOOGLE_API_KEY') else 'No'}")
43
 
44
 
 
52
  """
53
 
54
  # === Chemins ===
55
+ # BASE_DIR = project root (parent of syscred/)
56
  BASE_DIR = Path(__file__).parent.parent
57
+ ONTOLOGY_BASE_PATH = BASE_DIR / "ontology" / "sysCRED_onto26avrtil.ttl"
58
  ONTOLOGY_DATA_PATH = BASE_DIR / "ontology" / "sysCRED_data.ttl"
59
 
60
  # === Serveur Flask ===
 
64
 
65
  # === API Keys ===
66
  GOOGLE_FACT_CHECK_API_KEY = os.getenv("SYSCRED_GOOGLE_API_KEY")
67
+ DATABASE_URL = os.getenv("SYSCRED_DATABASE_URL", os.getenv("DATABASE_URL")) # Standardized env var
68
 
69
  # === Modèles ML ===
70
  # Support both SYSCRED_LOAD_ML and SYSCRED_LOAD_ML_MODELS (for Render)
syscred/database.py CHANGED
@@ -3,6 +3,7 @@
3
  Database Manager for SysCRED
4
  ===========================
5
  Handles connection to Supabase (PostgreSQL) and defines models.
 
6
  """
7
 
8
  import os
@@ -32,23 +33,38 @@ class AnalysisResult(db.Model):
32
  'url': self.url,
33
  'score': self.credibility_score,
34
  'summary': self.summary,
35
- 'created_at': self.created_at.isoformat(),
36
  'source_reputation': self.source_reputation
37
  }
38
 
39
  def init_db(app):
40
  """Initialize the database with the Flask app."""
41
- # Fallback to sqlite for local dev if no DATABASE_URL
42
- db_url = os.environ.get('DATABASE_URL')
43
  if db_url and db_url.startswith("postgres://"):
44
  db_url = db_url.replace("postgres://", "postgresql://", 1)
45
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  app.config['SQLALCHEMY_DATABASE_URI'] = db_url or 'sqlite:///syscred.db'
47
  app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
48
 
49
  db.init_app(app)
50
 
51
- # Create tables if they don't exist (basic migration)
52
  with app.app_context():
53
- db.create_all()
54
- print("[SysCRED-DB] Database tables initialized.")
 
 
 
 
 
3
  Database Manager for SysCRED
4
  ===========================
5
  Handles connection to Supabase (PostgreSQL) and defines models.
6
+ Falls back to SQLite if PostgreSQL is unavailable.
7
  """
8
 
9
  import os
 
33
  'url': self.url,
34
  'score': self.credibility_score,
35
  'summary': self.summary,
36
+ 'created_at': self.created_at.isoformat() if self.created_at else None,
37
  'source_reputation': self.source_reputation
38
  }
39
 
40
  def init_db(app):
41
  """Initialize the database with the Flask app."""
42
+ # Use SYSCRED_DATABASE_URL first (from .env), fallback to DATABASE_URL (from Render/HF)
43
+ db_url = os.environ.get('SYSCRED_DATABASE_URL') or os.environ.get('DATABASE_URL')
44
  if db_url and db_url.startswith("postgres://"):
45
  db_url = db_url.replace("postgres://", "postgresql://", 1)
46
 
47
+ # Test PostgreSQL reachability before committing to it
48
+ if db_url and 'postgresql' in db_url:
49
+ try:
50
+ import socket
51
+ from urllib.parse import urlparse
52
+ parsed = urlparse(db_url)
53
+ socket.getaddrinfo(parsed.hostname, parsed.port or 5432)
54
+ except (socket.gaierror, Exception) as e:
55
+ print(f"[SysCRED-DB] PostgreSQL host unreachable ({parsed.hostname}): {e}")
56
+ print("[SysCRED-DB] Falling back to SQLite...")
57
+ db_url = None # Force SQLite fallback
58
+
59
  app.config['SQLALCHEMY_DATABASE_URI'] = db_url or 'sqlite:///syscred.db'
60
  app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
61
 
62
  db.init_app(app)
63
 
 
64
  with app.app_context():
65
+ try:
66
+ db.create_all()
67
+ db_type = 'PostgreSQL (Supabase)' if db_url else 'SQLite (local)'
68
+ print(f"[SysCRED-DB] Database initialized: {db_type}")
69
+ except Exception as e:
70
+ print(f"[SysCRED-DB] Database init error: {e}")
syscred/db_store.py ADDED
@@ -0,0 +1,354 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SysCRED Storage Module - SQLite + Supabase
3
+ ==========================================
4
+ Stocke les triplets RDF et résultats d'analyse.
5
+ Utilise SQLite localement, avec option de sync vers Supabase.
6
+ """
7
+
8
+ import os
9
+ import sqlite3
10
+ import hashlib
11
+ import json
12
+ from datetime import datetime
13
+ from typing import Optional, Dict, Any, List, Tuple
14
+ from urllib.parse import urlparse
15
+ from pathlib import Path
16
+
17
+ # Chemins
18
+ BASE_DIR = Path(__file__).parent
19
+ DB_PATH = BASE_DIR / "syscred_local.db"
20
+
21
+ class SysCREDStore:
22
+ """
23
+ Gestionnaire de stockage pour SysCRED.
24
+ SQLite local avec option Supabase.
25
+ """
26
+
27
+ def __init__(self, db_path: str = None, supabase_url: str = None):
28
+ self.db_path = db_path or str(DB_PATH)
29
+ self.supabase_url = supabase_url or os.getenv("DATABASE_URL")
30
+ self.conn = None
31
+ self._init_local_db()
32
+
33
+ def _init_local_db(self):
34
+ """Initialise la base SQLite locale."""
35
+ self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
36
+ self.conn.row_factory = sqlite3.Row
37
+
38
+ # Créer les tables
39
+ self.conn.executescript("""
40
+ -- Résultats d'analyse
41
+ CREATE TABLE IF NOT EXISTS analysis_results (
42
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
43
+ url TEXT NOT NULL,
44
+ credibility_score REAL NOT NULL,
45
+ summary TEXT,
46
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
47
+ source_reputation TEXT,
48
+ fact_check_count INTEGER DEFAULT 0,
49
+ score_details TEXT,
50
+ domain TEXT
51
+ );
52
+
53
+ -- Triplets RDF
54
+ CREATE TABLE IF NOT EXISTS rdf_triples (
55
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
56
+ subject TEXT NOT NULL,
57
+ predicate TEXT NOT NULL,
58
+ object TEXT NOT NULL,
59
+ object_type TEXT DEFAULT 'uri',
60
+ graph_name TEXT DEFAULT 'data',
61
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
62
+ UNIQUE(subject, predicate, object, graph_name)
63
+ );
64
+
65
+ -- Sources
66
+ CREATE TABLE IF NOT EXISTS sources (
67
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
68
+ domain TEXT UNIQUE NOT NULL,
69
+ reputation_score REAL,
70
+ domain_age_years REAL,
71
+ is_fact_checker INTEGER DEFAULT 0,
72
+ analysis_count INTEGER DEFAULT 0,
73
+ last_analyzed TIMESTAMP,
74
+ metadata TEXT,
75
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
76
+ );
77
+
78
+ -- Claims
79
+ CREATE TABLE IF NOT EXISTS claims (
80
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
81
+ claim_text TEXT NOT NULL,
82
+ claim_hash TEXT UNIQUE,
83
+ source_url TEXT,
84
+ extracted_entities TEXT,
85
+ credibility_score REAL,
86
+ verification_status TEXT DEFAULT 'unverified',
87
+ evidence_count INTEGER DEFAULT 0,
88
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
89
+ );
90
+
91
+ -- Evidence
92
+ CREATE TABLE IF NOT EXISTS evidence (
93
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
94
+ claim_id INTEGER,
95
+ doc_id TEXT,
96
+ doc_text TEXT,
97
+ relevance_score REAL,
98
+ retrieval_method TEXT DEFAULT 'bm25',
99
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
100
+ FOREIGN KEY (claim_id) REFERENCES claims(id)
101
+ );
102
+
103
+ -- Index
104
+ CREATE INDEX IF NOT EXISTS idx_analysis_url ON analysis_results(url);
105
+ CREATE INDEX IF NOT EXISTS idx_triple_subject ON rdf_triples(subject);
106
+ CREATE INDEX IF NOT EXISTS idx_triple_graph ON rdf_triples(graph_name);
107
+ CREATE INDEX IF NOT EXISTS idx_sources_domain ON sources(domain);
108
+ """)
109
+ self.conn.commit()
110
+ print(f"[SysCREDStore] SQLite initialisé: {self.db_path}")
111
+
112
+ # =========================================================================
113
+ # ONTOLOGY / RDF TRIPLES
114
+ # =========================================================================
115
+
116
+ def sync_ontology(self, ontology_manager) -> Dict[str, int]:
117
+ """
118
+ Synchronise les graphes RDFLib vers SQLite.
119
+
120
+ Args:
121
+ ontology_manager: Instance avec base_graph et data_graph
122
+ """
123
+ result = {'base_synced': 0, 'data_synced': 0}
124
+
125
+ try:
126
+ # Sync base ontology
127
+ if hasattr(ontology_manager, 'base_graph') and ontology_manager.base_graph:
128
+ result['base_synced'] = self._sync_graph(
129
+ ontology_manager.base_graph,
130
+ graph_name='base'
131
+ )
132
+
133
+ # Sync data graph
134
+ if hasattr(ontology_manager, 'data_graph') and ontology_manager.data_graph:
135
+ result['data_synced'] = self._sync_graph(
136
+ ontology_manager.data_graph,
137
+ graph_name='data'
138
+ )
139
+
140
+ self.conn.commit()
141
+ print(f"[SysCREDStore] Synced {result['base_synced']} base + {result['data_synced']} data triples")
142
+
143
+ except Exception as e:
144
+ result['error'] = str(e)
145
+ print(f"[SysCREDStore] Sync error: {e}")
146
+
147
+ return result
148
+
149
+ def _sync_graph(self, graph, graph_name: str) -> int:
150
+ """Sync un graphe RDFLib vers SQLite."""
151
+ from rdflib import Literal
152
+
153
+ count = 0
154
+ cursor = self.conn.cursor()
155
+
156
+ for s, p, o in graph:
157
+ subject = str(s)
158
+ predicate = str(p)
159
+ obj_value = str(o)
160
+ obj_type = 'literal' if isinstance(o, Literal) else 'uri'
161
+
162
+ try:
163
+ cursor.execute("""
164
+ INSERT OR IGNORE INTO rdf_triples
165
+ (subject, predicate, object, object_type, graph_name)
166
+ VALUES (?, ?, ?, ?, ?)
167
+ """, (subject, predicate, obj_value, obj_type, graph_name))
168
+ count += 1
169
+ except:
170
+ pass
171
+
172
+ return count
173
+
174
+ def get_triple_stats(self) -> Dict[str, int]:
175
+ """Statistiques des triplets."""
176
+ cursor = self.conn.cursor()
177
+
178
+ cursor.execute("SELECT COUNT(*) FROM rdf_triples WHERE graph_name = 'base'")
179
+ base = cursor.fetchone()[0]
180
+
181
+ cursor.execute("SELECT COUNT(*) FROM rdf_triples WHERE graph_name = 'data'")
182
+ data = cursor.fetchone()[0]
183
+
184
+ return {
185
+ 'base_triples': base,
186
+ 'data_triples': data,
187
+ 'total_triples': base + data
188
+ }
189
+
190
+ # =========================================================================
191
+ # ANALYSIS RESULTS
192
+ # =========================================================================
193
+
194
+ def save_analysis(self, url: str, credibility_score: float,
195
+ summary: str = None, score_details: Dict = None,
196
+ source_reputation: str = None, fact_check_count: int = 0) -> int:
197
+ """Sauvegarde un résultat d'analyse."""
198
+ domain = urlparse(url).netloc
199
+
200
+ cursor = self.conn.cursor()
201
+ cursor.execute("""
202
+ INSERT INTO analysis_results
203
+ (url, credibility_score, summary, score_details, source_reputation,
204
+ fact_check_count, domain)
205
+ VALUES (?, ?, ?, ?, ?, ?, ?)
206
+ """, (
207
+ url, credibility_score, summary,
208
+ json.dumps(score_details) if score_details else None,
209
+ source_reputation, fact_check_count, domain
210
+ ))
211
+ self.conn.commit()
212
+
213
+ result_id = cursor.lastrowid
214
+ print(f"[SysCREDStore] Saved analysis #{result_id} for {domain}")
215
+
216
+ # Update source stats
217
+ self._update_source(domain, credibility_score)
218
+
219
+ return result_id
220
+
221
+ def get_history(self, url: str = None, limit: int = 50) -> List[Dict]:
222
+ """Récupère l'historique des analyses."""
223
+ cursor = self.conn.cursor()
224
+
225
+ if url:
226
+ cursor.execute("""
227
+ SELECT * FROM analysis_results
228
+ WHERE url = ? ORDER BY created_at DESC LIMIT ?
229
+ """, (url, limit))
230
+ else:
231
+ cursor.execute("""
232
+ SELECT * FROM analysis_results
233
+ ORDER BY created_at DESC LIMIT ?
234
+ """, (limit,))
235
+
236
+ return [dict(row) for row in cursor.fetchall()]
237
+
238
+ # =========================================================================
239
+ # SOURCES
240
+ # =========================================================================
241
+
242
+ def _update_source(self, domain: str, score: float = None):
243
+ """Met à jour les stats d'une source."""
244
+ cursor = self.conn.cursor()
245
+
246
+ cursor.execute("SELECT id, analysis_count FROM sources WHERE domain = ?", (domain,))
247
+ row = cursor.fetchone()
248
+
249
+ if row:
250
+ cursor.execute("""
251
+ UPDATE sources SET
252
+ analysis_count = analysis_count + 1,
253
+ last_analyzed = CURRENT_TIMESTAMP,
254
+ reputation_score = COALESCE(?, reputation_score)
255
+ WHERE domain = ?
256
+ """, (score, domain))
257
+ else:
258
+ cursor.execute("""
259
+ INSERT INTO sources (domain, reputation_score, analysis_count, last_analyzed)
260
+ VALUES (?, ?, 1, CURRENT_TIMESTAMP)
261
+ """, (domain, score))
262
+
263
+ self.conn.commit()
264
+
265
+ def get_source(self, domain: str) -> Optional[Dict]:
266
+ """Récupère les infos d'une source."""
267
+ cursor = self.conn.cursor()
268
+ cursor.execute("SELECT * FROM sources WHERE domain = ?", (domain,))
269
+ row = cursor.fetchone()
270
+ return dict(row) if row else None
271
+
272
+ # =========================================================================
273
+ # GLOBAL STATS
274
+ # =========================================================================
275
+
276
+ def get_stats(self) -> Dict[str, Any]:
277
+ """Statistiques globales."""
278
+ cursor = self.conn.cursor()
279
+
280
+ cursor.execute("SELECT COUNT(*) FROM analysis_results")
281
+ total_analyses = cursor.fetchone()[0]
282
+
283
+ cursor.execute("SELECT COUNT(*) FROM sources")
284
+ unique_domains = cursor.fetchone()[0]
285
+
286
+ cursor.execute("SELECT AVG(credibility_score) FROM analysis_results")
287
+ avg_score = cursor.fetchone()[0]
288
+
289
+ triple_stats = self.get_triple_stats()
290
+
291
+ return {
292
+ 'total_analyses': total_analyses,
293
+ 'unique_domains': unique_domains,
294
+ 'avg_credibility': round(avg_score, 2) if avg_score else None,
295
+ **triple_stats
296
+ }
297
+
298
+ def close(self):
299
+ """Ferme la connexion."""
300
+ if self.conn:
301
+ self.conn.close()
302
+
303
+
304
+ # ============================================================================
305
+ # INTEGRATION
306
+ # ============================================================================
307
+
308
def sync_ontology_to_db():
    """Load the ontology, sync its graphs into SQLite and print stats.

    Returns:
        The SysCREDStore instance on success, or None when the project
        modules (ontology_manager / config) cannot be imported.
    """
    import sys
    sys.path.insert(0, str(BASE_DIR))

    try:
        from ontology_manager import OntologyManager
        from config import Config
    except ImportError as e:
        print(f"Import error: {e}")
        return None

    # Build the ontology manager from the configured graph paths.
    onto = OntologyManager(
        base_ontology_path=str(Config.ONTOLOGY_BASE_PATH),
        data_path=str(Config.ONTOLOGY_DATA_PATH)
    )

    # Mirror both graphs into the SQLite store and report the outcome.
    store = SysCREDStore()
    result = store.sync_ontology(onto)
    print(f"\n✅ Sync complete: {result}")

    stats = store.get_stats()
    print(f"📊 Stats: {stats}")

    return store
339
+
340
+
341
+ # ============================================================================
342
+ # CLI
343
+ # ============================================================================
344
+
345
if __name__ == "__main__":
    # CLI entry point: sync the ontology and report where the DB lives.
    banner = "=" * 60
    print(banner)
    print("SysCRED Storage - Synchronisation des triplets")
    print(banner)

    store = sync_ontology_to_db()

    if store is not None:
        print("\n✅ Base de données prête!")
        print(f" Fichier: {store.db_path}")
syscred/eeat_calculator.py CHANGED
@@ -1,41 +1,118 @@
 
1
  # -*- coding: utf-8 -*-
2
  """
3
- E-E-A-T Calculator Module - SysCRED
4
- ====================================
5
- Google Quality Rater Guidelines implementation.
6
 
7
- E-E-A-T Scores:
8
- - Experience: Domain age, content richness
9
- - Expertise: Technical vocabulary, citations
10
- - Authority: Estimated PageRank, backlinks
11
- - Trust: HTTPS, unbiased sentiment
12
-
13
- (c) Dominique S. Loyer - PhD Thesis Prototype
14
  """
15
 
 
 
16
  import re
17
- from typing import Dict, Optional
18
- from urllib.parse import urlparse
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
 
21
  class EEATCalculator:
22
  """
23
- Calculate E-E-A-T scores based on Google Quality Rater Guidelines.
 
 
 
 
 
 
24
  """
25
 
26
- # Technical terms that indicate expertise
27
- TECHNICAL_TERMS = {
28
- 'research', 'study', 'analysis', 'data', 'evidence', 'methodology',
29
- 'peer-reviewed', 'journal', 'university', 'professor', 'dr.', 'phd',
30
- 'statistics', 'experiment', 'hypothesis', 'publication', 'citation',
31
- 'algorithm', 'framework', 'systematic', 'empirical', 'quantitative'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
 
34
- # Trusted domains (simplified list)
35
- TRUSTED_DOMAINS = {
36
- '.edu', '.gov', '.org', 'reuters.com', 'apnews.com', 'bbc.com',
37
- 'nature.com', 'science.org', 'who.int', 'un.org', 'wikipedia.org',
38
- 'lemonde.fr', 'radio-canada.ca', 'uqam.ca', 'umontreal.ca'
 
39
  }
40
 
41
  def __init__(self):
@@ -44,227 +121,346 @@ class EEATCalculator:
44
 
45
  def calculate(
46
  self,
47
- url: Optional[str] = None,
48
- text: Optional[str] = None,
49
- sentiment_score: float = 0.5,
50
- has_citations: bool = False,
51
- domain_age_years: int = 0
52
- ) -> Dict:
 
 
 
 
53
  """
54
- Calculate E-E-A-T scores.
55
 
56
  Args:
57
  url: Source URL
58
- text: Content text
59
- sentiment_score: 0-1 (0.5 = neutral is best for trust)
60
- has_citations: Whether content has citations
61
- domain_age_years: Estimated domain age
62
-
 
 
 
 
63
  Returns:
64
- {
65
- 'experience': 0.75,
66
- 'expertise': 0.80,
67
- 'authority': 0.65,
68
- 'trust': 0.90,
69
- 'overall': 0.78,
70
- 'details': {...}
71
- }
72
  """
73
- details = {}
74
-
75
- # --- EXPERIENCE ---
76
- experience = 0.5
77
- if domain_age_years >= 10:
78
- experience += 0.3
79
- elif domain_age_years >= 5:
80
- experience += 0.2
81
- elif domain_age_years >= 2:
82
- experience += 0.1
83
-
84
- if text:
85
- word_count = len(text.split())
86
- if word_count >= 1000:
87
- experience += 0.15
88
- elif word_count >= 500:
89
- experience += 0.1
90
-
91
- experience = min(experience, 1.0)
92
- details['experience_factors'] = {
93
- 'domain_age_bonus': domain_age_years >= 2,
94
- 'content_richness': len(text.split()) if text else 0
95
- }
96
 
97
- # --- EXPERTISE ---
98
- expertise = 0.4
99
- tech_count = 0
 
 
 
100
 
101
- if text:
102
- text_lower = text.lower()
103
- for term in self.TECHNICAL_TERMS:
104
- if term in text_lower:
105
- tech_count += 1
106
-
107
- if tech_count >= 5:
108
- expertise += 0.35
109
- elif tech_count >= 3:
110
- expertise += 0.25
111
- elif tech_count >= 1:
112
- expertise += 0.15
113
-
114
- if has_citations:
115
- expertise += 0.2
116
-
117
- expertise = min(expertise, 1.0)
118
- details['expertise_factors'] = {
119
- 'technical_terms_found': tech_count,
120
- 'has_citations': has_citations
121
- }
122
 
123
- # --- AUTHORITY ---
124
- authority = 0.3
 
 
 
125
 
126
- if url:
127
- parsed = urlparse(url)
128
- domain = parsed.netloc.lower()
129
-
130
- for trusted in self.TRUSTED_DOMAINS:
131
- if trusted in domain:
132
- authority += 0.4
133
- break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
- if parsed.scheme == 'https':
136
- authority += 0.1
137
-
138
- # Check for author indicators in text
139
- if text:
140
- author_patterns = [r'by\s+\w+\s+\w+', r'author:', r'written by', r'par\s+\w+']
141
- for pattern in author_patterns:
142
- if re.search(pattern, text.lower()):
143
- authority += 0.15
144
- break
145
-
146
- authority = min(authority, 1.0)
147
- details['authority_factors'] = {
148
- 'trusted_domain': False,
149
- 'https': url and urlparse(url).scheme == 'https' if url else False
150
- }
151
 
152
- # --- TRUST ---
153
- trust = 0.5
154
-
155
- # Neutral sentiment is best (0.5)
156
- sentiment_deviation = abs(sentiment_score - 0.5)
157
- if sentiment_deviation < 0.1:
158
- trust += 0.3 # Very neutral
159
- elif sentiment_deviation < 0.2:
160
- trust += 0.2
161
- elif sentiment_deviation < 0.3:
162
- trust += 0.1
163
-
164
- if url and urlparse(url).scheme == 'https':
165
- trust += 0.15
166
-
167
- trust = min(trust, 1.0)
168
- details['trust_factors'] = {
169
- 'sentiment_neutrality': 1 - sentiment_deviation * 2,
170
- 'secure_connection': url and 'https' in url if url else False
171
- }
172
 
173
- # --- OVERALL ---
174
- overall = (experience * 0.2 + expertise * 0.3 +
175
- authority * 0.25 + trust * 0.25)
 
 
176
 
177
- return {
178
- 'experience': round(experience, 2),
179
- 'expertise': round(expertise, 2),
180
- 'authority': round(authority, 2),
181
- 'trust': round(trust, 2),
182
- 'overall': round(overall, 2),
183
- 'details': details
184
- }
185
 
186
- def get_explanation(self, scores: Dict) -> str:
187
- """Generate human-readable explanation of E-E-A-T scores."""
 
 
 
 
 
 
 
 
 
 
 
188
  explanations = []
189
 
190
- exp = scores.get('experience', 0)
191
- if exp >= 0.7:
192
- explanations.append("✅ Expérience: Source établie avec contenu riche")
193
- elif exp >= 0.5:
194
- explanations.append("⚠️ Expérience: Source moyennement établie")
195
  else:
196
- explanations.append(" Expérience: Source nouvelle ou contenu limité")
197
 
198
- ext = scores.get('expertise', 0)
199
- if ext >= 0.7:
200
- explanations.append("✅ Expertise: Vocabulaire technique, citations présentes")
201
- elif ext >= 0.5:
202
- explanations.append("⚠️ Expertise: Niveau technique moyen")
203
  else:
204
- explanations.append(" Expertise: Manque de terminologie spécialisée")
205
 
206
- auth = scores.get('authority', 0)
207
- if auth >= 0.7:
208
- explanations.append("✅ Autorité: Domaine reconnu et fiable")
209
- elif auth >= 0.5:
210
- explanations.append("⚠️ Autorité: Niveau d'autorité moyen")
211
  else:
212
- explanations.append(" Autorité: Source non reconnue")
213
 
214
- tr = scores.get('trust', 0)
215
- if tr >= 0.7:
216
- explanations.append("✅ Confiance: Ton neutre, connexion sécurisée")
217
- elif tr >= 0.5:
218
- explanations.append("⚠️ Confiance: Niveau de confiance moyen")
219
  else:
220
- explanations.append(" Confiance: Ton biaisé ou connexion non sécurisée")
221
 
222
  return "\n".join(explanations)
223
 
224
 
225
- # Singleton
226
- _calculator = None
227
-
228
- def get_calculator() -> EEATCalculator:
229
- """Get or create E-E-A-T calculator singleton."""
230
- global _calculator
231
- if _calculator is None:
232
- _calculator = EEATCalculator()
233
- return _calculator
234
-
235
-
236
- # --- Testing ---
237
  if __name__ == "__main__":
238
- print("=" * 60)
239
- print("SysCRED E-E-A-T Calculator - Test")
240
- print("=" * 60)
241
-
242
  calc = EEATCalculator()
243
 
244
- test_url = "https://www.nature.com/articles/example"
245
  test_text = """
246
- A peer-reviewed study published in the journal Nature found evidence
247
- that the new methodology significantly improves research outcomes.
248
- Dr. Smith from Harvard University presented the statistics at the conference.
249
  """
250
 
251
- result = calc.calculate(
 
 
 
 
 
252
  url=test_url,
253
  text=test_text,
254
- sentiment_score=0.5,
255
- has_citations=True,
256
- domain_age_years=15
 
257
  )
258
 
259
- print("\n--- E-E-A-T Scores ---")
260
- print(f" Experience: {result['experience']:.0%}")
261
- print(f" Expertise: {result['expertise']:.0%}")
262
- print(f" Authority: {result['authority']:.0%}")
263
- print(f" Trust: {result['trust']:.0%}")
264
- print(f" ─────────────────")
265
- print(f" OVERALL: {result['overall']:.0%}")
266
-
267
- print("\n--- Explanation ---")
268
- print(calc.get_explanation(result))
269
-
270
- print("\n" + "=" * 60)
 
1
+ #!/usr/bin/env python3
2
  # -*- coding: utf-8 -*-
3
  """
4
+ E-E-A-T Metrics Calculator for SysCRED
5
+ ========================================
6
+ Calculates Google-style E-E-A-T metrics (Experience, Expertise, Authority, Trust).
7
 
8
+ These metrics mirror modern Google ranking signals:
9
+ - Experience: Domain age, content freshness
10
+ - Expertise: Author identification, depth of content
11
+ - Authority: PageRank simulation, citations/backlinks
12
+ - Trust: HTTPS, fact-checks, low bias score
 
 
13
  """
14
 
15
+ from typing import Dict, Any, Optional, List
16
+ from dataclasses import dataclass
17
  import re
18
+ from datetime import datetime
19
+ import logging
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
@dataclass
class EEATScore:
    """E-E-A-T score container.

    All four components are floats in [0, 1]. The formatted percentage
    accessors (``experience_pct`` etc.) are exposed as properties so callers
    such as ``EEATCalculator.explain_score`` can read them directly —
    previously those strings only existed as keys inside ``to_dict()``, so
    attribute access like ``eeat.experience_pct`` raised AttributeError.
    """
    experience: float  # 0-1
    expertise: float   # 0-1
    authority: float   # 0-1
    trust: float       # 0-1

    @property
    def overall(self) -> float:
        """Weighted average of all E-E-A-T components."""
        # Weights based on Google's emphasis (authority counts most).
        weights = {
            'experience': 0.15,
            'expertise': 0.25,
            'authority': 0.35,
            'trust': 0.25
        }
        return (
            self.experience * weights['experience'] +
            self.expertise * weights['expertise'] +
            self.authority * weights['authority'] +
            self.trust * weights['trust']
        )

    @staticmethod
    def _as_pct(value: float) -> str:
        """Format a 0-1 score as a truncated integer percentage string."""
        return f"{int(value * 100)}%"

    @property
    def experience_pct(self) -> str:
        return self._as_pct(self.experience)

    @property
    def expertise_pct(self) -> str:
        return self._as_pct(self.expertise)

    @property
    def authority_pct(self) -> str:
        return self._as_pct(self.authority)

    @property
    def trust_pct(self) -> str:
        return self._as_pct(self.trust)

    @property
    def overall_pct(self) -> str:
        return self._as_pct(self.overall)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for JSON serialization."""
        return {
            'experience': round(self.experience, 3),
            'expertise': round(self.expertise, 3),
            'authority': round(self.authority, 3),
            'trust': round(self.trust, 3),
            'overall': round(self.overall, 3),
            'experience_pct': self.experience_pct,
            'expertise_pct': self.expertise_pct,
            'authority_pct': self.authority_pct,
            'trust_pct': self.trust_pct,
            'overall_pct': self.overall_pct
        }
+ }
63
 
64
 
65
  class EEATCalculator:
66
  """
67
+ Calculate E-E-A-T metrics from various signals.
68
+
69
+ Mirrors Google's quality rater guidelines:
70
+ - Experience: Has the author demonstrated real experience?
71
+ - Expertise: Is the content expert-level?
72
+ - Authority: Is the source recognized as authoritative?
73
+ - Trust: Is the source trustworthy?
74
  """
75
 
76
+ # Known authoritative domains
77
+ AUTHORITATIVE_DOMAINS = {
78
+ # News
79
+ 'lemonde.fr': 0.95,
80
+ 'lefigaro.fr': 0.90,
81
+ 'liberation.fr': 0.88,
82
+ 'nytimes.com': 0.95,
83
+ 'washingtonpost.com': 0.93,
84
+ 'theguardian.com': 0.92,
85
+ 'bbc.com': 0.94,
86
+ 'bbc.co.uk': 0.94,
87
+ 'reuters.com': 0.96,
88
+ 'apnews.com': 0.95,
89
+ # Academic
90
+ 'nature.com': 0.98,
91
+ 'science.org': 0.98,
92
+ 'pubmed.ncbi.nlm.nih.gov': 0.97,
93
+ 'scholar.google.com': 0.85,
94
+ # Government
95
+ 'gouv.fr': 0.90,
96
+ 'gov.uk': 0.90,
97
+ 'whitehouse.gov': 0.88,
98
+ 'europa.eu': 0.92,
99
+ # Fact-checkers
100
+ 'snopes.com': 0.88,
101
+ 'factcheck.org': 0.90,
102
+ 'politifact.com': 0.88,
103
+ 'fullfact.org': 0.89,
104
+ # Wikipedia (moderate authority)
105
+ 'wikipedia.org': 0.75,
106
+ 'fr.wikipedia.org': 0.75,
107
+ 'en.wikipedia.org': 0.75,
108
  }
109
 
110
+ # Low-trust domains (misinformation sources)
111
+ LOW_TRUST_DOMAINS = {
112
+ 'infowars.com': 0.1,
113
+ 'breitbart.com': 0.3,
114
+ 'naturalnews.com': 0.15,
115
+ # Add more as needed
116
  }
117
 
118
  def __init__(self):
 
121
 
122
  def calculate(
123
  self,
124
+ url: str,
125
+ text: str,
126
+ nlp_analysis: Optional[Dict[str, Any]] = None,
127
+ pagerank: Optional[float] = None,
128
+ fact_checks: Optional[List[Dict]] = None,
129
+ domain_age_years: Optional[float] = None,
130
+ has_https: bool = True,
131
+ author_identified: bool = False,
132
+ seo_score: Optional[float] = None
133
+ ) -> EEATScore:
134
  """
135
+ Calculate E-E-A-T scores from available signals.
136
 
137
  Args:
138
  url: Source URL
139
+ text: Article text content
140
+ nlp_analysis: NLP analysis results (sentiment, coherence, bias)
141
+ pagerank: Simulated PageRank score (0-1)
142
+ fact_checks: List of fact-check results
143
+ domain_age_years: Domain age in years (from WHOIS)
144
+ has_https: Whether site uses HTTPS
145
+ author_identified: Whether author is clearly identified
146
+ seo_score: SEO/technical quality score
147
+
148
  Returns:
149
+ EEATScore with all component scores
 
 
 
 
 
 
 
150
  """
151
+ # Extract domain from URL
152
+ domain = self._extract_domain(url)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
+ # Calculate each component
155
+ experience = self._calculate_experience(
156
+ domain_age_years,
157
+ text,
158
+ nlp_analysis
159
+ )
160
 
161
+ expertise = self._calculate_expertise(
162
+ text,
163
+ author_identified,
164
+ nlp_analysis
165
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
+ authority = self._calculate_authority(
168
+ domain,
169
+ pagerank,
170
+ seo_score
171
+ )
172
 
173
+ trust = self._calculate_trust(
174
+ domain,
175
+ has_https,
176
+ fact_checks,
177
+ nlp_analysis
178
+ )
179
+
180
+ return EEATScore(
181
+ experience=experience,
182
+ expertise=expertise,
183
+ authority=authority,
184
+ trust=trust
185
+ )
186
+
187
+ def _extract_domain(self, url: str) -> str:
188
+ """Extract domain from URL."""
189
+ import re
190
+ match = re.search(r'https?://(?:www\.)?([^/]+)', url)
191
+ return match.group(1).lower() if match else url.lower()
192
+
193
+ def _calculate_experience(
194
+ self,
195
+ domain_age_years: Optional[float],
196
+ text: str,
197
+ nlp_analysis: Optional[Dict]
198
+ ) -> float:
199
+ """
200
+ Calculate Experience score.
201
+
202
+ Factors:
203
+ - Domain age (longer = more experience)
204
+ - Content freshness (recently updated)
205
+ - First-hand experience indicators in text
206
+ """
207
+ score = 0.5 # Base score
208
+
209
+ # Domain age contribution (max 0.3)
210
+ if domain_age_years is not None:
211
+ age_score = min(domain_age_years / 20, 1.0) * 0.3 # 20 years = max
212
+ score += age_score
213
+ else:
214
+ score += 0.15 # Assume moderate age
215
+
216
+ # Content depth contribution (max 0.2)
217
+ word_count = len(text.split()) if text else 0
218
+ if word_count > 1000:
219
+ score += 0.2
220
+ elif word_count > 500:
221
+ score += 0.15
222
+ elif word_count > 200:
223
+ score += 0.1
224
+
225
+ # First-hand experience indicators (max 0.1)
226
+ experience_indicators = [
227
+ r'\b(j\'ai|je suis|nous avons|I have|we have|in my experience)\b',
228
+ r'\b(interview|entretien|témoignage|witness|firsthand)\b',
229
+ r'\b(sur place|on the ground|eyewitness)\b'
230
+ ]
231
+ for pattern in experience_indicators:
232
+ if re.search(pattern, text, re.IGNORECASE):
233
+ score += 0.03
234
+
235
+ return min(score, 1.0)
236
+
237
+ def _calculate_expertise(
238
+ self,
239
+ text: str,
240
+ author_identified: bool,
241
+ nlp_analysis: Optional[Dict]
242
+ ) -> float:
243
+ """
244
+ Calculate Expertise score.
245
+
246
+ Factors:
247
+ - Author identification
248
+ - Technical depth of content
249
+ - Citation of sources
250
+ - Coherence (from NLP)
251
+ """
252
+ score = 0.4 # Base score
253
+
254
+ # Author identification (0.2)
255
+ if author_identified:
256
+ score += 0.2
257
+
258
+ # Citation indicators (max 0.2)
259
+ citation_patterns = [
260
+ r'\b(selon|according to|d\'après|source:)\b',
261
+ r'\b(étude|study|research|rapport|report)\b',
262
+ r'\b(expert|spécialiste|chercheur|professor|Dr\.)\b',
263
+ r'\[([\d]+)\]', # [1] style citations
264
+ r'https?://[^\s]+' # Links
265
+ ]
266
+ citation_count = 0
267
+ for pattern in citation_patterns:
268
+ citation_count += len(re.findall(pattern, text, re.IGNORECASE))
269
+ score += min(citation_count * 0.02, 0.2)
270
+
271
+ # Coherence from NLP analysis (0.2)
272
+ if nlp_analysis and 'coherence' in nlp_analysis:
273
+ coherence = nlp_analysis['coherence']
274
+ if isinstance(coherence, dict):
275
+ coherence = coherence.get('score', 0.5)
276
+ score += coherence * 0.2
277
+ else:
278
+ score += 0.1 # Assume moderate coherence
279
+
280
+ return min(score, 1.0)
281
+
282
+ def _calculate_authority(
283
+ self,
284
+ domain: str,
285
+ pagerank: Optional[float],
286
+ seo_score: Optional[float]
287
+ ) -> float:
288
+ """
289
+ Calculate Authority score.
290
+
291
+ Factors:
292
+ - Known authoritative domain
293
+ - PageRank simulation
294
+ - SEO/technical quality
295
+ """
296
+ score = 0.3 # Base score
297
+
298
+ # Known domain authority (max 0.5)
299
+ for known_domain, authority in self.AUTHORITATIVE_DOMAINS.items():
300
+ if known_domain in domain:
301
+ score = max(score, authority * 0.5 + 0.3)
302
+ break
303
+
304
+ # Check low-trust domains
305
+ for low_trust_domain, low_score in self.LOW_TRUST_DOMAINS.items():
306
+ if low_trust_domain in domain:
307
+ score = min(score, low_score)
308
+ break
309
+
310
+ # PageRank contribution (max 0.3)
311
+ if pagerank is not None:
312
+ score += pagerank * 0.3
313
+ else:
314
+ score += 0.15 # Assume moderate pagerank
315
+
316
+ # SEO score contribution (max 0.2)
317
+ if seo_score is not None:
318
+ score += seo_score * 0.2
319
+ else:
320
+ score += 0.1
321
+
322
+ return min(score, 1.0)
323
+
324
+ def _calculate_trust(
325
+ self,
326
+ domain: str,
327
+ has_https: bool,
328
+ fact_checks: Optional[List[Dict]],
329
+ nlp_analysis: Optional[Dict]
330
+ ) -> float:
331
+ """
332
+ Calculate Trust score.
333
+
334
+ Factors:
335
+ - HTTPS
336
+ - Fact-check results
337
+ - Bias score (low = better)
338
+ - Known trustworthy domain
339
+ """
340
+ score = 0.4 # Base score
341
+
342
+ # HTTPS (0.1)
343
+ if has_https:
344
+ score += 0.1
345
+
346
+ # Fact-check results (max 0.3)
347
+ if fact_checks:
348
+ positive_checks = sum(1 for fc in fact_checks
349
+ if fc.get('rating', '').lower() in ['true', 'vrai', 'correct'])
350
+ negative_checks = sum(1 for fc in fact_checks
351
+ if fc.get('rating', '').lower() in ['false', 'faux', 'incorrect', 'pants-fire'])
352
 
353
+ if positive_checks > 0:
354
+ score += 0.2
355
+ if negative_checks > 0:
356
+ score -= 0.3
 
 
 
 
 
 
 
 
 
 
 
 
357
 
358
+ # Bias score (max 0.2, lower bias = higher trust)
359
+ if nlp_analysis:
360
+ bias_data = nlp_analysis.get('bias_analysis', {})
361
+ if isinstance(bias_data, dict):
362
+ bias_score = bias_data.get('score', 0.3)
363
+ else:
364
+ bias_score = 0.3
365
+ # Invert: low bias = high trust contribution
366
+ score += (1 - bias_score) * 0.2
367
+ else:
368
+ score += 0.1
 
 
 
 
 
 
 
 
 
369
 
370
+ # Known trustworthy domain (0.1)
371
+ for known_domain in self.AUTHORITATIVE_DOMAINS:
372
+ if known_domain in domain:
373
+ score += 0.1
374
+ break
375
 
376
+ # Known low-trust domain (penalty)
377
+ for low_trust_domain in self.LOW_TRUST_DOMAINS:
378
+ if low_trust_domain in domain:
379
+ score -= 0.3
380
+ break
381
+
382
+ return max(min(score, 1.0), 0.0)
 
383
 
384
+ def explain_score(self, eeat: EEATScore, url: str) -> str:
385
+ """
386
+ Generate human-readable explanation of E-E-A-T score.
387
+
388
+ Args:
389
+ eeat: EEATScore instance
390
+ url: Source URL
391
+
392
+ Returns:
393
+ Formatted explanation string
394
+ """
395
+ domain = self._extract_domain(url)
396
+
397
  explanations = []
398
 
399
+ # Experience
400
+ if eeat.experience >= 0.8:
401
+ explanations.append(f"✅ **Expérience élevée** ({eeat.experience_pct}): Source établie depuis longtemps")
402
+ elif eeat.experience >= 0.5:
403
+ explanations.append(f"🔶 **Expérience moyenne** ({eeat.experience_pct}): Source modérément établie")
404
  else:
405
+ explanations.append(f"⚠️ **Expérience faible** ({eeat.experience_pct}): Source récente ou peu connue")
406
 
407
+ # Expertise
408
+ if eeat.expertise >= 0.8:
409
+ explanations.append(f"✅ **Expertise élevée** ({eeat.expertise_pct}): Contenu approfondi avec citations")
410
+ elif eeat.expertise >= 0.5:
411
+ explanations.append(f"🔶 **Expertise moyenne** ({eeat.expertise_pct}): Contenu standard")
412
  else:
413
+ explanations.append(f"⚠️ **Expertise faible** ({eeat.expertise_pct}): Manque de profondeur")
414
 
415
+ # Authority
416
+ if eeat.authority >= 0.8:
417
+ explanations.append(f"✅ **Autorité élevée** ({eeat.authority_pct}): Source très citée et reconnue")
418
+ elif eeat.authority >= 0.5:
419
+ explanations.append(f"🔶 **Autorité moyenne** ({eeat.authority_pct}): Source modérément reconnue")
420
  else:
421
+ explanations.append(f"⚠️ **Autorité faible** ({eeat.authority_pct}): Peu de citations externes")
422
 
423
+ # Trust
424
+ if eeat.trust >= 0.8:
425
+ explanations.append(f"✅ **Confiance élevée** ({eeat.trust_pct}): Faits vérifiés, pas de biais")
426
+ elif eeat.trust >= 0.5:
427
+ explanations.append(f"🔶 **Confiance moyenne** ({eeat.trust_pct}): Quelques signaux de confiance")
428
  else:
429
+ explanations.append(f"⚠️ **Confiance faible** ({eeat.trust_pct}): Prudence recommandée")
430
 
431
  return "\n".join(explanations)
432
 
433
 
434
+ # Test
 
 
 
 
 
 
 
 
 
 
 
435
if __name__ == "__main__":
    # Smoke test for the E-E-A-T calculator.
    calc = EEATCalculator()

    test_url = "https://www.lemonde.fr/politique/article/2024/01/06/trump.html"
    test_text = """
    Selon une étude du chercheur Dr. Martin, l'insurrection du 6 janvier 2021
    au Capitol a été un événement marquant. Notre reporter sur place a témoigné
    des événements. Les experts politiques analysent les conséquences.
    """

    nlp_analysis = {
        'coherence': {'score': 0.8},
        'bias_analysis': {'score': 0.2}
    }

    eeat = calc.calculate(
        url=test_url,
        text=test_text,
        nlp_analysis=nlp_analysis,
        pagerank=0.7,
        has_https=True,
        author_identified=True
    )

    # Fix: the plain EEATScore dataclass exposes the formatted percentages
    # via to_dict(); reading eeat.experience_pct directly raised
    # AttributeError.
    scores = eeat.to_dict()

    print("=== E-E-A-T Scores ===")
    print(f"Experience: {scores['experience_pct']}")
    print(f"Expertise: {scores['expertise_pct']}")
    print(f"Authority: {scores['authority_pct']}")
    print(f"Trust: {scores['trust_pct']}")
    print(f"Overall: {scores['overall_pct']}")
    print("\n=== Explanation ===")
    print(calc.explain_score(eeat, test_url))
 
 
 
 
syscred/ner_analyzer.py CHANGED
@@ -1,198 +1,283 @@
 
1
  # -*- coding: utf-8 -*-
2
  """
3
- NER Analyzer Module - SysCRED
4
- ==============================
5
- Named Entity Recognition for fact-checking enhancement.
6
 
7
- Extracts: PERSON, ORG, GPE, DATE, MISC entities
8
-
9
- (c) Dominique S. Loyer - PhD Thesis Prototype
 
 
 
 
10
  """
11
 
12
- import os
 
13
 
14
- # Check for spaCy
15
  try:
16
  import spacy
 
17
  HAS_SPACY = True
18
  except ImportError:
19
  HAS_SPACY = False
20
- print("[NER] spaCy not installed. NER disabled.")
 
 
21
 
22
 
23
  class NERAnalyzer:
24
  """
25
- Named Entity Recognition using spaCy.
26
 
27
- Supports:
28
- - French (fr_core_news_md)
29
- - English (en_core_web_sm)
30
  """
31
 
32
- # Entity type mapping with icons
33
- ENTITY_ICONS = {
34
- 'PERSON': '👤',
35
- 'PER': '👤',
36
- 'ORG': '🏢',
37
- 'GPE': '📍',
38
- 'LOC': '📍',
39
- 'DATE': '📅',
40
- 'TIME': '🕐',
41
- 'MONEY': '💰',
42
- 'MISC': '🏷️',
43
- 'NORP': '👥',
44
- 'FAC': '🏛️',
45
- 'PRODUCT': '📦',
46
- 'EVENT': '🎉',
47
- 'WORK_OF_ART': '🎨',
48
- 'LAW': '⚖️',
49
- 'LANGUAGE': '🗣️',
50
  }
51
 
52
- def __init__(self, language: str = 'en'):
53
  """
54
  Initialize NER analyzer.
55
 
56
  Args:
57
- language: 'en' or 'fr'
 
58
  """
59
- self.language = language
 
60
  self.nlp = None
61
- self.enabled = False
62
 
63
  if HAS_SPACY:
64
- self._load_model()
65
-
66
- def _load_model(self):
67
- """Load the appropriate spaCy model."""
68
- models = {
69
- 'en': ['en_core_web_sm', 'en_core_web_md'],
70
- 'fr': ['fr_core_news_md', 'fr_core_news_sm']
71
- }
72
-
73
- for model_name in models.get(self.language, models['en']):
74
  try:
75
  self.nlp = spacy.load(model_name)
76
- self.enabled = True
77
- print(f"[NER] Loaded model: {model_name}")
78
- break
79
- except OSError:
80
- continue
81
-
82
- if not self.enabled:
83
- print(f"[NER] No model found for language: {self.language}")
 
 
84
 
85
- def extract_entities(self, text: str) -> dict:
86
  """
87
  Extract named entities from text.
88
 
 
 
 
89
  Returns:
90
- {
91
- 'entities': [
92
- {'text': 'Emmanuel Macron', 'type': 'PERSON', 'icon': '👤'},
93
- ...
94
- ],
95
- 'summary': {
96
- 'PERSON': ['Emmanuel Macron'],
97
- 'ORG': ['UQAM', 'Google'],
98
- ...
99
- }
100
- }
101
  """
102
- if not self.enabled or not text:
103
- return {'entities': [], 'summary': {}}
104
 
 
 
 
 
 
 
 
 
 
105
  doc = self.nlp(text)
106
-
107
- entities = []
108
- summary = {}
109
- seen = set()
110
 
111
  for ent in doc.ents:
112
- # Avoid duplicates
113
- key = (ent.text.lower(), ent.label_)
114
- if key in seen:
115
- continue
116
- seen.add(key)
117
 
118
- entity = {
 
 
 
 
 
 
 
119
  'text': ent.text,
120
- 'type': ent.label_,
121
- 'icon': self.ENTITY_ICONS.get(ent.label_, '🏷️'),
122
  'start': ent.start_char,
123
- 'end': ent.end_char
 
 
 
 
124
  }
125
- entities.append(entity)
126
 
127
- # Group by type
128
- if ent.label_ not in summary:
129
- summary[ent.label_] = []
130
- summary[ent.label_].append(ent.text)
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
- return {
133
- 'entities': entities,
134
- 'summary': summary,
135
- 'count': len(entities)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
- def analyze_for_factcheck(self, text: str) -> dict:
139
  """
140
- Analyze text for fact-checking relevance.
141
 
142
- Returns entities with credibility hints.
 
 
 
 
143
  """
144
- result = self.extract_entities(text)
 
 
 
 
 
 
 
 
 
 
145
 
146
- # Add fact-checking hints
147
- hints = []
 
 
 
148
 
149
- for ent in result.get('entities', []):
150
- if ent['type'] in ['PERSON', 'PER']:
151
- hints.append(f"Verify claims about {ent['text']}")
152
- elif ent['type'] == 'ORG':
153
- hints.append(f"Check {ent['text']} official sources")
154
- elif ent['type'] in ['GPE', 'LOC']:
155
- hints.append(f"Verify location: {ent['text']}")
156
- elif ent['type'] == 'DATE':
157
- hints.append(f"Confirm date: {ent['text']}")
 
 
 
 
 
158
 
159
- result['fact_check_hints'] = hints[:5] # Top 5 hints
 
160
  return result
161
 
162
 
163
- # Singleton instance
164
- _analyzer = None
 
165
 
166
- def get_analyzer(language: str = 'en') -> NERAnalyzer:
167
- """Get or create the NER analyzer singleton."""
168
- global _analyzer
169
- if _analyzer is None:
170
- _analyzer = NERAnalyzer(language)
171
- return _analyzer
172
 
173
 
174
- # --- Testing ---
175
  if __name__ == "__main__":
176
- print("=" * 60)
177
- print("SysCRED NER Analyzer - Test")
178
- print("=" * 60)
179
-
180
- analyzer = NERAnalyzer('en')
181
 
182
  test_text = """
183
- Emmanuel Macron announced today that France will invest €500 million
184
- in AI research. The announcement was made at the UQAM in Montreal, Canada
185
- on February 8, 2026. Google and Microsoft also confirmed their participation.
186
  """
187
 
188
- result = analyzer.analyze_for_factcheck(test_text)
189
-
190
- print("\n--- Entities Found ---")
191
- for ent in result['entities']:
192
- print(f" {ent['icon']} {ent['text']} ({ent['type']})")
193
-
194
- print("\n--- Fact-Check Hints ---")
195
- for hint in result.get('fact_check_hints', []):
196
- print(f" • {hint}")
197
-
198
- print("\n" + "=" * 60)
 
1
+ #!/usr/bin/env python3
2
  # -*- coding: utf-8 -*-
3
  """
4
+ Named Entity Recognition (NER) Analyzer for SysCRED
5
+ ====================================================
6
+ Extracts named entities from text using spaCy.
7
 
8
+ Entities detected:
9
+ - PER: Persons (Donald Trump, Emmanuel Macron)
10
+ - ORG: Organizations (FBI, UN, Google)
11
+ - LOC: Locations (Paris, Capitol)
12
+ - DATE: Dates (January 6, 2021)
13
+ - MONEY: Amounts ($10 million)
14
+ - EVENT: Events (insurrection, election)
15
  """
16
 
17
+ from typing import Dict, List, Any, Optional
18
+ import logging
19
 
20
+ # Try to import spaCy
21
  try:
22
  import spacy
23
+ from spacy.language import Language
24
  HAS_SPACY = True
25
  except ImportError:
26
  HAS_SPACY = False
27
+ spacy = None
28
+
29
+ logger = logging.getLogger(__name__)
30
 
31
 
32
  class NERAnalyzer:
33
  """
34
+ Named Entity Recognition analyzer using spaCy.
35
 
36
+ Supports French (fr_core_news_md) and English (en_core_web_md).
37
+ Falls back to heuristic extraction if spaCy is not available.
 
38
  """
39
 
40
+ # Entity type mappings for display
41
+ ENTITY_LABELS = {
42
+ 'PER': {'fr': 'Personne', 'en': 'Person', 'emoji': '👤'},
43
+ 'PERSON': {'fr': 'Personne', 'en': 'Person', 'emoji': '👤'},
44
+ 'ORG': {'fr': 'Organisation', 'en': 'Organization', 'emoji': '🏢'},
45
+ 'LOC': {'fr': 'Lieu', 'en': 'Location', 'emoji': '📍'},
46
+ 'GPE': {'fr': 'Lieu géopolitique', 'en': 'Geopolitical', 'emoji': '🌍'},
47
+ 'DATE': {'fr': 'Date', 'en': 'Date', 'emoji': '📅'},
48
+ 'TIME': {'fr': 'Heure', 'en': 'Time', 'emoji': '⏰'},
49
+ 'MONEY': {'fr': 'Montant', 'en': 'Money', 'emoji': '💰'},
50
+ 'PERCENT': {'fr': 'Pourcentage', 'en': 'Percent', 'emoji': '📊'},
51
+ 'EVENT': {'fr': 'Événement', 'en': 'Event', 'emoji': '📰'},
52
+ 'PRODUCT': {'fr': 'Produit', 'en': 'Product', 'emoji': '📦'},
53
+ 'LAW': {'fr': 'Loi', 'en': 'Law', 'emoji': '⚖️'},
54
+ 'NORP': {'fr': 'Groupe', 'en': 'Group', 'emoji': '👥'},
55
+ 'MISC': {'fr': 'Divers', 'en': 'Miscellaneous', 'emoji': '🔖'},
 
 
56
  }
57
 
58
+ def __init__(self, model_name: str = "fr_core_news_md", fallback: bool = True):
59
  """
60
  Initialize NER analyzer.
61
 
62
  Args:
63
+ model_name: spaCy model to load (fr_core_news_md, en_core_web_md)
64
+ fallback: If True, use heuristics when spaCy unavailable
65
  """
66
+ self.model_name = model_name
67
+ self.fallback = fallback
68
  self.nlp = None
69
+ self.use_heuristics = False
70
 
71
  if HAS_SPACY:
 
 
 
 
 
 
 
 
 
 
72
  try:
73
  self.nlp = spacy.load(model_name)
74
+ logger.info(f"[NER] Loaded spaCy model: {model_name}")
75
+ except OSError as e:
76
+ logger.warning(f"[NER] Could not load model {model_name}: {e}")
77
+ if fallback:
78
+ self.use_heuristics = True
79
+ logger.info("[NER] Using heuristic entity extraction")
80
+ else:
81
+ if fallback:
82
+ self.use_heuristics = True
83
+ logger.info("[NER] spaCy not installed. Using heuristic extraction")
84
 
85
+ def extract_entities(self, text: str) -> Dict[str, List[Dict[str, Any]]]:
86
  """
87
  Extract named entities from text.
88
 
89
+ Args:
90
+ text: Input text to analyze
91
+
92
  Returns:
93
+ Dictionary mapping entity types to lists of entities
94
+ Each entity has: text, start, end, label, label_display, emoji, confidence
 
 
 
 
 
 
 
 
 
95
  """
96
+ if not text or len(text.strip()) == 0:
97
+ return {}
98
 
99
+ if self.nlp:
100
+ return self._extract_with_spacy(text)
101
+ elif self.use_heuristics:
102
+ return self._extract_with_heuristics(text)
103
+ else:
104
+ return {}
105
+
106
+ def _extract_with_spacy(self, text: str) -> Dict[str, List[Dict[str, Any]]]:
107
+ """Extract entities using spaCy NLP."""
108
  doc = self.nlp(text)
109
+ entities: Dict[str, List[Dict[str, Any]]] = {}
 
 
 
110
 
111
  for ent in doc.ents:
112
+ label = ent.label_
 
 
 
 
113
 
114
+ # Get display info
115
+ label_info = self.ENTITY_LABELS.get(label, {
116
+ 'fr': label,
117
+ 'en': label,
118
+ 'emoji': '🔖'
119
+ })
120
+
121
+ entity_data = {
122
  'text': ent.text,
 
 
123
  'start': ent.start_char,
124
+ 'end': ent.end_char,
125
+ 'label': label,
126
+ 'label_display': label_info.get('fr', label),
127
+ 'emoji': label_info.get('emoji', '🔖'),
128
+ 'confidence': 0.85 # spaCy doesn't provide confidence by default
129
  }
 
130
 
131
+ if label not in entities:
132
+ entities[label] = []
133
+
134
+ # Avoid duplicates
135
+ if not any(e['text'].lower() == entity_data['text'].lower() for e in entities[label]):
136
+ entities[label].append(entity_data)
137
+
138
+ return entities
139
+
140
+ def _extract_with_heuristics(self, text: str) -> Dict[str, List[Dict[str, Any]]]:
141
+ """
142
+ Fallback heuristic entity extraction.
143
+ Uses pattern matching for common entities.
144
+ """
145
+ import re
146
+ entities: Dict[str, List[Dict[str, Any]]] = {}
147
 
148
+ # Common patterns
149
+ patterns = {
150
+ 'PER': [
151
+ # Known political figures
152
+ r'\b(Donald Trump|Joe Biden|Emmanuel Macron|Hillary Clinton|Barack Obama|'
153
+ r'Vladimir Putin|Angela Merkel|Justin Trudeau|Boris Johnson)\b',
154
+ ],
155
+ 'ORG': [
156
+ r'\b(FBI|CIA|NSA|ONU|NATO|OTAN|Google|Facebook|Twitter|Meta|'
157
+ r'Amazon|Microsoft|Apple|CNN|BBC|Le Monde|New York Times|'
158
+ r'Parti Républicain|Parti Démocrate|Republican Party|Democratic Party)\b',
159
+ ],
160
+ 'LOC': [
161
+ r'\b(Capitol|White House|Maison Blanche|Kremlin|Élysée|Pentagon|'
162
+ r'New York|Washington|Paris|Londres|Moscou|Berlin|Beijing)\b',
163
+ ],
164
+ 'DATE': [
165
+ r'\b(\d{1,2}\s+(janvier|février|mars|avril|mai|juin|juillet|août|'
166
+ r'septembre|octobre|novembre|décembre)\s+\d{4})\b',
167
+ r'\b(\d{1,2}[-/]\d{1,2}[-/]\d{2,4})\b',
168
+ r'\b(January|February|March|April|May|June|July|August|'
169
+ r'September|October|November|December)\s+\d{1,2},?\s+\d{4}\b',
170
+ ],
171
+ 'MONEY': [
172
+ r'\$[\d,]+(?:\.\d{2})?(?:\s*(?:million|billion|trillion))?',
173
+ r'[\d,]+(?:\.\d{2})?\s*(?:dollars?|euros?|€|\$)',
174
+ r'[\d,]+\s*(?:million|milliard)s?\s*(?:de\s+)?(?:dollars?|euros?)',
175
+ ],
176
+ 'PERCENT': [
177
+ r'\b\d+(?:\.\d+)?%',
178
+ r'\b\d+(?:\.\d+)?\s*pour\s*cent',
179
+ r'\b\d+(?:\.\d+)?\s*percent',
180
+ ],
181
  }
182
+
183
+ for label, pattern_list in patterns.items():
184
+ label_info = self.ENTITY_LABELS.get(label, {'fr': label, 'emoji': '🔖'})
185
+
186
+ for pattern in pattern_list:
187
+ for match in re.finditer(pattern, text, re.IGNORECASE):
188
+ entity_data = {
189
+ 'text': match.group(),
190
+ 'start': match.start(),
191
+ 'end': match.end(),
192
+ 'label': label,
193
+ 'label_display': label_info.get('fr', label),
194
+ 'emoji': label_info.get('emoji', '🔖'),
195
+ 'confidence': 0.70 # Lower confidence for heuristics
196
+ }
197
+
198
+ if label not in entities:
199
+ entities[label] = []
200
+
201
+ # Avoid duplicates
202
+ if not any(e['text'].lower() == entity_data['text'].lower()
203
+ for e in entities[label]):
204
+ entities[label].append(entity_data)
205
+
206
+ return entities
207
 
208
+ def get_entity_summary(self, entities: Dict[str, List[Dict[str, Any]]]) -> str:
209
  """
210
+ Generate a human-readable summary of extracted entities.
211
 
212
+ Args:
213
+ entities: Dictionary of entities from extract_entities()
214
+
215
+ Returns:
216
+ Formatted string summary
217
  """
218
+ if not entities:
219
+ return "Aucune entité nommée détectée."
220
+
221
+ lines = []
222
+ for label, ent_list in entities.items():
223
+ label_info = self.ENTITY_LABELS.get(label, {'fr': label, 'emoji': '🔖'})
224
+ emoji = label_info.get('emoji', '🔖')
225
+ label_display = label_info.get('fr', label)
226
+
227
+ entity_texts = [e['text'] for e in ent_list[:5]] # Limit to 5
228
+ lines.append(f"{emoji} {label_display}: {', '.join(entity_texts)}")
229
 
230
+ return "\n".join(lines)
231
+
232
+ def to_frontend_format(self, entities: Dict[str, List[Dict[str, Any]]]) -> List[Dict]:
233
+ """
234
+ Convert entities to frontend-friendly format.
235
 
236
+ Returns:
237
+ List of entities with all info for display
238
+ """
239
+ result = []
240
+ for label, ent_list in entities.items():
241
+ for ent in ent_list:
242
+ result.append({
243
+ 'text': ent['text'],
244
+ 'type': ent['label'],
245
+ 'type_display': ent.get('label_display', ent['label']),
246
+ 'emoji': ent.get('emoji', '🔖'),
247
+ 'confidence': ent.get('confidence', 0.5),
248
+ 'confidence_pct': f"{int(ent.get('confidence', 0.5) * 100)}%"
249
+ })
250
 
251
+ # Sort by confidence
252
+ result.sort(key=lambda x: x['confidence'], reverse=True)
253
  return result
254
 
255
 
256
+ # Singleton instance for easy import
257
+ _ner_analyzer: Optional[NERAnalyzer] = None
258
+
259
 
260
+ def get_ner_analyzer(model_name: str = "fr_core_news_md") -> NERAnalyzer:
261
+ """Get or create singleton NER analyzer instance."""
262
+ global _ner_analyzer
263
+ if _ner_analyzer is None:
264
+ _ner_analyzer = NERAnalyzer(model_name=model_name, fallback=True)
265
+ return _ner_analyzer
266
 
267
 
268
+ # Quick test
269
  if __name__ == "__main__":
270
+ analyzer = NERAnalyzer(fallback=True)
 
 
 
 
271
 
272
  test_text = """
273
+ Donald Trump a affirmé que l'insurrection du 6 janvier 2021 au Capitol n'est jamais arrivée.
274
+ Le FBI enquête sur les événements. Le président Joe Biden a condamné ces déclarations à Washington.
275
+ Les dégâts sont estimés à 30 millions de dollars.
276
  """
277
 
278
+ entities = analyzer.extract_entities(test_text)
279
+ print("=== Entités détectées ===")
280
+ print(analyzer.get_entity_summary(entities))
281
+ print("\n=== Format Frontend ===")
282
+ for e in analyzer.to_frontend_format(entities):
283
+ print(f" {e['emoji']} {e['text']} ({e['type_display']}, {e['confidence_pct']})")
 
 
 
 
 
syscred/ontology_manager.py CHANGED
@@ -47,7 +47,7 @@ class OntologyManager:
47
  """
48
 
49
  # Namespace for the credibility ontology
50
- CRED_NS = "https://github.com/DominiqueLoyer/systemFactChecking#"
51
 
52
  def __init__(self, base_ontology_path: Optional[str] = None, data_path: Optional[str] = None):
53
  """
@@ -254,7 +254,7 @@ class OntologyManager:
254
 
255
  # SPARQL query to find all evaluations for this URL
256
  query = """
257
- PREFIX cred: <http://www.dic9335.uqam.ca/ontologies/credibility-verification#>
258
  PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
259
 
260
  SELECT ?report ?score ?level ?timestamp ?content
@@ -298,7 +298,7 @@ class OntologyManager:
298
 
299
  # Count evaluations
300
  query = """
301
- PREFIX cred: <http://www.dic9335.uqam.ca/ontologies/credibility-verification#>
302
  SELECT (COUNT(?report) as ?count) WHERE {
303
  ?report a cred:RapportEvaluation .
304
  }
@@ -321,7 +321,7 @@ class OntologyManager:
321
 
322
  # Get the latest report ID
323
  latest_query = """
324
- PREFIX cred: <https://github.com/DominiqueLoyer/systemFactChecking#>
325
  SELECT ?report ?timestamp WHERE {
326
  ?report a cred:RapportEvaluation .
327
  ?report cred:completionTimestamp ?timestamp .
@@ -355,7 +355,7 @@ class OntologyManager:
355
 
356
  # Query triples related to this report (Level 1)
357
  related_query = """
358
- PREFIX cred: <https://github.com/DominiqueLoyer/systemFactChecking#>
359
  SELECT ?p ?o ?oType ?oLabel WHERE {
360
  <%s> ?p ?o .
361
  OPTIONAL { ?o a ?oType } .
@@ -463,8 +463,8 @@ if __name__ == "__main__":
463
  print("=== Testing OntologyManager ===\n")
464
 
465
  # Test with base ontology
466
- base_path = "/Users/bk280625/documents041025/MonCode/sysCRED_onto26avrtil.ttl"
467
- data_path = "/Users/bk280625/documents041025/MonCode/ontology/sysCRED_data.ttl"
468
 
469
  manager = OntologyManager(base_ontology_path=base_path, data_path=None)
470
 
 
47
  """
48
 
49
  # Namespace for the credibility ontology
50
+ CRED_NS = "https://syscred.uqam.ca/ontology#"
51
 
52
  def __init__(self, base_ontology_path: Optional[str] = None, data_path: Optional[str] = None):
53
  """
 
254
 
255
  # SPARQL query to find all evaluations for this URL
256
  query = """
257
+ PREFIX cred: <https://syscred.uqam.ca/ontology#>
258
  PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
259
 
260
  SELECT ?report ?score ?level ?timestamp ?content
 
298
 
299
  # Count evaluations
300
  query = """
301
+ PREFIX cred: <https://syscred.uqam.ca/ontology#>
302
  SELECT (COUNT(?report) as ?count) WHERE {
303
  ?report a cred:RapportEvaluation .
304
  }
 
321
 
322
  # Get the latest report ID
323
  latest_query = """
324
+ PREFIX cred: <https://syscred.uqam.ca/ontology#>
325
  SELECT ?report ?timestamp WHERE {
326
  ?report a cred:RapportEvaluation .
327
  ?report cred:completionTimestamp ?timestamp .
 
355
 
356
  # Query triples related to this report (Level 1)
357
  related_query = """
358
+ PREFIX cred: <https://syscred.uqam.ca/ontology#>
359
  SELECT ?p ?o ?oType ?oLabel WHERE {
360
  <%s> ?p ?o .
361
  OPTIONAL { ?o a ?oType } .
 
463
  print("=== Testing OntologyManager ===\n")
464
 
465
  # Test with base ontology
466
+ base_path = os.path.join(os.path.dirname(__file__), '..', 'ontology', 'sysCRED_onto26avrtil.ttl')
467
+ data_path = os.path.join(os.path.dirname(__file__), '..', 'ontology', 'sysCRED_data.ttl')
468
 
469
  manager = OntologyManager(base_ontology_path=base_path, data_path=None)
470
 
syscred/verification_system.py CHANGED
@@ -33,28 +33,35 @@ except ImportError:
33
  HAS_SBERT = False
34
  print("Warning: sentence-transformers not installed. Semantic coherence will use heuristics.")
35
 
36
- # Local imports
37
- from syscred.api_clients import ExternalAPIClients, WebContent, ExternalData
38
- from syscred.ontology_manager import OntologyManager
39
- from syscred.seo_analyzer import SEOAnalyzer
40
- from syscred.graph_rag import GraphRAG # [NEW] GraphRAG
41
- from syscred.trec_retriever import TRECRetriever, Evidence, RetrievalResult # [NEW] TREC Integration
42
- from syscred import config
43
-
44
- # [NEW] NER and E-E-A-T modules
45
  try:
46
- from syscred.ner_analyzer import NERAnalyzer, get_ner_analyzer
47
- HAS_NER = True
 
 
 
 
48
  except ImportError:
49
- HAS_NER = False
50
- print("[SysCRED] Warning: NER module not available")
 
 
 
 
51
 
 
 
52
  try:
 
53
  from syscred.eeat_calculator import EEATCalculator, EEATScore
54
- HAS_EEAT = True
55
  except ImportError:
56
- HAS_EEAT = False
57
- print("[SysCRED] Warning: E-E-A-T module not available")
 
 
 
 
58
 
59
 
60
  class CredibilityVerificationSystem:
@@ -136,6 +143,18 @@ class CredibilityVerificationSystem:
136
  # Weights for score calculation (Loaded from Config)
137
  self.weights = config.Config.SCORE_WEIGHTS
138
  print(f"[SysCRED] Using weights: {self.weights}")
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
  print("[SysCRED] System ready!")
141
 
@@ -144,40 +163,47 @@ class CredibilityVerificationSystem:
144
  print("[SysCRED] Loading ML models (this may take a moment)...")
145
 
146
  try:
147
- # Sentiment analysis
148
  self.sentiment_pipeline = pipeline(
149
- "sentiment-analysis",
150
- model="distilbert-base-uncased-finetuned-sst-2-english"
 
 
151
  )
152
- print("[SysCRED] ✓ Sentiment model loaded")
153
  except Exception as e:
154
  print(f"[SysCRED] ✗ Sentiment model failed: {e}")
155
-
156
  try:
157
- # NER pipeline
158
- self.ner_pipeline = pipeline("ner", grouped_entities=True)
159
- print("[SysCRED] ✓ NER model loaded")
 
 
 
 
 
 
160
  except Exception as e:
161
  print(f"[SysCRED] ✗ NER model failed: {e}")
162
-
163
  try:
164
- # Bias detection - Specialized model
165
- # Using 'd4data/bias-detection-model' or fallback to generic
166
- bias_model_name = "d4data/bias-detection-model"
167
  self.bias_tokenizer = AutoTokenizer.from_pretrained(bias_model_name)
168
  self.bias_model = AutoModelForSequenceClassification.from_pretrained(bias_model_name)
169
- print("[SysCRED] ✓ Bias model loaded (d4data)")
170
  except Exception as e:
171
  print(f"[SysCRED] ✗ Bias model failed: {e}. Using heuristics.")
172
 
173
  try:
174
- # Semantic Coherence
175
  if HAS_SBERT:
176
  self.coherence_model = SentenceTransformer('all-MiniLM-L6-v2')
177
- print("[SysCRED] ✓ Coherence model loaded (SBERT)")
178
  except Exception as e:
179
  print(f"[SysCRED] ✗ Coherence model failed: {e}")
180
-
181
  try:
182
  # LIME explainer
183
  self.explainer = LimeTextExplainer(class_names=['NEGATIVE', 'POSITIVE'])
@@ -338,21 +364,6 @@ class CredibilityVerificationSystem:
338
 
339
  # 4. Semantic Coherence
340
  results['coherence_score'] = self._calculate_coherence(text)
341
-
342
- # 5. [NEW] E-E-A-T Score Calculation
343
- if HAS_EEAT:
344
- try:
345
- # Initialize calc if needed (lazy load)
346
- if not hasattr(self, 'eeat_calculator') or self.eeat_calculator is None:
347
- self.eeat_calculator = EEATCalculator()
348
-
349
- # Calculate score
350
- eeat = self.eeat_calculator.calculate_eeat(text, results.get('named_entities', []))
351
- # Store in results as dict
352
- results['eeat_score'] = eeat.to_dict()
353
- print(f"[NLP] EEAT Score calculated: {eeat.overall_score:.2f}")
354
- except Exception as e:
355
- print(f"[NLP] EEAT error: {e}")
356
 
357
  return results
358
 
@@ -516,6 +527,26 @@ class CredibilityVerificationSystem:
516
  adjustment_factor = (graph_score - 0.5) * w_graph * confidence
517
  adjustments += adjustment_factor
518
  total_weight_used += w_graph * confidence # Partial weight based on confidence
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
519
 
520
  # Final calculation
521
  # Base 0.5 + sum of weighted adjustments
@@ -672,11 +703,24 @@ class CredibilityVerificationSystem:
672
  ) -> Dict[str, Any]:
673
  """Generate the final evaluation report."""
674
 
 
 
 
 
 
 
 
 
 
 
 
 
675
  report = {
676
  'idRapport': f"report_{int(datetime.datetime.now().timestamp())}",
677
  'informationEntree': input_data,
678
  'dateGeneration': datetime.datetime.now().isoformat(),
679
  'scoreCredibilite': round(overall_score, 2),
 
680
  'resumeAnalyse': "",
681
  'detailsScore': {
682
  'base': 0.5,
@@ -703,8 +747,6 @@ class CredibilityVerificationSystem:
703
  },
704
  # [NEW] TREC Evidence section
705
  'evidences': evidences or [],
706
- # [NEW] TREC IR Metrics for dashboard
707
- 'trec_metrics': self._calculate_trec_metrics(cleaned_text, evidences),
708
  'metadonnees': {}
709
  }
710
 
@@ -758,7 +800,7 @@ class CredibilityVerificationSystem:
758
  })
759
  report['sourcesUtilisees'].append({
760
  'type': 'Fact Check API',
761
- 'results_count': len(external_data.fact_checks) if external_data.fact_checks else 0
762
  })
763
  # [NEW] Add TREC evidence source
764
  if evidences:
@@ -768,112 +810,9 @@ class CredibilityVerificationSystem:
768
  'corpus': 'AP88-90',
769
  'results_count': len(evidences)
770
  })
771
-
772
- # [FIX] Add explicit fields for frontend
773
- if nlp_results.get('named_entities'):
774
- report['ner_entities'] = nlp_results.get('named_entities')
775
-
776
- # Add EEAT score if available (from rule_results or nlp_results)
777
- if 'eeat_score' in rule_results:
778
- report['eeat_score'] = rule_results['eeat_score']
779
- elif 'eeat_score' in nlp_results:
780
- report['eeat_score'] = nlp_results['eeat_score']
781
 
782
  return report
783
 
784
- def _calculate_trec_metrics(self, text: str, evidences: List[Dict[str, Any]] = None) -> Dict[str, float]:
785
- """
786
- Calculate TREC-style IR metrics for display on dashboard.
787
-
788
- Computes:
789
- - Precision: Ratio of relevant retrieved documents
790
- - Recall: Ratio of relevant documents retrieved
791
- - MAP: Mean Average Precision
792
- - NDCG: Normalized Discounted Cumulative Gain
793
- - TF-IDF: Term Frequency-Inverse Document Frequency score
794
- - MRR: Mean Reciprocal Rank
795
- """
796
- import math
797
-
798
- metrics = {
799
- 'precision': 0.0,
800
- 'recall': 0.0,
801
- 'map': 0.0,
802
- 'ndcg': 0.0,
803
- 'tfidf': 0.0,
804
- 'mrr': 0.0
805
- }
806
-
807
- if not text:
808
- return metrics
809
-
810
- # TF-IDF based on text analysis
811
- words = text.lower().split()
812
- if words:
813
- # Simple TF calculation
814
- word_counts = {}
815
- for word in words:
816
- word_counts[word] = word_counts.get(word, 0) + 1
817
-
818
- # Calculate TF-IDF score (simplified)
819
- total_words = len(words)
820
- unique_words = len(word_counts)
821
-
822
- # Term frequency normalized
823
- tf_scores = [count / total_words for count in word_counts.values()]
824
- # IDF approximation based on word distribution
825
- idf_approx = math.log((unique_words + 1) / 2)
826
-
827
- tfidf_sum = sum(tf * idf_approx for tf in tf_scores)
828
- metrics['tfidf'] = min(1.0, tfidf_sum / max(1, unique_words) * 10)
829
-
830
- # If we have evidences, calculate retrieval metrics
831
- if evidences and len(evidences) > 0:
832
- k = len(evidences)
833
-
834
- # For now, assume all retrieved evidences have some relevance
835
- # based on their retrieval scores
836
- scores = [e.get('score', 0) for e in evidences]
837
-
838
- if scores:
839
- avg_score = sum(scores) / len(scores)
840
- max_score = max(scores)
841
-
842
- # Precision at K (proxy: avg relevance score)
843
- metrics['precision'] = min(1.0, avg_score if avg_score <= 1.0 else avg_score / max(1, max_score))
844
-
845
- # Recall (proxy: coverage based on number of evidences)
846
- metrics['recall'] = min(1.0, len(evidences) / 10) # Assuming 10 is target
847
-
848
- # MAP (proxy using score ranking)
849
- ap_sum = 0.0
850
- for i, score in enumerate(sorted(scores, reverse=True)):
851
- ap_sum += (i + 1) / (i + 2) * score if score <= 1.0 else (i + 1) / (i + 2)
852
- metrics['map'] = ap_sum / len(scores) if scores else 0.0
853
-
854
- # NDCG (simplified)
855
- dcg = sum(
856
- (2 ** (score if score <= 1.0 else 1.0) - 1) / math.log2(i + 2)
857
- for i, score in enumerate(scores[:k])
858
- )
859
- ideal_scores = sorted(scores, reverse=True)
860
- idcg = sum(
861
- (2 ** (score if score <= 1.0 else 1.0) - 1) / math.log2(i + 2)
862
- for i, score in enumerate(ideal_scores[:k])
863
- )
864
- metrics['ndcg'] = dcg / idcg if idcg > 0 else 0.0
865
-
866
- # MRR (first relevant result)
867
- for i, score in enumerate(scores):
868
- if (score > 0.5 if score <= 1.0 else score > max_score / 2):
869
- metrics['mrr'] = 1.0 / (i + 1)
870
- break
871
- if metrics['mrr'] == 0 and len(scores) > 0:
872
- metrics['mrr'] = 1.0 # First result
873
-
874
- # Round all values
875
- return {k: round(v, 4) for k, v in metrics.items()}
876
-
877
  def _get_score_factors(self, rule_results: Dict, nlp_results: Dict) -> List[Dict]:
878
  """Get list of factors that influenced the score (For UI)."""
879
  factors = []
@@ -1034,6 +973,40 @@ class CredibilityVerificationSystem:
1034
  print("[SysCRED] Running NLP analysis...")
1035
  nlp_results = self.nlp_analysis(cleaned_text)
1036
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1037
  # 7. Calculate score (Now includes GraphRAG context)
1038
  overall_score = self.calculate_overall_score(rule_results, nlp_results)
1039
  print(f"[SysCRED] ✓ Credibility score: {overall_score:.2f}")
@@ -1045,6 +1018,10 @@ class CredibilityVerificationSystem:
1045
  graph_context=graph_context
1046
  )
1047
 
 
 
 
 
1048
  # Add similar URIs to report for ontology linking
1049
  if similar_uris:
1050
  report['similar_claims_uris'] = similar_uris
 
33
  HAS_SBERT = False
34
  print("Warning: sentence-transformers not installed. Semantic coherence will use heuristics.")
35
 
36
+ # Local imports - Support both syscred.module and relative imports
 
 
 
 
 
 
 
 
37
  try:
38
+ from syscred.api_clients import ExternalAPIClients, WebContent, ExternalData
39
+ from syscred.ontology_manager import OntologyManager
40
+ from syscred.seo_analyzer import SEOAnalyzer
41
+ from syscred.graph_rag import GraphRAG
42
+ from syscred.trec_retriever import TRECRetriever, Evidence, RetrievalResult
43
+ from syscred import config
44
  except ImportError:
45
+ from api_clients import ExternalAPIClients, WebContent, ExternalData
46
+ from ontology_manager import OntologyManager
47
+ from seo_analyzer import SEOAnalyzer
48
+ from graph_rag import GraphRAG
49
+ from trec_retriever import TRECRetriever, Evidence, RetrievalResult
50
+ import config
51
 
52
+ # [NER + E-E-A-T] Imports optionnels - n'interferent pas avec les imports principaux
53
+ HAS_NER_EEAT = False
54
  try:
55
+ from syscred.ner_analyzer import NERAnalyzer
56
  from syscred.eeat_calculator import EEATCalculator, EEATScore
57
+ HAS_NER_EEAT = True
58
  except ImportError:
59
+ try:
60
+ from ner_analyzer import NERAnalyzer
61
+ from eeat_calculator import EEATCalculator, EEATScore
62
+ HAS_NER_EEAT = True
63
+ except ImportError:
64
+ pass
65
 
66
 
67
  class CredibilityVerificationSystem:
 
143
  # Weights for score calculation (Loaded from Config)
144
  self.weights = config.Config.SCORE_WEIGHTS
145
  print(f"[SysCRED] Using weights: {self.weights}")
146
+
147
+ # [NER + E-E-A-T] Initialize analyzers
148
+ self.ner_analyzer = None
149
+ self.eeat_calculator = None
150
+ if HAS_NER_EEAT:
151
+ try:
152
+ self.ner_analyzer = NERAnalyzer()
153
+ self.eeat_calculator = EEATCalculator()
154
+ print("[SysCRED] NER analyzer initialized")
155
+ print("[SysCRED] E-E-A-T calculator initialized")
156
+ except Exception as e:
157
+ print(f"[SysCRED] NER/E-E-A-T init failed: {e}")
158
 
159
  print("[SysCRED] System ready!")
160
 
 
163
  print("[SysCRED] Loading ML models (this may take a moment)...")
164
 
165
  try:
166
+ # Sentiment analysis - modèle ultra-léger
167
  self.sentiment_pipeline = pipeline(
168
+ "sentiment-analysis",
169
+ model="distilbert-base-uncased-finetuned-sst-2-english",
170
+ device=-1,
171
+ model_kwargs={"low_cpu_mem_usage": True}
172
  )
173
+ print("[SysCRED] ✓ Sentiment model loaded (distilbert-base)")
174
  except Exception as e:
175
  print(f"[SysCRED] ✗ Sentiment model failed: {e}")
176
+
177
  try:
178
+ # NER pipeline - modèle plus léger
179
+ self.ner_pipeline = pipeline(
180
+ "ner",
181
+ model="dslim/bert-base-NER",
182
+ grouped_entities=True,
183
+ device=-1,
184
+ model_kwargs={"low_cpu_mem_usage": True}
185
+ )
186
+ print("[SysCRED] ✓ NER model loaded (dslim/bert-base-NER)")
187
  except Exception as e:
188
  print(f"[SysCRED] ✗ NER model failed: {e}")
189
+
190
  try:
191
+ # Bias detection - modèle plus léger si possible
192
+ bias_model_name = "typeform/distilbert-base-uncased-mnli"
 
193
  self.bias_tokenizer = AutoTokenizer.from_pretrained(bias_model_name)
194
  self.bias_model = AutoModelForSequenceClassification.from_pretrained(bias_model_name)
195
+ print("[SysCRED] ✓ Bias model loaded (distilbert-mnli)")
196
  except Exception as e:
197
  print(f"[SysCRED] ✗ Bias model failed: {e}. Using heuristics.")
198
 
199
  try:
200
+ # Semantic Coherence - modèle MiniLM (déjà léger)
201
  if HAS_SBERT:
202
  self.coherence_model = SentenceTransformer('all-MiniLM-L6-v2')
203
+ print("[SysCRED] ✓ Coherence model loaded (SBERT MiniLM)")
204
  except Exception as e:
205
  print(f"[SysCRED] ✗ Coherence model failed: {e}")
206
+
207
  try:
208
  # LIME explainer
209
  self.explainer = LimeTextExplainer(class_names=['NEGATIVE', 'POSITIVE'])
 
364
 
365
  # 4. Semantic Coherence
366
  results['coherence_score'] = self._calculate_coherence(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
 
368
  return results
369
 
 
527
  adjustment_factor = (graph_score - 0.5) * w_graph * confidence
528
  adjustments += adjustment_factor
529
  total_weight_used += w_graph * confidence # Partial weight based on confidence
530
+
531
+ # 8. [NEW] Linguistic Markers Analysis (sensationalism penalty)
532
+ # Penalize sensational language heavily, reward doubt markers (critical thinking)
533
+ linguistic = rule_results.get('linguistic_markers', {})
534
+ sensationalism_count = linguistic.get('sensationalism', 0)
535
+ doubt_count = linguistic.get('doubt', 0)
536
+ certainty_count = linguistic.get('certainty', 0)
537
+
538
+ # Sensationalism is a strong negative signal
539
+ if sensationalism_count > 0:
540
+ penalty = min(0.20, sensationalism_count * 0.05) # Max 20% penalty
541
+ adjustments -= penalty
542
+
543
+ # Excessive certainty without sources is suspicious
544
+ if certainty_count > 2 and not fact_checks:
545
+ adjustments -= 0.05
546
+
547
+ # Doubt markers indicate critical/questioning tone (slight positive)
548
+ if doubt_count > 0:
549
+ adjustments += min(0.05, doubt_count * 0.02)
550
 
551
  # Final calculation
552
  # Base 0.5 + sum of weighted adjustments
 
703
  ) -> Dict[str, Any]:
704
  """Generate the final evaluation report."""
705
 
706
+ # Determine credibility level
707
+ if overall_score >= 0.75:
708
+ niveau = "Élevée"
709
+ elif overall_score >= 0.55:
710
+ niveau = "Moyenne-Élevée"
711
+ elif overall_score >= 0.45:
712
+ niveau = "Moyenne"
713
+ elif overall_score >= 0.25:
714
+ niveau = "Faible-Moyenne"
715
+ else:
716
+ niveau = "Faible"
717
+
718
  report = {
719
  'idRapport': f"report_{int(datetime.datetime.now().timestamp())}",
720
  'informationEntree': input_data,
721
  'dateGeneration': datetime.datetime.now().isoformat(),
722
  'scoreCredibilite': round(overall_score, 2),
723
+ 'niveauCredibilite': niveau,
724
  'resumeAnalyse': "",
725
  'detailsScore': {
726
  'base': 0.5,
 
747
  },
748
  # [NEW] TREC Evidence section
749
  'evidences': evidences or [],
 
 
750
  'metadonnees': {}
751
  }
752
 
 
800
  })
801
  report['sourcesUtilisees'].append({
802
  'type': 'Fact Check API',
803
+ 'results_count': len(external_data.fact_checks)
804
  })
805
  # [NEW] Add TREC evidence source
806
  if evidences:
 
810
  'corpus': 'AP88-90',
811
  'results_count': len(evidences)
812
  })
 
 
 
 
 
 
 
 
 
 
813
 
814
  return report
815
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
816
  def _get_score_factors(self, rule_results: Dict, nlp_results: Dict) -> List[Dict]:
817
  """Get list of factors that influenced the score (For UI)."""
818
  factors = []
 
973
  print("[SysCRED] Running NLP analysis...")
974
  nlp_results = self.nlp_analysis(cleaned_text)
975
 
976
+ # 6.5 [NER] Named Entity Recognition
977
+ ner_entities = {}
978
+ if self.ner_analyzer and cleaned_text:
979
+ try:
980
+ ner_entities = self.ner_analyzer.extract_entities(cleaned_text)
981
+ total = sum(len(v) for v in ner_entities.values() if isinstance(v, list))
982
+ print(f"[SysCRED] NER: {total} entites detectees")
983
+ except Exception as e:
984
+ print(f"[SysCRED] NER failed: {e}")
985
+
986
+ # 6.6 [E-E-A-T] Experience-Expertise-Authority-Trust scoring
987
+ eeat_scores = {}
988
+ if self.eeat_calculator:
989
+ try:
990
+ url_for_eeat = input_data if is_url else ""
991
+ domain_age_years = None
992
+ if external_data.domain_age_days:
993
+ domain_age_years = external_data.domain_age_days / 365.0
994
+
995
+ eeat_raw = self.eeat_calculator.calculate(
996
+ url=url_for_eeat,
997
+ text=cleaned_text,
998
+ nlp_analysis=nlp_results,
999
+ fact_checks=rule_results.get('fact_checking', []),
1000
+ domain_age_years=domain_age_years,
1001
+ has_https=input_data.startswith("https://") if is_url else False
1002
+ )
1003
+ eeat_scores = eeat_raw.to_dict() if hasattr(eeat_raw, 'to_dict') else (
1004
+ eeat_raw if isinstance(eeat_raw, dict) else vars(eeat_raw)
1005
+ )
1006
+ print(f"[SysCRED] E-E-A-T score: {eeat_scores.get('overall', 'N/A')}")
1007
+ except Exception as e:
1008
+ print(f"[SysCRED] E-E-A-T failed: {e}")
1009
+
1010
  # 7. Calculate score (Now includes GraphRAG context)
1011
  overall_score = self.calculate_overall_score(rule_results, nlp_results)
1012
  print(f"[SysCRED] ✓ Credibility score: {overall_score:.2f}")
 
1018
  graph_context=graph_context
1019
  )
1020
 
1021
+ # [NER + E-E-A-T] Always include in report (even if empty)
1022
+ report['ner_entities'] = ner_entities
1023
+ report['eeat_scores'] = eeat_scores
1024
+
1025
  # Add similar URIs to report for ontology linking
1026
  if similar_uris:
1027
  report['similar_claims_uris'] = similar_uris