Dominique Loyer committed on
Commit
cedf5ef
·
1 Parent(s): 2b06f78

fix: unify namespace to syscred.uqam.ca/ontology#; fix NER input text, E-E-A-T domain-age guard, and ontology directory permissions

Browse files

- graph_rag.py: 3 SPARQL queries → syscred.uqam.ca/ontology#
- ontology_manager.py: namespace + os.makedirs for save
- verification_system.py: NER uses original text, E-E-A-T domain_age guard
- Dockerfile: chmod 777 /app/ontology, copy .env
- Added sysCRED_data.ttl (4739 lines, Sandbox copy)

Dockerfile CHANGED
@@ -28,6 +28,10 @@ RUN python -m spacy download fr_core_news_md || true
28
  # Copy application code
29
  COPY syscred/ /app/syscred/
30
  COPY ontology/ /app/ontology/
 
 
 
 
31
 
32
  # Create user for HF Spaces (required)
33
  RUN useradd -m -u 1000 user
 
28
  # Copy application code
29
  COPY syscred/ /app/syscred/
30
  COPY ontology/ /app/ontology/
31
+ COPY .env /app/.env
32
+
33
+ # Make ontology directory writable (fix Permission denied for TTL export)
34
+ RUN chmod -R 777 /app/ontology
35
 
36
  # Create user for HF Spaces (required)
37
  RUN useradd -m -u 1000 user
ontology/sysCRED_data.ttl ADDED
The diff for this file is too large to render. See raw diff
 
syscred/graph_rag.py CHANGED
@@ -65,7 +65,7 @@ class GraphRAG:
65
 
66
  # We reuse the specific query logic but tailored for retrieval
67
  query = """
68
- PREFIX cred: <https://github.com/DominiqueLoyer/systemFactChecking#>
69
 
70
  SELECT ?score ?level ?timestamp
71
  WHERE {
@@ -128,7 +128,7 @@ class GraphRAG:
128
  regex_pattern = "|".join(clean_kws)
129
 
130
  query = """
131
- PREFIX cred: <https://github.com/DominiqueLoyer/systemFactChecking#>
132
 
133
  SELECT ?report ?content ?score ?level ?timestamp
134
  WHERE {
@@ -256,7 +256,7 @@ class GraphRAG:
256
  return {'count': 0, 'avg_score': 0.5, 'scores': []}
257
 
258
  query = """
259
- PREFIX cred: <https://github.com/DominiqueLoyer/systemFactChecking#>
260
 
261
  SELECT ?score ?level ?timestamp
262
  WHERE {
 
65
 
66
  # We reuse the specific query logic but tailored for retrieval
67
  query = """
68
+ PREFIX cred: <https://syscred.uqam.ca/ontology#>
69
 
70
  SELECT ?score ?level ?timestamp
71
  WHERE {
 
128
  regex_pattern = "|".join(clean_kws)
129
 
130
  query = """
131
+ PREFIX cred: <https://syscred.uqam.ca/ontology#>
132
 
133
  SELECT ?report ?content ?score ?level ?timestamp
134
  WHERE {
 
256
  return {'count': 0, 'avg_score': 0.5, 'scores': []}
257
 
258
  query = """
259
+ PREFIX cred: <https://syscred.uqam.ca/ontology#>
260
 
261
  SELECT ?score ?level ?timestamp
262
  WHERE {
syscred/ontology_manager.py CHANGED
@@ -46,8 +46,8 @@ class OntologyManager:
46
  - Exporting enriched ontology
47
  """
48
 
49
- # Namespace for the credibility ontology
50
- CRED_NS = "https://github.com/DominiqueLoyer/systemFactChecking#"
51
 
52
  def __init__(self, base_ontology_path: Optional[str] = None, data_path: Optional[str] = None):
53
  """
@@ -254,7 +254,7 @@ class OntologyManager:
254
 
255
  # SPARQL query to find all evaluations for this URL
256
  query = """
257
- PREFIX cred: <http://www.dic9335.uqam.ca/ontologies/credibility-verification#>
258
  PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
259
 
260
  SELECT ?report ?score ?level ?timestamp ?content
@@ -298,7 +298,7 @@ class OntologyManager:
298
 
299
  # Count evaluations
300
  query = """
301
- PREFIX cred: <http://www.dic9335.uqam.ca/ontologies/credibility-verification#>
302
  SELECT (COUNT(?report) as ?count) WHERE {
303
  ?report a cred:RapportEvaluation .
304
  }
@@ -321,7 +321,7 @@ class OntologyManager:
321
 
322
  # Get the latest report ID
323
  latest_query = """
324
- PREFIX cred: <https://github.com/DominiqueLoyer/systemFactChecking#>
325
  SELECT ?report ?timestamp WHERE {
326
  ?report a cred:RapportEvaluation .
327
  ?report cred:completionTimestamp ?timestamp .
@@ -355,7 +355,7 @@ class OntologyManager:
355
 
356
  # Query triples related to this report (Level 1)
357
  related_query = """
358
- PREFIX cred: <https://github.com/DominiqueLoyer/systemFactChecking#>
359
  SELECT ?p ?o ?oType ?oLabel WHERE {
360
  <%s> ?p ?o .
361
  OPTIONAL { ?o a ?oType } .
@@ -454,6 +454,10 @@ class OntologyManager:
454
  def save_data(self) -> bool:
455
  """Save the data graph to its configured path."""
456
  if self.data_path:
 
 
 
 
457
  return self.export_to_ttl(self.data_path, include_base=False)
458
  return False
459
 
 
46
  - Exporting enriched ontology
47
  """
48
 
49
+ # Namespace for the credibility ontology (unified)
50
+ CRED_NS = "https://syscred.uqam.ca/ontology#"
51
 
52
  def __init__(self, base_ontology_path: Optional[str] = None, data_path: Optional[str] = None):
53
  """
 
254
 
255
  # SPARQL query to find all evaluations for this URL
256
  query = """
257
+ PREFIX cred: <https://syscred.uqam.ca/ontology#>
258
  PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
259
 
260
  SELECT ?report ?score ?level ?timestamp ?content
 
298
 
299
  # Count evaluations
300
  query = """
301
+ PREFIX cred: <https://syscred.uqam.ca/ontology#>
302
  SELECT (COUNT(?report) as ?count) WHERE {
303
  ?report a cred:RapportEvaluation .
304
  }
 
321
 
322
  # Get the latest report ID
323
  latest_query = """
324
+ PREFIX cred: <https://syscred.uqam.ca/ontology#>
325
  SELECT ?report ?timestamp WHERE {
326
  ?report a cred:RapportEvaluation .
327
  ?report cred:completionTimestamp ?timestamp .
 
355
 
356
  # Query triples related to this report (Level 1)
357
  related_query = """
358
+ PREFIX cred: <https://syscred.uqam.ca/ontology#>
359
  SELECT ?p ?o ?oType ?oLabel WHERE {
360
  <%s> ?p ?o .
361
  OPTIONAL { ?o a ?oType } .
 
454
  def save_data(self) -> bool:
455
  """Save the data graph to its configured path."""
456
  if self.data_path:
457
+ # Ensure the target directory exists before exporting (save failed on HF Space)
458
+ data_dir = os.path.dirname(self.data_path)
459
+ if data_dir:
460
+ os.makedirs(data_dir, exist_ok=True)
461
  return self.export_to_ttl(self.data_path, include_base=False)
462
  return False
463
 
syscred/verification_system.py CHANGED
@@ -974,10 +974,12 @@ class CredibilityVerificationSystem:
974
  nlp_results = self.nlp_analysis(cleaned_text)
975
 
976
  # 6.5 [NER] Named Entity Recognition
 
977
  ner_entities = {}
978
- if self.ner_analyzer and cleaned_text:
 
979
  try:
980
- ner_entities = self.ner_analyzer.extract_entities(cleaned_text)
981
  total = sum(len(v) for v in ner_entities.values() if isinstance(v, list))
982
  print(f"[SysCRED] NER: {total} entites detectees")
983
  except Exception as e:
@@ -989,8 +991,8 @@ class CredibilityVerificationSystem:
989
  try:
990
  url_for_eeat = input_data if is_url else ""
991
  domain_age_years = None
992
- if external_data.domain_age_days:
993
- domain_age_years = external_data.domain_age_days / 365.0
994
 
995
  eeat_raw = self.eeat_calculator.calculate(
996
  url=url_for_eeat,
 
974
  nlp_results = self.nlp_analysis(cleaned_text)
975
 
976
  # 6.5 [NER] Named Entity Recognition
977
+ # IMPORTANT: Use original text (not preprocessed) — NER needs capitalization
978
  ner_entities = {}
979
+ ner_input_text = text_to_analyze if text_to_analyze else input_data
980
+ if self.ner_analyzer and ner_input_text:
981
  try:
982
+ ner_entities = self.ner_analyzer.extract_entities(ner_input_text)
983
  total = sum(len(v) for v in ner_entities.values() if isinstance(v, list))
984
  print(f"[SysCRED] NER: {total} entites detectees")
985
  except Exception as e:
 
991
  try:
992
  url_for_eeat = input_data if is_url else ""
993
  domain_age_years = None
994
+ if external_data.domain_age_days is not None and external_data.domain_age_days > 0:
995
+ domain_age_years = float(external_data.domain_age_days) / 365.0
996
 
997
  eeat_raw = self.eeat_calculator.calculate(
998
  url=url_for_eeat,