DФmiИiqцe LФyer committed on
Commit ·
cedf5ef
1
Parent(s): 2b06f78
fix: unify namespace to syscred.uqam.ca/ontology#, fix NER/E-E-A-T/ontology permissions
Browse files
- graph_rag.py: 3 SPARQL queries → syscred.uqam.ca/ontology#
- ontology_manager.py: namespace + os.makedirs for save
- verification_system.py: NER uses original text, E-E-A-T domain_age guard
- Dockerfile: chmod 777 /app/ontology, copy .env
- Added sysCRED_data.ttl (4739 lines, Sandbox copy)
- Dockerfile +4 -0
- ontology/sysCRED_data.ttl +0 -0
- syscred/graph_rag.py +3 -3
- syscred/ontology_manager.py +10 -6
- syscred/verification_system.py +6 -4
Dockerfile
CHANGED
|
@@ -28,6 +28,10 @@ RUN python -m spacy download fr_core_news_md || true
|
|
| 28 |
# Copy application code
|
| 29 |
COPY syscred/ /app/syscred/
|
| 30 |
COPY ontology/ /app/ontology/
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
# Create user for HF Spaces (required)
|
| 33 |
RUN useradd -m -u 1000 user
|
|
|
|
| 28 |
# Copy application code
|
| 29 |
COPY syscred/ /app/syscred/
|
| 30 |
COPY ontology/ /app/ontology/
|
| 31 |
+
COPY .env /app/.env
|
| 32 |
+
|
| 33 |
+
# Make ontology directory writable (fix Permission denied for TTL export)
|
| 34 |
+
RUN chmod -R 777 /app/ontology
|
| 35 |
|
| 36 |
# Create user for HF Spaces (required)
|
| 37 |
RUN useradd -m -u 1000 user
|
ontology/sysCRED_data.ttl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
syscred/graph_rag.py
CHANGED
|
@@ -65,7 +65,7 @@ class GraphRAG:
|
|
| 65 |
|
| 66 |
# We reuse the specific query logic but tailored for retrieval
|
| 67 |
query = """
|
| 68 |
-
PREFIX cred: <https://
|
| 69 |
|
| 70 |
SELECT ?score ?level ?timestamp
|
| 71 |
WHERE {
|
|
@@ -128,7 +128,7 @@ class GraphRAG:
|
|
| 128 |
regex_pattern = "|".join(clean_kws)
|
| 129 |
|
| 130 |
query = """
|
| 131 |
-
PREFIX cred: <https://
|
| 132 |
|
| 133 |
SELECT ?report ?content ?score ?level ?timestamp
|
| 134 |
WHERE {
|
|
@@ -256,7 +256,7 @@ class GraphRAG:
|
|
| 256 |
return {'count': 0, 'avg_score': 0.5, 'scores': []}
|
| 257 |
|
| 258 |
query = """
|
| 259 |
-
PREFIX cred: <https://
|
| 260 |
|
| 261 |
SELECT ?score ?level ?timestamp
|
| 262 |
WHERE {
|
|
|
|
| 65 |
|
| 66 |
# We reuse the specific query logic but tailored for retrieval
|
| 67 |
query = """
|
| 68 |
+
PREFIX cred: <https://syscred.uqam.ca/ontology#>
|
| 69 |
|
| 70 |
SELECT ?score ?level ?timestamp
|
| 71 |
WHERE {
|
|
|
|
| 128 |
regex_pattern = "|".join(clean_kws)
|
| 129 |
|
| 130 |
query = """
|
| 131 |
+
PREFIX cred: <https://syscred.uqam.ca/ontology#>
|
| 132 |
|
| 133 |
SELECT ?report ?content ?score ?level ?timestamp
|
| 134 |
WHERE {
|
|
|
|
| 256 |
return {'count': 0, 'avg_score': 0.5, 'scores': []}
|
| 257 |
|
| 258 |
query = """
|
| 259 |
+
PREFIX cred: <https://syscred.uqam.ca/ontology#>
|
| 260 |
|
| 261 |
SELECT ?score ?level ?timestamp
|
| 262 |
WHERE {
|
syscred/ontology_manager.py
CHANGED
|
@@ -46,8 +46,8 @@ class OntologyManager:
|
|
| 46 |
- Exporting enriched ontology
|
| 47 |
"""
|
| 48 |
|
| 49 |
-
# Namespace for the credibility ontology
|
| 50 |
-
CRED_NS = "https://
|
| 51 |
|
| 52 |
def __init__(self, base_ontology_path: Optional[str] = None, data_path: Optional[str] = None):
|
| 53 |
"""
|
|
@@ -254,7 +254,7 @@ class OntologyManager:
|
|
| 254 |
|
| 255 |
# SPARQL query to find all evaluations for this URL
|
| 256 |
query = """
|
| 257 |
-
PREFIX cred: <
|
| 258 |
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
|
| 259 |
|
| 260 |
SELECT ?report ?score ?level ?timestamp ?content
|
|
@@ -298,7 +298,7 @@ class OntologyManager:
|
|
| 298 |
|
| 299 |
# Count evaluations
|
| 300 |
query = """
|
| 301 |
-
PREFIX cred: <
|
| 302 |
SELECT (COUNT(?report) as ?count) WHERE {
|
| 303 |
?report a cred:RapportEvaluation .
|
| 304 |
}
|
|
@@ -321,7 +321,7 @@ class OntologyManager:
|
|
| 321 |
|
| 322 |
# Get the latest report ID
|
| 323 |
latest_query = """
|
| 324 |
-
PREFIX cred: <https://
|
| 325 |
SELECT ?report ?timestamp WHERE {
|
| 326 |
?report a cred:RapportEvaluation .
|
| 327 |
?report cred:completionTimestamp ?timestamp .
|
|
@@ -355,7 +355,7 @@ class OntologyManager:
|
|
| 355 |
|
| 356 |
# Query triples related to this report (Level 1)
|
| 357 |
related_query = """
|
| 358 |
-
PREFIX cred: <https://
|
| 359 |
SELECT ?p ?o ?oType ?oLabel WHERE {
|
| 360 |
<%s> ?p ?o .
|
| 361 |
OPTIONAL { ?o a ?oType } .
|
|
@@ -454,6 +454,10 @@ class OntologyManager:
|
|
| 454 |
def save_data(self) -> bool:
|
| 455 |
"""Save the data graph to its configured path."""
|
| 456 |
if self.data_path:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 457 |
return self.export_to_ttl(self.data_path, include_base=False)
|
| 458 |
return False
|
| 459 |
|
|
|
|
| 46 |
- Exporting enriched ontology
|
| 47 |
"""
|
| 48 |
|
| 49 |
+
# Namespace for the credibility ontology (unified)
|
| 50 |
+
CRED_NS = "https://syscred.uqam.ca/ontology#"
|
| 51 |
|
| 52 |
def __init__(self, base_ontology_path: Optional[str] = None, data_path: Optional[str] = None):
|
| 53 |
"""
|
|
|
|
| 254 |
|
| 255 |
# SPARQL query to find all evaluations for this URL
|
| 256 |
query = """
|
| 257 |
+
PREFIX cred: <https://syscred.uqam.ca/ontology#>
|
| 258 |
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
|
| 259 |
|
| 260 |
SELECT ?report ?score ?level ?timestamp ?content
|
|
|
|
| 298 |
|
| 299 |
# Count evaluations
|
| 300 |
query = """
|
| 301 |
+
PREFIX cred: <https://syscred.uqam.ca/ontology#>
|
| 302 |
SELECT (COUNT(?report) as ?count) WHERE {
|
| 303 |
?report a cred:RapportEvaluation .
|
| 304 |
}
|
|
|
|
| 321 |
|
| 322 |
# Get the latest report ID
|
| 323 |
latest_query = """
|
| 324 |
+
PREFIX cred: <https://syscred.uqam.ca/ontology#>
|
| 325 |
SELECT ?report ?timestamp WHERE {
|
| 326 |
?report a cred:RapportEvaluation .
|
| 327 |
?report cred:completionTimestamp ?timestamp .
|
|
|
|
| 355 |
|
| 356 |
# Query triples related to this report (Level 1)
|
| 357 |
related_query = """
|
| 358 |
+
PREFIX cred: <https://syscred.uqam.ca/ontology#>
|
| 359 |
SELECT ?p ?o ?oType ?oLabel WHERE {
|
| 360 |
<%s> ?p ?o .
|
| 361 |
OPTIONAL { ?o a ?oType } .
|
|
|
|
| 454 |
def save_data(self) -> bool:
|
| 455 |
"""Save the data graph to its configured path."""
|
| 456 |
if self.data_path:
|
| 457 |
+
# Ensure directory exists (fix Permission denied on HF Space)
|
| 458 |
+
data_dir = os.path.dirname(self.data_path)
|
| 459 |
+
if data_dir:
|
| 460 |
+
os.makedirs(data_dir, exist_ok=True)
|
| 461 |
return self.export_to_ttl(self.data_path, include_base=False)
|
| 462 |
return False
|
| 463 |
|
syscred/verification_system.py
CHANGED
|
@@ -974,10 +974,12 @@ class CredibilityVerificationSystem:
|
|
| 974 |
nlp_results = self.nlp_analysis(cleaned_text)
|
| 975 |
|
| 976 |
# 6.5 [NER] Named Entity Recognition
|
|
|
|
| 977 |
ner_entities = {}
|
| 978 |
-
if
|
|
|
|
| 979 |
try:
|
| 980 |
-
ner_entities = self.ner_analyzer.extract_entities(
|
| 981 |
total = sum(len(v) for v in ner_entities.values() if isinstance(v, list))
|
| 982 |
print(f"[SysCRED] NER: {total} entites detectees")
|
| 983 |
except Exception as e:
|
|
@@ -989,8 +991,8 @@ class CredibilityVerificationSystem:
|
|
| 989 |
try:
|
| 990 |
url_for_eeat = input_data if is_url else ""
|
| 991 |
domain_age_years = None
|
| 992 |
-
if external_data.domain_age_days:
|
| 993 |
-
domain_age_years = external_data.domain_age_days / 365.0
|
| 994 |
|
| 995 |
eeat_raw = self.eeat_calculator.calculate(
|
| 996 |
url=url_for_eeat,
|
|
|
|
| 974 |
nlp_results = self.nlp_analysis(cleaned_text)
|
| 975 |
|
| 976 |
# 6.5 [NER] Named Entity Recognition
|
| 977 |
+
# IMPORTANT: Use original text (not preprocessed) — NER needs capitalization
|
| 978 |
ner_entities = {}
|
| 979 |
+
ner_input_text = text_to_analyze if text_to_analyze else input_data
|
| 980 |
+
if self.ner_analyzer and ner_input_text:
|
| 981 |
try:
|
| 982 |
+
ner_entities = self.ner_analyzer.extract_entities(ner_input_text)
|
| 983 |
total = sum(len(v) for v in ner_entities.values() if isinstance(v, list))
|
| 984 |
print(f"[SysCRED] NER: {total} entites detectees")
|
| 985 |
except Exception as e:
|
|
|
|
| 991 |
try:
|
| 992 |
url_for_eeat = input_data if is_url else ""
|
| 993 |
domain_age_years = None
|
| 994 |
+
if external_data.domain_age_days is not None and external_data.domain_age_days > 0:
|
| 995 |
+
domain_age_years = float(external_data.domain_age_days) / 365.0
|
| 996 |
|
| 997 |
eeat_raw = self.eeat_calculator.calculate(
|
| 998 |
url=url_for_eeat,
|