RDF Validation Deployment
committed on
Commit
·
df23939
1
Parent(s):
a6b66ad
fixes
Browse files
app.py
CHANGED
|
@@ -78,16 +78,26 @@ def test_validator_functionality():
|
|
| 78 |
return False
|
| 79 |
|
| 80 |
try:
|
| 81 |
-
# Test with
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
conforms, results = validate_rdf(test_rdf.encode('utf-8'), 'monograph')
|
| 84 |
|
| 85 |
-
# This should fail validation
|
| 86 |
if conforms:
|
| 87 |
print("⚠️ WARNING: Validator returned 'conforms=True' for invalid RDF. Validator may not be working correctly!")
|
| 88 |
return False
|
| 89 |
else:
|
| 90 |
-
|
|
|
|
|
|
|
| 91 |
return True
|
| 92 |
|
| 93 |
except Exception as e:
|
|
@@ -339,13 +349,14 @@ SAMPLE_VALID_RDF = '''<?xml version="1.0" encoding="UTF-8"?>
|
|
| 339 |
</rdf:RDF>'''
|
| 340 |
|
| 341 |
SAMPLE_INVALID_RDF = '''<?xml version="1.0" encoding="UTF-8"?>
|
| 342 |
-
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
| 343 |
-
|
| 344 |
-
<!--
|
| 345 |
-
<bf:Work rdf:about="http://example.org/work/1">
|
|
|
|
|
|
|
|
|
|
| 346 |
<bf:title>Incomplete Title</bf:title>
|
| 347 |
-
<!-- Missing rdf:type -->
|
| 348 |
-
<!-- Missing proper title structure -->
|
| 349 |
</bf:Work>
|
| 350 |
</rdf:RDF>'''
|
| 351 |
|
|
@@ -376,6 +387,25 @@ def validate_rdf_tool(rdf_content: str, template: str = "monograph") -> dict:
|
|
| 376 |
}
|
| 377 |
|
| 378 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
# Log what we're validating
|
| 380 |
logger.info(f"Validating RDF with template '{template}', content length: {len(rdf_content)}")
|
| 381 |
|
|
|
|
| 78 |
return False
|
| 79 |
|
| 80 |
try:
|
| 81 |
+
# Test with minimally valid RDF/XML that matches SHACL targets but is missing required properties
|
| 82 |
+
# This ensures SHACL finds focus nodes (bf:Text Work) and reports violations
|
| 83 |
+
test_rdf = '''<?xml version="1.0"?>
|
| 84 |
+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
| 85 |
+
xmlns:bf="http://id.loc.gov/ontologies/bibframe/">
|
| 86 |
+
<bf:Work rdf:about="http://example.org/work/1">
|
| 87 |
+
<rdf:type rdf:resource="http://id.loc.gov/ontologies/bibframe/Text"/>
|
| 88 |
+
<!-- Intentionally missing title, language, content, adminMetadata to trigger SHACL violations -->
|
| 89 |
+
</bf:Work>
|
| 90 |
+
</rdf:RDF>'''
|
| 91 |
conforms, results = validate_rdf(test_rdf.encode('utf-8'), 'monograph')
|
| 92 |
|
| 93 |
+
# This should fail validation due to missing required properties
|
| 94 |
if conforms:
|
| 95 |
print("⚠️ WARNING: Validator returned 'conforms=True' for invalid RDF. Validator may not be working correctly!")
|
| 96 |
return False
|
| 97 |
else:
|
| 98 |
+
preview = (results or '').strip()
|
| 99 |
+
preview = preview[:200] + ('…' if len(preview) > 200 else '')
|
| 100 |
+
print(f"✅ Validator test passed. Got expected SHACL violations. Preview: {preview if preview else 'No results text returned'}")
|
| 101 |
return True
|
| 102 |
|
| 103 |
except Exception as e:
|
|
|
|
| 349 |
</rdf:RDF>'''
|
| 350 |
|
| 351 |
SAMPLE_INVALID_RDF = '''<?xml version="1.0" encoding="UTF-8"?>
|
| 352 |
+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
| 353 |
+
xmlns:bf="http://id.loc.gov/ontologies/bibframe/">
|
| 354 |
+
<!-- Well-formed RDF/XML, but missing required properties to trigger SHACL violations -->
|
| 355 |
+
<bf:Work rdf:about="http://example.org/work/invalid-1">
|
| 356 |
+
<!-- Ensure target class is hit so SHACL runs -->
|
| 357 |
+
<rdf:type rdf:resource="http://id.loc.gov/ontologies/bibframe/Text"/>
|
| 358 |
+
<!-- Missing proper title structure, language, content, adminMetadata -->
|
| 359 |
<bf:title>Incomplete Title</bf:title>
|
|
|
|
|
|
|
| 360 |
</bf:Work>
|
| 361 |
</rdf:RDF>'''
|
| 362 |
|
|
|
|
| 387 |
}
|
| 388 |
|
| 389 |
try:
|
| 390 |
+
# Fast syntax check before SHACL to give clearer errors on XML/prefix issues
|
| 391 |
+
try:
|
| 392 |
+
try:
|
| 393 |
+
import rdflib # type: ignore
|
| 394 |
+
except ImportError:
|
| 395 |
+
rdflib = None # type: ignore
|
| 396 |
+
if rdflib:
|
| 397 |
+
g = rdflib.Graph() # type: ignore
|
| 398 |
+
# Parse as RDF/XML; raise on syntax errors like unbound prefixes
|
| 399 |
+
g.parse(data=rdf_content, format="application/rdf+xml") # type: ignore
|
| 400 |
+
else:
|
| 401 |
+
logger.info("rdflib not installed; skipping pre-parse RDF/XML syntax check")
|
| 402 |
+
except Exception as parse_err:
|
| 403 |
+
logger.error(f"RDF/XML parse error before validation: {parse_err}")
|
| 404 |
+
return {
|
| 405 |
+
"error": f"RDF/XML parse error: {parse_err}",
|
| 406 |
+
"conforms": False
|
| 407 |
+
}
|
| 408 |
+
|
| 409 |
# Log what we're validating
|
| 410 |
logger.info(f"Validating RDF with template '{template}', content length: {len(rdf_content)}")
|
| 411 |
|