RDF Validation Deployment committed on
Commit
df23939
·
1 Parent(s): a6b66ad
Files changed (1) hide show
  1. app.py +40 -10
app.py CHANGED
@@ -78,16 +78,26 @@ def test_validator_functionality():
78
  return False
79
 
80
  try:
81
- # Test with a simple invalid RDF
82
- test_rdf = '<?xml version="1.0"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"><bf:Work/></rdf:RDF>'
 
 
 
 
 
 
 
 
83
  conforms, results = validate_rdf(test_rdf.encode('utf-8'), 'monograph')
84
 
85
- # This should fail validation (missing namespace, missing properties)
86
  if conforms:
87
  print("⚠️ WARNING: Validator returned 'conforms=True' for invalid RDF. Validator may not be working correctly!")
88
  return False
89
  else:
90
- print(f"✅ Validator test passed. Got expected failure: {results[:100] if results else 'No results'}")
 
 
91
  return True
92
 
93
  except Exception as e:
@@ -339,13 +349,14 @@ SAMPLE_VALID_RDF = '''<?xml version="1.0" encoding="UTF-8"?>
339
  </rdf:RDF>'''
340
 
341
  SAMPLE_INVALID_RDF = '''<?xml version="1.0" encoding="UTF-8"?>
342
- <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
343
- <!-- Missing namespace declarations -->
344
- <!-- Missing required properties -->
345
- <bf:Work rdf:about="http://example.org/work/1">
 
 
 
346
  <bf:title>Incomplete Title</bf:title>
347
- <!-- Missing rdf:type -->
348
- <!-- Missing proper title structure -->
349
  </bf:Work>
350
  </rdf:RDF>'''
351
 
@@ -376,6 +387,25 @@ def validate_rdf_tool(rdf_content: str, template: str = "monograph") -> dict:
376
  }
377
 
378
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
379
  # Log what we're validating
380
  logger.info(f"Validating RDF with template '{template}', content length: {len(rdf_content)}")
381
 
 
78
  return False
79
 
80
  try:
81
+ # Test with minimally valid RDF/XML that matches SHACL targets but is missing required properties
82
+ # This ensures SHACL finds focus nodes (bf:Text Work) and reports violations
83
+ test_rdf = '''<?xml version="1.0"?>
84
+ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
85
+ xmlns:bf="http://id.loc.gov/ontologies/bibframe/">
86
+ <bf:Work rdf:about="http://example.org/work/1">
87
+ <rdf:type rdf:resource="http://id.loc.gov/ontologies/bibframe/Text"/>
88
+ <!-- Intentionally missing title, language, content, adminMetadata to trigger SHACL violations -->
89
+ </bf:Work>
90
+ </rdf:RDF>'''
91
  conforms, results = validate_rdf(test_rdf.encode('utf-8'), 'monograph')
92
 
93
+ # This should fail validation due to missing required properties
94
  if conforms:
95
  print("⚠️ WARNING: Validator returned 'conforms=True' for invalid RDF. Validator may not be working correctly!")
96
  return False
97
  else:
98
+ preview = (results or '').strip()
99
+ preview = preview[:200] + ('…' if len(preview) > 200 else '')
100
+ print(f"✅ Validator test passed. Got expected SHACL violations. Preview: {preview if preview else 'No results text returned'}")
101
  return True
102
 
103
  except Exception as e:
 
349
  </rdf:RDF>'''
350
 
351
  SAMPLE_INVALID_RDF = '''<?xml version="1.0" encoding="UTF-8"?>
352
+ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
353
+ xmlns:bf="http://id.loc.gov/ontologies/bibframe/">
354
+ <!-- Well-formed RDF/XML, but missing required properties to trigger SHACL violations -->
355
+ <bf:Work rdf:about="http://example.org/work/invalid-1">
356
+ <!-- Ensure target class is hit so SHACL runs -->
357
+ <rdf:type rdf:resource="http://id.loc.gov/ontologies/bibframe/Text"/>
358
+ <!-- Missing proper title structure, language, content, adminMetadata -->
359
  <bf:title>Incomplete Title</bf:title>
 
 
360
  </bf:Work>
361
  </rdf:RDF>'''
362
 
 
387
  }
388
 
389
  try:
390
+ # Fast syntax check before SHACL to give clearer errors on XML/prefix issues
391
+ try:
392
+ try:
393
+ import rdflib # type: ignore
394
+ except ImportError:
395
+ rdflib = None # type: ignore
396
+ if rdflib:
397
+ g = rdflib.Graph() # type: ignore
398
+ # Parse as RDF/XML; raise on syntax errors like unbound prefixes
399
+ g.parse(data=rdf_content, format="application/rdf+xml") # type: ignore
400
+ else:
401
+ logger.info("rdflib not installed; skipping pre-parse RDF/XML syntax check")
402
+ except Exception as parse_err:
403
+ logger.error(f"RDF/XML parse error before validation: {parse_err}")
404
+ return {
405
+ "error": f"RDF/XML parse error: {parse_err}",
406
+ "conforms": False
407
+ }
408
+
409
  # Log what we're validating
410
  logger.info(f"Validating RDF with template '{template}', content length: {len(rdf_content)}")
411