mcp4rdf / test_rapid_fix_standalone.py
RDF Validation Deployment
improved
a40763c
#!/usr/bin/env python3
"""
Standalone test for rapid_fix_missing_properties - no dependencies
"""
import re
from typing import Optional, List
# Sample invalid RDF: an RDF/XML document holding a single bf:Work that carries
# only an rdf:type and a bf:title. It contains no bf:language, bf:content or
# bf:adminMetadata element, which are the properties named in the validation
# errors below.
SAMPLE_INVALID_RDF = """<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:bf="http://id.loc.gov/ontologies/bibframe/">
<bf:Work rdf:about="http://example.org/work/invalid-1">
<rdf:type rdf:resource="http://id.loc.gov/ontologies/bibframe/Text"/>
<bf:title>Incomplete Title</bf:title>
</bf:Work>
</rdf:RDF>"""
# Validation errors: sample validator output for the document above. Each
# "Less than N values on ...->bf:<property>" line is in the shape that the
# regular expression in rapid_fix_missing_properties extracts a property
# name from; the "=== Module: ... ===" line does not match that expression.
SAMPLE_VALIDATION_ERRORS = """
=== Module: MonographDCTAP/Monograph_Work_Text.tsv ===
Message: Less than 1 values on Work->bf:language
Message: Less than 1 values on Work->bf:content
Message: Less than 1 values on Work->bf:adminMetadata
"""
# Copy of the rapid_fix function
def rapid_fix_missing_properties(rdf_content: str, validation_results: str, template: str, steps_log: Optional[List[str]] = None) -> Optional[str]:
    """Ultra-fast fix for simple missing property errors - no AI needed.

    Scans ``validation_results`` for "Less than N values on ...->bf:<prop>"
    messages and, for every reported property that has a canned snippet in
    ``INSTANT_FIXES`` and is not already present, appends that snippet to the
    first ``bf:Work`` element (or, when there is no Work, the first
    ``bf:Instance``) found in ``rdf_content``.

    Args:
        rdf_content: RDF/XML document to patch. It is handled as plain text
            with regular expressions, not parsed as XML.
        validation_results: Raw validator output to scan for missing
            properties.
        template: Not used by this implementation; accepted so the signature
            stays the same for existing callers.
        steps_log: Optional list that receives human-readable progress
            messages. Any list is used, including an empty one; pass ``None``
            to disable logging.

    Returns:
        The patched document, or ``None`` when no missing property was
        reported, no ``bf:Work``/``bf:Instance`` was found, or none of the
        reported properties could be added.
    """

    def log(message: str) -> None:
        # Compare against None rather than testing truthiness: callers
        # normally hand in an empty list, which is falsy but still a valid
        # place to collect messages.
        if steps_log is not None:
            steps_log.append(message)

    # Quick pattern match for missing properties
    reported = re.findall(r"Less than \d+ values on.*->bf:(\w+)", validation_results)
    if not reported:
        log("❌ Rapid fix: No missing properties detected in validation results")
        return None

    # The same property can be reported more than once; keep the first
    # occurrence of each so a snippet is never inserted twice.
    missing = list(dict.fromkeys(reported))
    log(f"πŸ” Rapid fix detected {len(missing)} missing properties: {', '.join(missing)}")

    # Pre-compiled property templates
    INSTANT_FIXES = {
        "title": '<bf:title><bf:Title><bf:mainTitle>Untitled</bf:mainTitle></bf:Title></bf:title>',
        "language": '<bf:language><bf:Language rdf:about="http://id.loc.gov/vocabulary/languages/eng"><rdfs:label>English</rdfs:label><bf:code>eng</bf:code></bf:Language></bf:language>',
        "content": '<bf:content><bf:Content rdf:about="http://id.loc.gov/vocabulary/contentTypes/txt"><rdfs:label>text</rdfs:label><bf:code>txt</bf:code></bf:Content></bf:content>',
        "adminMetadata": '''<bf:adminMetadata>
<bf:AdminMetadata>
<bf:status>
<bf:Status rdf:about="http://id.loc.gov/vocabulary/mstatus/n">
<rdfs:label>new</rdfs:label>
<bf:code>n</bf:code>
</bf:Status>
</bf:status>
<bf:date rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2024-01-01</bf:date>
<bf:agent>
<bf:Agent rdf:about="http://id.loc.gov/vocabulary/organizations/dlc">
<rdf:type rdf:resource="http://id.loc.gov/ontologies/bibframe/Organization"/>
<rdfs:label>Library of Congress</rdfs:label>
</bf:Agent>
</bf:agent>
<bf:assigner>
<bf:Agent rdf:about="http://id.loc.gov/vocabulary/organizations/dlc">
<rdf:type rdf:resource="http://id.loc.gov/ontologies/bibframe/Organization"/>
<rdfs:label>Library of Congress</rdfs:label>
</bf:Agent>
</bf:assigner>
</bf:AdminMetadata>
</bf:adminMetadata>''',
    }

    # Find insertion point: the first bf:Work wins, bf:Instance is the fallback.
    work_match = re.search(r'(<bf:Work[^>]*>)(.*?)(</bf:Work>)', rdf_content, re.DOTALL)
    instance_match = re.search(r'(<bf:Instance[^>]*>)(.*?)(</bf:Instance>)', rdf_content, re.DOTALL)
    match = work_match or instance_match
    if match is None:
        log("❌ Rapid fix: No bf:Work or bf:Instance found in RDF")
        return None

    target_type = "Work" if work_match else "Instance"
    opening_tag, content, closing_tag = match.groups()

    log(f"πŸ“ Rapid fix target: bf:{target_type}")
    has_admin = "<bf:adminMetadata>" in content or "<bf:AdminMetadata>" in content
    log(f"πŸ” Current state: AdminMetadata {'EXISTS' if has_admin else 'MISSING'}")

    # Build fixes (at most the first 10 distinct properties are considered)
    fixes = []
    for prop in missing[:10]:
        has_template = prop in INSTANT_FIXES
        # Require a tag-name terminator after the property name so that e.g.
        # <bf:contentAccessibility> is not mistaken for an existing <bf:content>.
        already_present = re.search(rf"<bf:{re.escape(prop)}[\s/>]", content) is not None

        # Log exactly the checks that drive the decision below; the template
        # lookup is case-sensitive.
        log(f"πŸ” Processing property: '{prop}'")
        log(f" Check: Is '{prop}' in INSTANT_FIXES? {has_template}")
        log(f" Check: Is '<bf:{prop}' in content? {already_present}")

        if not has_template:
            log(f" ⚠️ No template for '{prop}', skipping")
        elif already_present:
            log(f" ℹ️ Property '{prop}' already exists, skipping")
        else:
            fixes.append(INSTANT_FIXES[prop])
            log(f" βœ… Will add missing '{prop}' property")

    if not fixes:
        log("❌ Rapid fix: No properties could be fixed")
        return None

    # Insert all at once, just before the closing tag of the target element
    log(f"πŸ”¨ Adding {len(fixes)} missing properties to {target_type}")
    fixed_content = opening_tag + content + '\n ' + '\n '.join(fixes) + '\n' + closing_tag

    # Splice by position so only the matched element is rewritten, even if an
    # identical block of text occurs elsewhere in the document.
    result = rdf_content[:match.start()] + fixed_content + rdf_content[match.end():]

    # Several snippets use rdfs:label. If the document never declares the rdfs
    # prefix the output would not be well-formed XML, so declare it on the
    # rdf:RDF root element when that element is present.
    uses_rdfs = any("rdfs:" in fix for fix in fixes)
    if uses_rdfs and not re.search(r"xmlns:rdfs\s*=", result):
        result, declared = re.subn(
            r"<rdf:RDF\b",
            '<rdf:RDF xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"',
            result,
            count=1,
        )
        if declared:
            log("ℹ️ Declared missing xmlns:rdfs namespace on rdf:RDF")

    log(f"βœ… Rapid fix complete: Added {len(fixes)} properties")
    return result
# Run test: push the sample document and validator output through the rapid
# fix, then print the inputs, the collected step log, the returned document
# and a short summary of which properties appear in it.
RULE = "=" * 80

print(RULE)
print("πŸ§ͺ TESTING RAPID FIX LOGIC")
print(RULE)

print("\nπŸ“„ INPUT RDF:")
print(SAMPLE_INVALID_RDF)

print("\n❌ VALIDATION ERRORS:")
print(SAMPLE_VALIDATION_ERRORS)

steps_log = []
result = rapid_fix_missing_properties(
    SAMPLE_INVALID_RDF,
    SAMPLE_VALIDATION_ERRORS,
    'monograph',
    steps_log,
)

print("\n" + RULE)
print("πŸ“‹ STEP-BY-STEP LOG:")
print(RULE)
for entry in steps_log:
    print(entry)

print("\n" + RULE)
if not result:
    print("❌ RAPID FIX RETURNED None")
else:
    print("βœ… RAPID FIX PRODUCED OUTPUT:")
    print(RULE)
    print(result)

    print("\n" + RULE)
    print("πŸ” ANALYSIS:")
    print(RULE)

    # Simple one-line confirmations, in the order they are reported.
    for needle, message in (
        ("<bf:language>", "βœ… Added bf:language"),
        ("<bf:content>", "βœ… Added bf:content"),
    ):
        if needle in result:
            print(message)

    # adminMetadata gets an extra check for the nested bf:assigner element.
    if "<bf:adminMetadata>" in result:
        print("βœ… Added bf:adminMetadata")
        assigner_found = "<bf:assigner>" in result
        print(
            " βœ… AdminMetadata includes bf:assigner"
            if assigner_found
            else " ❌ AdminMetadata MISSING bf:assigner!"
        )