File size: 6,597 Bytes
a40763c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
#!/usr/bin/env python3
"""
Standalone test for rapid_fix_missing_properties - no dependencies
"""
import re
from typing import Optional, List
# Sample RDF/XML used as the "invalid" input: the single bf:Work below carries
# only an rdf:type and a bf:title, and none of the properties reported as
# missing in SAMPLE_VALIDATION_ERRORS (bf:language, bf:content,
# bf:adminMetadata). Only the rdf: and bf: namespace prefixes are declared.
SAMPLE_INVALID_RDF = """<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:bf="http://id.loc.gov/ontologies/bibframe/">
<bf:Work rdf:about="http://example.org/work/invalid-1">
<rdf:type rdf:resource="http://id.loc.gov/ontologies/bibframe/Text"/>
<bf:title>Incomplete Title</bf:title>
</bf:Work>
</rdf:RDF>"""
# Validator output paired with the sample above: one "Less than 1 values on
# Work->bf:<property>" message per missing property. This is the message shape
# that rapid_fix_missing_properties scans for.
SAMPLE_VALIDATION_ERRORS = """
=== Module: MonographDCTAP/Monograph_Work_Text.tsv ===
Message: Less than 1 values on Work->bf:language
Message: Less than 1 values on Work->bf:content
Message: Less than 1 values on Work->bf:adminMetadata
"""
# Copy of the rapid_fix function
def rapid_fix_missing_properties(rdf_content: str, validation_results: str, template: str, steps_log: Optional[List[str]] = None) -> Optional[str]:
    """Patch simple "missing property" validation errors without an AI call.

    Scans ``validation_results`` for messages of the form
    ``Less than N values on ...->bf:<prop>`` and, for every reported property
    that has a canned snippet, appends that snippet just before the closing
    tag of the first ``bf:Work`` element (or the first ``bf:Instance`` when
    the document has no ``bf:Work``).

    Args:
        rdf_content: RDF/XML document to patch.
        validation_results: Raw validator output to scan for missing properties.
        template: Not used by this function; kept so existing callers keep
            working with the same signature.
        steps_log: Optional list that receives one human-readable line per
            step. An empty list is accepted and filled in place.

    Returns:
        The patched RDF/XML, or ``None`` when no missing property is reported,
        when neither ``bf:Work`` nor ``bf:Instance`` is found, or when none of
        the reported properties could be added.
    """

    def log(message: str) -> None:
        # Compare against None rather than relying on truthiness: callers
        # normally pass a fresh, empty (falsy) list, which would otherwise
        # never receive a single entry.
        if steps_log is not None:
            steps_log.append(message)

    # The same property can be reported several times (e.g. once per module);
    # keep the first occurrence of each so a snippet is never inserted twice.
    missing = list(dict.fromkeys(
        re.findall(r"Less than \d+ values on.*->bf:(\w+)", validation_results)
    ))
    if not missing:
        log("❌ Rapid fix: No missing properties detected in validation results")
        return None
    log(f"🔍 Rapid fix detected {len(missing)} missing properties: {', '.join(missing)}")

    # Canned snippets, keyed by the exact (case-sensitive) bf: property name.
    INSTANT_FIXES = {
        "title": '<bf:title><bf:Title><bf:mainTitle>Untitled</bf:mainTitle></bf:Title></bf:title>',
        "language": '<bf:language><bf:Language rdf:about="http://id.loc.gov/vocabulary/languages/eng"><rdfs:label>English</rdfs:label><bf:code>eng</bf:code></bf:Language></bf:language>',
        "content": '<bf:content><bf:Content rdf:about="http://id.loc.gov/vocabulary/contentTypes/txt"><rdfs:label>text</rdfs:label><bf:code>txt</bf:code></bf:Content></bf:content>',
        "adminMetadata": (
            '<bf:adminMetadata>\n'
            '<bf:AdminMetadata>\n'
            '<bf:status>\n'
            '<bf:Status rdf:about="http://id.loc.gov/vocabulary/mstatus/n">\n'
            '<rdfs:label>new</rdfs:label>\n'
            '<bf:code>n</bf:code>\n'
            '</bf:Status>\n'
            '</bf:status>\n'
            '<bf:date rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2024-01-01</bf:date>\n'
            '<bf:agent>\n'
            '<bf:Agent rdf:about="http://id.loc.gov/vocabulary/organizations/dlc">\n'
            '<rdf:type rdf:resource="http://id.loc.gov/ontologies/bibframe/Organization"/>\n'
            '<rdfs:label>Library of Congress</rdfs:label>\n'
            '</bf:Agent>\n'
            '</bf:agent>\n'
            '<bf:assigner>\n'
            '<bf:Agent rdf:about="http://id.loc.gov/vocabulary/organizations/dlc">\n'
            '<rdf:type rdf:resource="http://id.loc.gov/ontologies/bibframe/Organization"/>\n'
            '<rdfs:label>Library of Congress</rdfs:label>\n'
            '</bf:Agent>\n'
            '</bf:assigner>\n'
            '</bf:AdminMetadata>\n'
            '</bf:adminMetadata>'
        ),
    }

    # Find the insertion point: the first bf:Work, else the first bf:Instance.
    work_match = re.search(r'(<bf:Work[^>]*>)(.*?)(</bf:Work>)', rdf_content, re.DOTALL)
    instance_match = re.search(r'(<bf:Instance[^>]*>)(.*?)(</bf:Instance>)', rdf_content, re.DOTALL)
    match = work_match or instance_match
    if match is None:
        log("❌ Rapid fix: No bf:Work or bf:Instance found in RDF")
        return None

    target_type = "Work" if work_match else "Instance"
    opening_tag, content, closing_tag = match.groups()

    log(f"🔍 Rapid fix target: bf:{target_type}")
    has_admin = "<bf:adminMetadata>" in content or "<bf:AdminMetadata>" in content
    log(f"🔍 Current state: AdminMetadata {'EXISTS' if has_admin else 'MISSING'}")

    # Build fixes (at most the first 10 distinct properties are considered).
    fixes = []
    for prop in missing[:10]:
        snippet = INSTANT_FIXES.get(prop)
        # Require a delimiter after the element name so that, for example,
        # <bf:contentAccessibility> is not mistaken for <bf:content>.
        already_present = re.search(rf"<bf:{re.escape(prop)}[\s/>]", content) is not None

        log(f"🔍 Processing property: '{prop}'")
        log(f" Check: Is '{prop}' in INSTANT_FIXES? {snippet is not None}")
        log(f" Check: Is '<bf:{prop}' in content? {already_present}")

        if snippet is None:
            log(f" ⚠️ No template for '{prop}', skipping")
        elif already_present:
            log(f" ℹ️ Property '{prop}' already exists, skipping")
        else:
            fixes.append(snippet)
            log(f" ✅ Will add missing '{prop}' property")

    if not fixes:
        log("❌ Rapid fix: No properties could be fixed")
        return None

    # Insert all snippets at once, right before the closing tag.
    log(f"🔨 Adding {len(fixes)} missing properties to {target_type}")
    fixed_content = opening_tag + content + '\n ' + '\n '.join(fixes) + '\n' + closing_tag

    # Splice at the matched span so only the element that was inspected is
    # rewritten, even if an identical element appears elsewhere in the document.
    result = rdf_content[:match.start()] + fixed_content + rdf_content[match.end():]

    # Most snippets use the rdfs: prefix; declare it on the root element when
    # the document does not, otherwise the output has an unbound prefix and
    # cannot be parsed as XML.
    if any("<rdfs:" in snippet for snippet in fixes) and not re.search(r"xmlns:rdfs\s*=", result):
        result, declared = re.subn(
            r"<rdf:RDF\b",
            '<rdf:RDF xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"',
            result,
            count=1,
        )
        if declared:
            log("🔨 Declared missing xmlns:rdfs namespace on rdf:RDF")

    log(f"✅ Rapid fix complete: Added {len(fixes)} properties")
    return result
# Run the rapid fix against the sample data and print a step-by-step report.
print("=" * 80)
print("🧪 TESTING RAPID FIX LOGIC")
print("=" * 80)

print("\n📋 INPUT RDF:")
print(SAMPLE_INVALID_RDF)
print("\n❌ VALIDATION ERRORS:")
print(SAMPLE_VALIDATION_ERRORS)

# The function appends one line per step to this list.
steps_log = []
result = rapid_fix_missing_properties(SAMPLE_INVALID_RDF, SAMPLE_VALIDATION_ERRORS, 'monograph', steps_log)

print("\n" + "=" * 80)
print("📋 STEP-BY-STEP LOG:")
print("=" * 80)
for step in steps_log:
    print(step)

print("\n" + "=" * 80)
if result:
    print("✅ RAPID FIX PRODUCED OUTPUT:")
    print("=" * 80)
    print(result)

    # Report which of the expected properties appear in the patched document.
    print("\n" + "=" * 80)
    print("🔍 ANALYSIS:")
    print("=" * 80)
    if "<bf:language>" in result:
        print("✅ Added bf:language")
    if "<bf:content>" in result:
        print("✅ Added bf:content")
    if "<bf:adminMetadata>" in result:
        print("✅ Added bf:adminMetadata")
        # bf:assigner is only looked for when adminMetadata itself was added.
        if "<bf:assigner>" in result:
            print(" ✅ AdminMetadata includes bf:assigner")
        else:
            print(" ❌ AdminMetadata MISSING bf:assigner!")
else:
    print("❌ RAPID FIX RETURNED None")
|