|
|
|
|
|
""" |
|
|
Standalone test for rapid_fix_missing_properties - no dependencies |
|
|
""" |
|
|
import re |
|
|
from typing import Optional, List |
|
|
|
|
|
|
|
|
# Fixture: an RDF/XML document whose single bf:Work carries only rdf:type and
# bf:title, i.e. it has no bf:language, bf:content or bf:adminMetadata.
# Only the rdf: and bf: prefixes are declared on the root element.
SAMPLE_INVALID_RDF = """<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:bf="http://id.loc.gov/ontologies/bibframe/">
  <bf:Work rdf:about="http://example.org/work/invalid-1">
    <rdf:type rdf:resource="http://id.loc.gov/ontologies/bibframe/Text"/>
    <bf:title>Incomplete Title</bf:title>
  </bf:Work>
</rdf:RDF>"""
|
|
|
|
|
|
|
|
# Fixture: validator output reporting three properties with fewer than one
# value on the Work (bf:language, bf:content, bf:adminMetadata).
SAMPLE_VALIDATION_ERRORS = """
=== Module: MonographDCTAP/Monograph_Work_Text.tsv ===
Message: Less than 1 values on Work->bf:language
Message: Less than 1 values on Work->bf:content
Message: Less than 1 values on Work->bf:adminMetadata
"""
|
|
|
|
|
|
|
|
# Namespace URI bound to the ``rdfs:`` prefix that several snippets below use.
_RDFS_NAMESPACE = "http://www.w3.org/2000/01/rdf-schema#"

# Upper bound on how many distinct reported properties a single call processes.
_MAX_RAPID_FIXES = 10

# Canned RDF/XML snippets, keyed by the exact (case-sensitive) bf: property
# name as it appears in the validator message.
_INSTANT_FIXES = {
    "title": '<bf:title><bf:Title><bf:mainTitle>Untitled</bf:mainTitle></bf:Title></bf:title>',
    "language": '<bf:language><bf:Language rdf:about="http://id.loc.gov/vocabulary/languages/eng"><rdfs:label>English</rdfs:label><bf:code>eng</bf:code></bf:Language></bf:language>',
    "content": '<bf:content><bf:Content rdf:about="http://id.loc.gov/vocabulary/contentTypes/txt"><rdfs:label>text</rdfs:label><bf:code>txt</bf:code></bf:Content></bf:content>',
    "adminMetadata": '''<bf:adminMetadata>
      <bf:AdminMetadata>
        <bf:status>
          <bf:Status rdf:about="http://id.loc.gov/vocabulary/mstatus/n">
            <rdfs:label>new</rdfs:label>
            <bf:code>n</bf:code>
          </bf:Status>
        </bf:status>
        <bf:date rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2024-01-01</bf:date>
        <bf:agent>
          <bf:Agent rdf:about="http://id.loc.gov/vocabulary/organizations/dlc">
            <rdf:type rdf:resource="http://id.loc.gov/ontologies/bibframe/Organization"/>
            <rdfs:label>Library of Congress</rdfs:label>
          </bf:Agent>
        </bf:agent>
        <bf:assigner>
          <bf:Agent rdf:about="http://id.loc.gov/vocabulary/organizations/dlc">
            <rdf:type rdf:resource="http://id.loc.gov/ontologies/bibframe/Organization"/>
            <rdfs:label>Library of Congress</rdfs:label>
          </bf:Agent>
        </bf:assigner>
      </bf:AdminMetadata>
    </bf:adminMetadata>''',
}


def _ensure_rdfs_namespace(rdf: str) -> str:
    """Declare the ``rdfs:`` prefix on the first ``<rdf:RDF`` tag if it is not declared anywhere."""
    if re.search(r"xmlns:rdfs\s*=", rdf):
        return rdf
    # If there is no <rdf:RDF root, nothing matches and the text is returned unchanged.
    return re.sub(r"<rdf:RDF\b", f'<rdf:RDF xmlns:rdfs="{_RDFS_NAMESPACE}"', rdf, count=1)


def rapid_fix_missing_properties(rdf_content: str, validation_results: str, template: str, steps_log: Optional[List[str]] = None) -> Optional[str]:
    """Ultra-fast fix for simple missing property errors - no AI needed.

    Scans ``validation_results`` for "Less than N values on ...->bf:<prop>"
    messages and, for every reported property that has a canned snippet and
    is not already present, appends that snippet just before the closing tag
    of the first ``bf:Work`` element (or the first ``bf:Instance`` when the
    document has no ``bf:Work``).

    Args:
        rdf_content: RDF/XML document as text.
        validation_results: Raw validator output to scan for missing properties.
        template: Accepted for interface compatibility; not used by this function.
        steps_log: Optional list that receives human-readable progress
            messages. Any list, including an empty one, is appended to.

    Returns:
        The patched RDF/XML text, or ``None`` when no missing property was
        reported, no ``bf:Work``/``bf:Instance`` element was found, or none
        of the reported properties could be fixed.
    """
    def log(message: str) -> None:
        # Test against None: an empty list is falsy but is still a valid sink.
        if steps_log is not None:
            steps_log.append(message)

    reported = re.findall(r"Less than \d+ values on.*->bf:(\w+)", validation_results)
    if not reported:
        log("❌ Rapid fix: No missing properties detected in validation results")
        return None

    # The same property can be reported several times; keep the first
    # occurrence only so each snippet is inserted at most once.
    missing = list(dict.fromkeys(reported))
    log(f"🔍 Rapid fix detected {len(missing)} missing properties: {', '.join(missing)}")

    work_match = re.search(r'(<bf:Work[^>]*>)(.*?)(</bf:Work>)', rdf_content, re.DOTALL)
    instance_match = re.search(r'(<bf:Instance[^>]*>)(.*?)(</bf:Instance>)', rdf_content, re.DOTALL)

    # NOTE: bf:Work is preferred whenever it exists, regardless of which
    # entity the validator message names before the "->".
    match = work_match or instance_match
    if match is None:
        log("❌ Rapid fix: No bf:Work or bf:Instance found in RDF")
        return None

    target_type = "Work" if work_match else "Instance"
    opening_tag, content, closing_tag = match.groups()

    has_admin = "<bf:adminMetadata>" in content or "<bf:AdminMetadata>" in content
    log(f"🎯 Rapid fix target: bf:{target_type}")
    log(f"📊 Current state: AdminMetadata {'EXISTS' if has_admin else 'MISSING'}")

    fixes = []
    for prop in missing[:_MAX_RAPID_FIXES]:
        snippet = _INSTANT_FIXES.get(prop)
        # Match the exact element name so that e.g. "content" is not
        # considered present merely because a longer bf:content* tag exists.
        already_present = re.search(rf"<bf:{re.escape(prop)}[\s/>]", content) is not None

        log(f"🔍 Processing property: '{prop}'")
        log(f" Check: Is '{prop}' in INSTANT_FIXES? {snippet is not None}")
        log(f" Check: Is '<bf:{prop}' in content? {already_present}")

        if snippet is None:
            log(f" ⚠️ No template for '{prop}', skipping")
        elif already_present:
            log(f" ℹ️ Property '{prop}' already exists, skipping")
        else:
            fixes.append(snippet)
            log(f" ✅ Will add missing '{prop}' property")

    if not fixes:
        log("❌ Rapid fix: No properties could be fixed")
        return None

    log(f"🔨 Adding {len(fixes)} missing properties to {target_type}")
    fixed_block = opening_tag + content + "\n    " + "\n    ".join(fixes) + "\n" + closing_tag

    # Splice at the matched span so only this element is rewritten, even if
    # an identical block of text occurs elsewhere in the document.
    result = rdf_content[:match.start()] + fixed_block + rdf_content[match.end():]

    # Several snippets use rdfs:label; an undeclared prefix would leave the
    # output unparseable by namespace-aware XML parsers.
    if any("rdfs:" in fix for fix in fixes):
        result = _ensure_rdfs_namespace(result)

    log(f"✅ Rapid fix complete: Added {len(fixes)} properties")
    return result
|
|
|
|
|
|
|
|
# --- Script driver: run the rapid fix on the sample fixtures and report. ---

# Show the inputs.
print("=" * 80)
print("🧪 TESTING RAPID FIX LOGIC")
print("=" * 80)
print("\n📄 INPUT RDF:")
print(SAMPLE_INVALID_RDF)
print("\n❌ VALIDATION ERRORS:")
print(SAMPLE_VALIDATION_ERRORS)

# Run the fix, collecting its progress messages in steps_log.
steps_log = []
result = rapid_fix_missing_properties(SAMPLE_INVALID_RDF, SAMPLE_VALIDATION_ERRORS, 'monograph', steps_log)

# Replay whatever the function logged.
print("\n" + "=" * 80)
print("📋 STEP-BY-STEP LOG:")
print("=" * 80)
for step in steps_log:
    print(step)

# Report the outcome and which expected properties appear in the output.
print("\n" + "=" * 80)
if result:
    print("✅ RAPID FIX PRODUCED OUTPUT:")
    print("=" * 80)
    print(result)

    print("\n" + "=" * 80)
    print("🔍 ANALYSIS:")
    print("=" * 80)

    if "<bf:language>" in result:
        print("✅ Added bf:language")
    if "<bf:content>" in result:
        print("✅ Added bf:content")
    if "<bf:adminMetadata>" in result:
        print("✅ Added bf:adminMetadata")
        # The assigner check only makes sense once adminMetadata is present.
        if "<bf:assigner>" in result:
            print(" ✅ AdminMetadata includes bf:assigner")
        else:
            print(" ❌ AdminMetadata MISSING bf:assigner!")
else:
    print("❌ RAPID FIX RETURNED None")
|
|
|