File size: 6,597 Bytes
a40763c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
#!/usr/bin/env python3
"""
Standalone test for rapid_fix_missing_properties - no dependencies
"""
import re
from typing import Optional, List

# Sample RDF/XML fixture that is deliberately incomplete: a single bf:Work
# holding only an rdf:type and a bf:title. It contains none of the properties
# named in SAMPLE_VALIDATION_ERRORS below, and it declares only the rdf: and
# bf: namespace prefixes.
SAMPLE_INVALID_RDF = """<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:bf="http://id.loc.gov/ontologies/bibframe/">
    <bf:Work rdf:about="http://example.org/work/invalid-1">
        <rdf:type rdf:resource="http://id.loc.gov/ontologies/bibframe/Text"/>
        <bf:title>Incomplete Title</bf:title>
    </bf:Work>
</rdf:RDF>"""

# Validator-output fixture paired with the RDF above: one
# "Less than 1 values on Work->bf:<property>" message each for bf:language,
# bf:content and bf:adminMetadata, under a single module header line.
SAMPLE_VALIDATION_ERRORS = """
=== Module: MonographDCTAP/Monograph_Work_Text.tsv ===
Message: Less than 1 values on Work->bf:language
Message: Less than 1 values on Work->bf:content
Message: Less than 1 values on Work->bf:adminMetadata
"""

# Copy of the rapid_fix function
def rapid_fix_missing_properties(rdf_content: str, validation_results: str, template: str, steps_log: Optional[List[str]] = None) -> Optional[str]:
    """Insert boilerplate for missing BIBFRAME properties - no AI needed.

    Scans ``validation_results`` for "Less than N values on ...->bf:<prop>"
    messages and, for every reported property that has a canned template and
    is not already present, appends that template just before the closing tag
    of the first ``bf:Work`` (or, if there is none, the first ``bf:Instance``)
    found in ``rdf_content``.

    Args:
        rdf_content: RDF/XML document to repair.
        validation_results: Raw validator output to scan for missing
            properties.
        template: Accepted for interface compatibility; not used here.
        steps_log: Optional list that receives human-readable progress
            messages. An empty list is filled in just like a non-empty one.

    Returns:
        The repaired RDF/XML text, or ``None`` when no missing property was
        reported, no ``bf:Work``/``bf:Instance`` element was found, or none of
        the reported properties could be fixed.
    """

    def log(message: str) -> None:
        # Compare against None instead of relying on truthiness: callers
        # normally pass a fresh empty list, which must still be populated.
        if steps_log is not None:
            steps_log.append(message)

    # Quick pattern match for missing properties
    reported = re.findall(r"Less than \d+ values on.*->bf:(\w+)", validation_results)
    if not reported:
        log("❌ Rapid fix: No missing properties detected in validation results")
        return None

    # The same property may be reported several times (e.g. by several
    # validation modules). De-duplicate, keeping first-seen order, so that a
    # template is never inserted twice and the log is deterministic.
    missing = list(dict.fromkeys(reported))
    log(f"πŸ” Rapid fix detected {len(missing)} missing properties: {', '.join(missing)}")

    # Pre-compiled property templates
    INSTANT_FIXES = {
        "title": '<bf:title><bf:Title><bf:mainTitle>Untitled</bf:mainTitle></bf:Title></bf:title>',
        "language": '<bf:language><bf:Language rdf:about="http://id.loc.gov/vocabulary/languages/eng"><rdfs:label>English</rdfs:label><bf:code>eng</bf:code></bf:Language></bf:language>',
        "content": '<bf:content><bf:Content rdf:about="http://id.loc.gov/vocabulary/contentTypes/txt"><rdfs:label>text</rdfs:label><bf:code>txt</bf:code></bf:Content></bf:content>',
        "adminMetadata": '''<bf:adminMetadata>
    <bf:AdminMetadata>
        <bf:status>
            <bf:Status rdf:about="http://id.loc.gov/vocabulary/mstatus/n">
                <rdfs:label>new</rdfs:label>
                <bf:code>n</bf:code>
            </bf:Status>
        </bf:status>
        <bf:date rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2024-01-01</bf:date>
        <bf:agent>
            <bf:Agent rdf:about="http://id.loc.gov/vocabulary/organizations/dlc">
                <rdf:type rdf:resource="http://id.loc.gov/ontologies/bibframe/Organization"/>
                <rdfs:label>Library of Congress</rdfs:label>
            </bf:Agent>
        </bf:agent>
        <bf:assigner>
            <bf:Agent rdf:about="http://id.loc.gov/vocabulary/organizations/dlc">
                <rdf:type rdf:resource="http://id.loc.gov/ontologies/bibframe/Organization"/>
                <rdfs:label>Library of Congress</rdfs:label>
            </bf:Agent>
        </bf:assigner>
    </bf:AdminMetadata>
</bf:adminMetadata>''',
    }

    # Find insertion point: prefer the first bf:Work, fall back to bf:Instance.
    work_match = re.search(r'(<bf:Work[^>]*>)(.*?)(</bf:Work>)', rdf_content, re.DOTALL)
    instance_match = re.search(r'(<bf:Instance[^>]*>)(.*?)(</bf:Instance>)', rdf_content, re.DOTALL)

    match = work_match or instance_match
    if match is None:
        log("❌ Rapid fix: No bf:Work or bf:Instance found in RDF")
        return None

    target_type = "Work" if work_match else "Instance"
    opening_tag, content, closing_tag = match.groups()

    log(f"πŸ“ Rapid fix target: bf:{target_type}")
    has_admin = "<bf:adminMetadata>" in content or "<bf:AdminMetadata>" in content
    log(f"πŸ” Current state: AdminMetadata {'EXISTS' if has_admin else 'MISSING'}")

    # Build fixes
    fixes = []

    for prop in missing[:10]:
        has_template = prop in INSTANT_FIXES
        # Match the complete element name: a plain prefix test would treat
        # e.g. <bf:contentAccessibility> as an existing <bf:content>.
        already_present = re.search(rf"<bf:{re.escape(prop)}(?![\w.\-])", content) is not None

        # Log exactly the checks the decision below is based on.
        log(f"πŸ” Processing property: '{prop}'")
        log(f"   Check: Is '{prop}' in INSTANT_FIXES? {has_template}")
        log(f"   Check: Is '<bf:{prop}' in content? {already_present}")

        if not has_template:
            log(f"   ⚠️  No template for '{prop}', skipping")
        elif already_present:
            log(f"   ℹ️  Property '{prop}' already exists, skipping")
        else:
            fixes.append(INSTANT_FIXES[prop])
            log(f"   βœ… Will add missing '{prop}' property")

    if not fixes:
        log("❌ Rapid fix: No properties could be fixed")
        return None

    # Insert all at once
    log(f"πŸ”¨ Adding {len(fixes)} missing properties to {target_type}")
    fixed_content = opening_tag + content + '\n    ' + '\n    '.join(fixes) + '\n' + closing_tag

    # Splice by offsets so that only the matched element is rewritten, even
    # if the document contains another textually identical block.
    result = rdf_content[:match.start()] + fixed_content + rdf_content[match.end():]

    # Most templates use the rdfs: prefix. Without a matching namespace
    # declaration the output would not be well-formed XML, so declare it on
    # the rdf:RDF root when the document does not already bind it.
    uses_rdfs = any("<rdfs:" in fix for fix in fixes)
    if uses_rdfs and re.search(r"xmlns:rdfs\s*=", result) is None:
        result, declared = re.subn(
            r"<rdf:RDF(?=[\s>])",
            '<rdf:RDF xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"',
            result,
            count=1,
        )
        if declared:
            log("   Declared missing xmlns:rdfs namespace on rdf:RDF")
        else:
            log("   Warning: rdfs: prefix is used but no rdf:RDF root was found to declare it on")

    log(f"βœ… Rapid fix complete: Added {len(fixes)} properties")

    return result

# Driver: run the rapid fix against the sample fixtures and report, on stdout,
# the inputs, the step log, the produced RDF and a short presence check for
# each property that was expected to be added.
_rule = "=" * 80

print(_rule, "πŸ§ͺ TESTING RAPID FIX LOGIC", _rule, sep="\n")
print("\nπŸ“„ INPUT RDF:", SAMPLE_INVALID_RDF, sep="\n")
print("\n❌ VALIDATION ERRORS:", SAMPLE_VALIDATION_ERRORS, sep="\n")

steps_log = []
result = rapid_fix_missing_properties(
    SAMPLE_INVALID_RDF,
    SAMPLE_VALIDATION_ERRORS,
    'monograph',
    steps_log,
)

print("\n" + _rule, "πŸ“‹ STEP-BY-STEP LOG:", _rule, sep="\n")
if steps_log:
    # One entry per line; nothing at all is printed for an empty log.
    print("\n".join(steps_log))

print("\n" + _rule)
if not result:
    print("❌ RAPID FIX RETURNED None")
else:
    print("βœ… RAPID FIX PRODUCED OUTPUT:", _rule, result, sep="\n")
    print("\n" + _rule, "πŸ” ANALYSIS:", _rule, sep="\n")

    # (substring looked for in the output, line reported when it is found)
    _expectations = (
        ("<bf:language>", "βœ… Added bf:language"),
        ("<bf:content>", "βœ… Added bf:content"),
        ("<bf:adminMetadata>", "βœ… Added bf:adminMetadata"),
    )
    for _marker, _report in _expectations:
        if _marker in result:
            print(_report)

    # The assigner check is only meaningful once adminMetadata is present.
    if "<bf:adminMetadata>" in result:
        if "<bf:assigner>" not in result:
            print("   ❌ AdminMetadata MISSING bf:assigner!")
        else:
            print("   βœ… AdminMetadata includes bf:assigner")