RDF Validation Deployment
committed on
Commit
·
bdd95fd
1
Parent(s):
37cea3a
UI: simplify form via Advanced options; LLM: targeted AdminMetadata bf:assigner guidance; enable iteration defaults
Browse files
app.py
CHANGED
|
@@ -188,6 +188,7 @@ def filter_validation_results_by_class(validation_results: str, rdf_content: str
|
|
| 188 |
'Instance': [],
|
| 189 |
'Title': [],
|
| 190 |
'Contribution': [],
|
|
|
|
| 191 |
'Other': []
|
| 192 |
}
|
| 193 |
|
|
@@ -205,6 +206,9 @@ def filter_validation_results_by_class(validation_results: str, rdf_content: str
|
|
| 205 |
current_class = 'Title'
|
| 206 |
elif 'bf:Contribution' in line:
|
| 207 |
current_class = 'Contribution'
|
|
|
|
|
|
|
|
|
|
| 208 |
|
| 209 |
# Collect lines for current violation
|
| 210 |
if 'Constraint Violation' in line:
|
|
@@ -351,7 +355,8 @@ def extract_relevant_rdf_section(rdf_content: str, class_name: str) -> str:
|
|
| 351 |
'Work': r'<bf:Work.*?</bf:Work>',
|
| 352 |
'Instance': r'<bf:Instance.*?</bf:Instance>',
|
| 353 |
'Title': r'<bf:Title.*?</bf:Title>',
|
| 354 |
-
'Contribution': r'<bf:Contribution.*?</bf:Contribution>'
|
|
|
|
| 355 |
}
|
| 356 |
|
| 357 |
pattern = patterns.get(class_name)
|
|
@@ -456,7 +461,7 @@ def get_ai_correction(validation_results: str, rdf_content: str, template: str =
|
|
| 456 |
# Extract relevant section
|
| 457 |
relevant_section = extract_relevant_rdf_section(rdf_content, class_name)
|
| 458 |
|
| 459 |
-
|
| 460 |
|
| 461 |
{severity_instruction}
|
| 462 |
|
|
@@ -467,6 +472,18 @@ Current {class_name} RDF:
|
|
| 467 |
{relevant_section[:800]}
|
| 468 |
|
| 469 |
Return ONLY the corrected {class_name} XML section. No explanations."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 470 |
|
| 471 |
try:
|
| 472 |
chat_completion = client.chat.completions.create(
|
|
@@ -525,7 +542,8 @@ def merge_corrected_sections(original_rdf: str, corrected_sections: dict) -> str
|
|
| 525 |
'Work': r'<bf:Work.*?</bf:Work>',
|
| 526 |
'Instance': r'<bf:Instance.*?</bf:Instance>',
|
| 527 |
'Title': r'<bf:Title.*?</bf:Title>',
|
| 528 |
-
'Contribution': r'<bf:Contribution.*?</bf:Contribution>'
|
|
|
|
| 529 |
}
|
| 530 |
|
| 531 |
pattern = patterns.get(class_name)
|
|
@@ -1297,45 +1315,44 @@ def create_interface():
|
|
| 1297 |
show_copy_button=True
|
| 1298 |
)
|
| 1299 |
|
| 1300 |
-
|
| 1301 |
-
|
| 1302 |
-
|
| 1303 |
-
|
| 1304 |
-
|
| 1305 |
-
|
| 1306 |
-
|
| 1307 |
-
|
| 1308 |
-
|
| 1309 |
-
|
| 1310 |
-
|
| 1311 |
-
|
| 1312 |
-
|
| 1313 |
-
|
| 1314 |
-
|
| 1315 |
-
|
| 1316 |
-
|
| 1317 |
-
|
| 1318 |
-
|
| 1319 |
-
|
| 1320 |
-
|
| 1321 |
-
|
| 1322 |
-
|
| 1323 |
-
|
| 1324 |
-
|
| 1325 |
-
|
| 1326 |
-
|
| 1327 |
-
|
| 1328 |
-
|
| 1329 |
-
|
| 1330 |
-
|
| 1331 |
-
|
| 1332 |
-
|
| 1333 |
-
|
| 1334 |
-
|
| 1335 |
-
|
| 1336 |
-
|
| 1337 |
-
|
| 1338 |
-
)
|
| 1339 |
|
| 1340 |
validate_btn = gr.Button("π Validate RDF", variant="primary", size="lg")
|
| 1341 |
|
|
|
|
| 188 |
'Instance': [],
|
| 189 |
'Title': [],
|
| 190 |
'Contribution': [],
|
| 191 |
+
'AdminMetadata': [],
|
| 192 |
'Other': []
|
| 193 |
}
|
| 194 |
|
|
|
|
| 206 |
current_class = 'Title'
|
| 207 |
elif 'bf:Contribution' in line:
|
| 208 |
current_class = 'Contribution'
|
| 209 |
+
elif 'bf:AdminMetadata' in line or 'AdminMetadata' in line or '->bf:assigner' in line:
|
| 210 |
+
# Many admin violations show assigner path; map to AdminMetadata
|
| 211 |
+
current_class = 'AdminMetadata'
|
| 212 |
|
| 213 |
# Collect lines for current violation
|
| 214 |
if 'Constraint Violation' in line:
|
|
|
|
| 355 |
'Work': r'<bf:Work.*?</bf:Work>',
|
| 356 |
'Instance': r'<bf:Instance.*?</bf:Instance>',
|
| 357 |
'Title': r'<bf:Title.*?</bf:Title>',
|
| 358 |
+
'Contribution': r'<bf:Contribution.*?</bf:Contribution>',
|
| 359 |
+
'AdminMetadata': r'<bf:AdminMetadata.*?</bf:AdminMetadata>'
|
| 360 |
}
|
| 361 |
|
| 362 |
pattern = patterns.get(class_name)
|
|
|
|
| 461 |
# Extract relevant section
|
| 462 |
relevant_section = extract_relevant_rdf_section(rdf_content, class_name)
|
| 463 |
|
| 464 |
+
base_prompt = f"""Fix this {class_name} RDF section based on these specific errors.
|
| 465 |
|
| 466 |
{severity_instruction}
|
| 467 |
|
|
|
|
| 472 |
{relevant_section[:800]}
|
| 473 |
|
| 474 |
Return ONLY the corrected {class_name} XML section. No explanations."""
|
| 475 |
+
# Targeted guidance for AdminMetadata -> bf:assigner
|
| 476 |
+
if class_name == 'AdminMetadata' and ('bf:assigner' in class_errors or '->bf:assigner' in class_errors):
|
| 477 |
+
guidance = """
|
| 478 |
+
Every <bf:AdminMetadata> MUST have a direct <bf:assigner> child.
|
| 479 |
+
If <bf:agent rdf:resource="..."/> exists, add <bf:assigner rdf:resource="..."/> with the SAME URI.
|
| 480 |
+
If <bf:descriptionModifier rdf:resource="..."/> exists, add <bf:assigner rdf:resource="..."/> with the SAME URI.
|
| 481 |
+
If neither exists but there is a <bf:identifiedBy> ... <bf:assigner rdf:resource="..."/> inside, copy that value to a TOP-LEVEL <bf:assigner> under <bf:AdminMetadata>.
|
| 482 |
+
Keep all existing content; just add the missing <bf:assigner>.
|
| 483 |
+
"""
|
| 484 |
+
prompt = guidance + "\n\n" + base_prompt
|
| 485 |
+
else:
|
| 486 |
+
prompt = base_prompt
|
| 487 |
|
| 488 |
try:
|
| 489 |
chat_completion = client.chat.completions.create(
|
|
|
|
| 542 |
'Work': r'<bf:Work.*?</bf:Work>',
|
| 543 |
'Instance': r'<bf:Instance.*?</bf:Instance>',
|
| 544 |
'Title': r'<bf:Title.*?</bf:Title>',
|
| 545 |
+
'Contribution': r'<bf:Contribution.*?</bf:Contribution>',
|
| 546 |
+
'AdminMetadata': r'<bf:AdminMetadata.*?</bf:AdminMetadata>'
|
| 547 |
}
|
| 548 |
|
| 549 |
pattern = patterns.get(class_name)
|
|
|
|
| 1315 |
show_copy_button=True
|
| 1316 |
)
|
| 1317 |
|
| 1318 |
+
# Keep the main form simple and tuck options into an accordion
|
| 1319 |
+
with gr.Accordion("Advanced options", open=False):
|
| 1320 |
+
with gr.Row():
|
| 1321 |
+
template_dropdown = gr.Dropdown(
|
| 1322 |
+
label="Validation Template",
|
| 1323 |
+
choices=["monograph", "custom"],
|
| 1324 |
+
value="monograph",
|
| 1325 |
+
info="Select the SHACL template to validate against"
|
| 1326 |
+
)
|
| 1327 |
+
use_ai_checkbox = gr.Checkbox(
|
| 1328 |
+
label="Use AI Features",
|
| 1329 |
+
value=True,
|
| 1330 |
+
info="Enable AI-powered suggestions and corrections"
|
| 1331 |
+
)
|
| 1332 |
+
include_warnings_checkbox = gr.Checkbox(
|
| 1333 |
+
label="Include Warnings",
|
| 1334 |
+
value=False,
|
| 1335 |
+
info="Include warnings in AI corrections (violations only by default)"
|
| 1336 |
+
)
|
| 1337 |
+
with gr.Row():
|
| 1338 |
+
iterate_checkbox = gr.Checkbox(
|
| 1339 |
+
label="Iterate until valid",
|
| 1340 |
+
value=True,
|
| 1341 |
+
info="Try multiple correction attempts until validation passes or attempts run out"
|
| 1342 |
+
)
|
| 1343 |
+
max_attempts_slider = gr.Slider(
|
| 1344 |
+
label="Max attempts",
|
| 1345 |
+
minimum=1,
|
| 1346 |
+
maximum=5,
|
| 1347 |
+
value=5,
|
| 1348 |
+
step=1,
|
| 1349 |
+
info="Maximum number of correction attempts when iterating"
|
| 1350 |
+
)
|
| 1351 |
+
show_steps_checkbox = gr.Checkbox(
|
| 1352 |
+
label="Show steps",
|
| 1353 |
+
value=False,
|
| 1354 |
+
info="Display step-by-step process (turn on when you want transparency)"
|
| 1355 |
+
)
|
|
|
|
| 1356 |
|
| 1357 |
validate_btn = gr.Button("π Validate RDF", variant="primary", size="lg")
|
| 1358 |
|