RDF Validation Deployment
committed on
Commit
·
e0e9b68
1
Parent(s):
4f8c53c
AI iteration with re-validation + steps log UI; show steps by default; optional iterate-until-valid with max attempts
Browse files
app.py
CHANGED
|
@@ -222,25 +222,13 @@ def filter_validation_results_by_class(validation_results: str, rdf_content: str
|
|
| 222 |
return {k: '\n'.join(v) for k, v in class_results.items() if v}
|
| 223 |
|
| 224 |
def get_ai_suggestions(validation_results: str, rdf_content: str, include_warnings: bool = False) -> str:
|
|
|
|
|
|
|
|
|
|
| 225 |
"""
|
| 226 |
-
Generate AI-powered fix suggestions for invalid RDF/XML.
|
| 227 |
-
|
| 228 |
-
This tool analyzes validation results and provides actionable suggestions
|
| 229 |
-
for fixing RDF/XML validation errors using AI or rule-based analysis.
|
| 230 |
-
|
| 231 |
-
Args:
|
| 232 |
-
validation_results (str): The validation error messages
|
| 233 |
-
rdf_content (str): The original RDF/XML content that failed validation
|
| 234 |
-
include_warnings (bool): Whether to include warnings in suggestions
|
| 235 |
-
|
| 236 |
-
Returns:
|
| 237 |
-
str: Detailed suggestions for fixing the RDF validation issues
|
| 238 |
-
"""
|
| 239 |
-
|
| 240 |
if not OPENAI_AVAILABLE:
|
| 241 |
return generate_manual_suggestions(validation_results)
|
| 242 |
-
|
| 243 |
-
# Get API key dynamically at runtime
|
| 244 |
current_api_key = os.getenv('HF_API_KEY', '')
|
| 245 |
if not current_api_key:
|
| 246 |
return f"""
|
|
@@ -248,9 +236,8 @@ def get_ai_suggestions(validation_results: str, rdf_content: str, include_warnin
|
|
| 248 |
|
| 249 |
{generate_manual_suggestions(validation_results)}
|
| 250 |
"""
|
| 251 |
-
|
| 252 |
try:
|
| 253 |
-
# Use OpenAI client with your Hugging Face Inference Endpoint
|
| 254 |
client = get_openai_client()
|
| 255 |
if not client:
|
| 256 |
return f"""
|
|
@@ -258,26 +245,36 @@ def get_ai_suggestions(validation_results: str, rdf_content: str, include_warnin
|
|
| 258 |
|
| 259 |
{generate_manual_suggestions(validation_results)}
|
| 260 |
"""
|
| 261 |
-
|
| 262 |
-
severity_instruction =
|
| 263 |
-
|
| 264 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
class_results = filter_validation_results_by_class(validation_results, rdf_content)
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
|
|
|
|
|
|
| 272 |
relevant_rdf = extract_relevant_rdf_section(rdf_content, primary_class)
|
| 273 |
-
|
| 274 |
-
prompt = f"""
|
|
|
|
| 275 |
|
| 276 |
{severity_instruction}
|
| 277 |
|
| 278 |
Validation Errors for {primary_class}:
|
| 279 |
{focused_results[:1500]}
|
| 280 |
|
|
|
|
|
|
|
|
|
|
| 281 |
Relevant RDF Section:
|
| 282 |
{relevant_rdf[:800]}
|
| 283 |
|
|
@@ -299,34 +296,35 @@ Format:
|
|
| 299 |
```
|
| 300 |
|
| 301 |
Be ultra-concise. Show the fix, not explanations."""
|
| 302 |
-
|
| 303 |
-
# Make API call using OpenAI client
|
| 304 |
-
print(f"π Making focused API call for {primary_class} class")
|
| 305 |
-
print(f"π Sending {len(focused_results)} chars instead of {len(validation_results)} chars")
|
| 306 |
-
|
| 307 |
chat_completion = client.chat.completions.create(
|
| 308 |
model=HF_MODEL,
|
| 309 |
messages=[
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
{
|
| 311 |
"role": "user",
|
| 312 |
"content": prompt
|
| 313 |
}
|
| 314 |
],
|
| 315 |
-
max_tokens=800,
|
| 316 |
-
temperature=0.5,
|
| 317 |
top_p=0.9
|
| 318 |
)
|
| 319 |
-
|
| 320 |
-
print("β
API call successful")
|
| 321 |
generated_text = chat_completion.choices[0].message.content
|
| 322 |
-
|
| 323 |
-
|
| 324 |
other_classes = [k for k in class_results.keys() if k != primary_class]
|
| 325 |
-
class_note =
|
| 326 |
-
|
| 327 |
-
|
|
|
|
|
|
|
| 328 |
return f"π€ **AI-Powered Suggestions ({('Violations + Warnings' if include_warnings else 'Violations Only')}):**\n\n{generated_text}{class_note}"
|
| 329 |
-
|
| 330 |
except Exception as e:
|
| 331 |
logger.error(f"OpenAI/HF Inference Endpoint error: {str(e)}")
|
| 332 |
return f"""
|
|
@@ -372,7 +370,7 @@ def extract_relevant_rdf_section(rdf_content: str, class_name: str) -> str:
|
|
| 372 |
|
| 373 |
return rdf_content[:1000] # Fallback
|
| 374 |
|
| 375 |
-
def get_ai_correction(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: int = None, include_warnings: bool = False) -> str:
|
| 376 |
"""
|
| 377 |
Generate AI-powered corrected RDF/XML based on validation errors.
|
| 378 |
|
|
@@ -391,20 +389,31 @@ def get_ai_correction(validation_results: str, rdf_content: str, template: str =
|
|
| 391 |
str: Corrected RDF/XML that should pass validation
|
| 392 |
"""
|
| 393 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 394 |
# Use configuration default if not specified
|
| 395 |
if max_attempts is None:
|
| 396 |
max_attempts = MAX_CORRECTION_ATTEMPTS
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
|
|
|
|
|
|
|
|
|
| 401 |
|
| 402 |
if not OPENAI_AVAILABLE:
|
|
|
|
|
|
|
| 403 |
return generate_manual_correction_hints(validation_results, rdf_content)
|
| 404 |
|
| 405 |
# Get API key dynamically at runtime
|
| 406 |
current_api_key = os.getenv('HF_API_KEY', '')
|
| 407 |
if not current_api_key:
|
|
|
|
|
|
|
| 408 |
return f"""<!-- AI correction disabled: Set HF_API_KEY as a Secret in your Space settings -->
|
| 409 |
|
| 410 |
{generate_manual_correction_hints(validation_results, rdf_content)}"""
|
|
@@ -412,6 +421,8 @@ def get_ai_correction(validation_results: str, rdf_content: str, template: str =
|
|
| 412 |
try:
|
| 413 |
client = get_openai_client()
|
| 414 |
if not client:
|
|
|
|
|
|
|
| 415 |
return f"""<!-- AI correction disabled: HF_API_KEY not configured -->
|
| 416 |
|
| 417 |
{generate_manual_correction_hints(validation_results, rdf_content)}"""
|
|
@@ -420,6 +431,8 @@ def get_ai_correction(validation_results: str, rdf_content: str, template: str =
|
|
| 420 |
import time
|
| 421 |
start_time = time.time()
|
| 422 |
timeout = 60 # 60 second timeout
|
|
|
|
|
|
|
| 423 |
|
| 424 |
severity_instruction = "Fix only the violations (errors) and ignore any warnings." if not include_warnings else "Fix both violations and warnings."
|
| 425 |
|
|
@@ -661,7 +674,7 @@ def extract_rdf_from_response(response: str) -> str:
|
|
| 661 |
# If no code blocks found, return the response as-is
|
| 662 |
return response
|
| 663 |
|
| 664 |
-
def get_ai_correction(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: int = None, include_warnings: bool = False) -> str:
|
| 665 |
"""
|
| 666 |
Generate AI-powered corrected RDF/XML based on validation errors.
|
| 667 |
|
|
@@ -680,20 +693,31 @@ def get_ai_correction(validation_results: str, rdf_content: str, template: str =
|
|
| 680 |
str: Corrected RDF/XML that should pass validation
|
| 681 |
"""
|
| 682 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 683 |
# Use configuration default if not specified
|
| 684 |
if max_attempts is None:
|
| 685 |
max_attempts = MAX_CORRECTION_ATTEMPTS
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
|
|
|
|
|
|
|
|
|
| 690 |
|
| 691 |
if not OPENAI_AVAILABLE:
|
|
|
|
|
|
|
| 692 |
return generate_manual_correction_hints(validation_results, rdf_content)
|
| 693 |
|
| 694 |
# Get API key dynamically at runtime
|
| 695 |
current_api_key = os.getenv('HF_API_KEY', '')
|
| 696 |
if not current_api_key:
|
|
|
|
|
|
|
| 697 |
return f"""<!-- AI correction disabled: Set HF_API_KEY as a Secret in your Space settings -->
|
| 698 |
|
| 699 |
{generate_manual_correction_hints(validation_results, rdf_content)}"""
|
|
@@ -701,6 +725,8 @@ def get_ai_correction(validation_results: str, rdf_content: str, template: str =
|
|
| 701 |
try:
|
| 702 |
client = get_openai_client()
|
| 703 |
if not client:
|
|
|
|
|
|
|
| 704 |
return f"""<!-- AI correction disabled: HF_API_KEY not configured -->
|
| 705 |
|
| 706 |
{generate_manual_correction_hints(validation_results, rdf_content)}"""
|
|
@@ -709,17 +735,25 @@ def get_ai_correction(validation_results: str, rdf_content: str, template: str =
|
|
| 709 |
import time
|
| 710 |
start_time = time.time()
|
| 711 |
timeout = 60 # 60 second timeout
|
|
|
|
|
|
|
| 712 |
|
| 713 |
severity_instruction = "Fix only the violations (errors) and ignore any warnings." if not include_warnings else "Fix both violations and warnings."
|
| 714 |
|
| 715 |
# Try multiple attempts to generate valid RDF
|
| 716 |
for attempt in range(max_attempts):
|
| 717 |
# Check timeout
|
| 718 |
-
|
|
|
|
|
|
|
|
|
|
| 719 |
print(f"β° Timeout reached after {timeout} seconds")
|
| 720 |
break
|
| 721 |
|
| 722 |
-
|
|
|
|
|
|
|
|
|
|
| 723 |
|
| 724 |
prompt = f"""You are an expert in RDF/XML. Fix the following RDF/XML based on the validation errors provided.
|
| 725 |
|
|
@@ -755,6 +789,8 @@ Please provide the corrected RDF/XML that addresses all validation issues.
|
|
| 755 |
)
|
| 756 |
|
| 757 |
corrected_rdf = chat_completion.choices[0].message.content.strip()
|
|
|
|
|
|
|
| 758 |
|
| 759 |
# Extract RDF content if it's wrapped in code blocks
|
| 760 |
corrected_rdf = extract_rdf_from_response(corrected_rdf)
|
|
@@ -766,32 +802,44 @@ Please provide the corrected RDF/XML that addresses all validation issues.
|
|
| 766 |
conforms, new_results = validate_rdf(corrected_rdf.encode('utf-8'), template)
|
| 767 |
|
| 768 |
if conforms:
|
| 769 |
-
|
|
|
|
|
|
|
| 770 |
return f"""<!-- AI-generated correction validated successfully -->
|
| 771 |
{corrected_rdf}"""
|
| 772 |
else:
|
| 773 |
-
|
|
|
|
|
|
|
| 774 |
# Update validation_results for next attempt
|
| 775 |
validation_results = new_results
|
| 776 |
|
| 777 |
except Exception as e:
|
| 778 |
-
|
|
|
|
|
|
|
| 779 |
# If validation fails, return the correction anyway
|
| 780 |
return f"""<!-- AI-generated correction (validation check failed) -->
|
| 781 |
{corrected_rdf}"""
|
| 782 |
else:
|
| 783 |
# If validator not available or timeout approaching, return the correction
|
|
|
|
|
|
|
| 784 |
print("β οΈ Returning correction without validation")
|
| 785 |
return f"""<!-- AI-generated correction (validation skipped) -->
|
| 786 |
{corrected_rdf}"""
|
| 787 |
|
| 788 |
except Exception as api_error:
|
| 789 |
-
|
|
|
|
|
|
|
| 790 |
if attempt == max_attempts - 1: # Last attempt
|
| 791 |
raise api_error
|
| 792 |
continue
|
| 793 |
|
| 794 |
# All attempts failed or timed out
|
|
|
|
|
|
|
| 795 |
return f"""<!-- AI correction failed after {max_attempts} attempts or timeout -->
|
| 796 |
<!-- Please correct manually using the validation results as a guide -->
|
| 797 |
|
|
@@ -799,6 +847,8 @@ Please provide the corrected RDF/XML that addresses all validation issues.
|
|
| 799 |
|
| 800 |
except Exception as e:
|
| 801 |
logger.error(f"LLM API error: {str(e)}")
|
|
|
|
|
|
|
| 802 |
return f"""<!-- Error generating AI correction: {str(e)} -->
|
| 803 |
|
| 804 |
{generate_manual_correction_hints(validation_results, rdf_content)}"""
|
|
@@ -972,7 +1022,7 @@ def extract_xml_from_text(text: str) -> str:
|
|
| 972 |
fenced = re.sub(r"^```[a-zA-Z]*\n|```$", "", text.strip())
|
| 973 |
return fenced if fenced else text
|
| 974 |
|
| 975 |
-
def validate_rdf_interface(rdf_content: str, template: str, use_ai: bool = True):
|
| 976 |
"""Main validation function for Gradio interface"""
|
| 977 |
if not rdf_content.strip():
|
| 978 |
return "β Error", "No RDF/XML data provided", "", "", "", ""
|
|
@@ -981,7 +1031,7 @@ def validate_rdf_interface(rdf_content: str, template: str, use_ai: bool = True)
|
|
| 981 |
result = validate_rdf_tool(rdf_content, template)
|
| 982 |
|
| 983 |
if "error" in result:
|
| 984 |
-
return f"β Error: {result['error']}", "", "", "", "", ""
|
| 985 |
|
| 986 |
status = result["status"]
|
| 987 |
results_text = result["results"]
|
|
@@ -1005,19 +1055,59 @@ def validate_rdf_interface(rdf_content: str, template: str, use_ai: bool = True)
|
|
| 1005 |
|
| 1006 |
filtered_results = '\n'.join(filtered_lines)
|
| 1007 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1008 |
if result["conforms"]:
|
| 1009 |
suggestions = "β
No issues found! Your RDF/XML is valid according to the selected template."
|
| 1010 |
-
corrected_rdf = "
|
|
|
|
|
|
|
|
|
|
| 1011 |
else:
|
| 1012 |
if use_ai:
|
| 1013 |
# Pass filtered results to AI functions
|
| 1014 |
suggestions = get_ai_suggestions(filtered_results, rdf_content, include_warnings)
|
| 1015 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1016 |
else:
|
| 1017 |
suggestions = generate_manual_suggestions(filtered_results)
|
| 1018 |
corrected_rdf = generate_manual_correction_hints(filtered_results, rdf_content)
|
|
|
|
|
|
|
|
|
|
| 1019 |
|
| 1020 |
-
|
|
|
|
| 1021 |
|
| 1022 |
def get_rdf_examples(example_type: str = "valid") -> str:
|
| 1023 |
"""
|
|
@@ -1145,6 +1235,26 @@ def create_interface():
|
|
| 1145 |
value=False,
|
| 1146 |
info="Include warnings in AI corrections (violations only by default)"
|
| 1147 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1148 |
|
| 1149 |
validate_btn = gr.Button("π Validate RDF", variant="primary", size="lg")
|
| 1150 |
|
|
@@ -1181,6 +1291,14 @@ def create_interface():
|
|
| 1181 |
lines=8,
|
| 1182 |
show_copy_button=True
|
| 1183 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1184 |
|
| 1185 |
# Corrected RDF section
|
| 1186 |
with gr.Row():
|
|
@@ -1212,8 +1330,8 @@ def create_interface():
|
|
| 1212 |
# Event handlers
|
| 1213 |
validate_btn.click(
|
| 1214 |
fn=validate_rdf_interface,
|
| 1215 |
-
inputs=[rdf_input, template_dropdown, use_ai_checkbox, include_warnings_checkbox],
|
| 1216 |
-
outputs=[status_output, results_output, suggestions_output, corrected_output]
|
| 1217 |
)
|
| 1218 |
|
| 1219 |
# Remove auto-validation to prevent processing loops
|
|
@@ -1235,8 +1353,8 @@ def create_interface():
|
|
| 1235 |
)
|
| 1236 |
|
| 1237 |
clear_btn.click(
|
| 1238 |
-
lambda: ("", "", "", "", "", "", ""),
|
| 1239 |
-
outputs=[rdf_input, status_output, results_output, suggestions_output, corrected_output, corrected_status_output, corrected_results_output]
|
| 1240 |
)
|
| 1241 |
|
| 1242 |
# Footer with instructions
|
|
|
|
| 222 |
return {k: '\n'.join(v) for k, v in class_results.items() if v}
|
| 223 |
|
| 224 |
def get_ai_suggestions(validation_results: str, rdf_content: str, include_warnings: bool = False) -> str:
|
| 225 |
+
"""Generate AI-powered, plain-language suggestions based on validation results.
|
| 226 |
+
|
| 227 |
+
Avoids RDF/SHACL jargon and focuses on actionable fixes.
|
| 228 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
if not OPENAI_AVAILABLE:
|
| 230 |
return generate_manual_suggestions(validation_results)
|
| 231 |
+
|
|
|
|
| 232 |
current_api_key = os.getenv('HF_API_KEY', '')
|
| 233 |
if not current_api_key:
|
| 234 |
return f"""
|
|
|
|
| 236 |
|
| 237 |
{generate_manual_suggestions(validation_results)}
|
| 238 |
"""
|
| 239 |
+
|
| 240 |
try:
|
|
|
|
| 241 |
client = get_openai_client()
|
| 242 |
if not client:
|
| 243 |
return f"""
|
|
|
|
| 245 |
|
| 246 |
{generate_manual_suggestions(validation_results)}
|
| 247 |
"""
|
| 248 |
+
|
| 249 |
+
severity_instruction = (
|
| 250 |
+
"Focus only on violations (errors) and ignore any warnings."
|
| 251 |
+
if not include_warnings else
|
| 252 |
+
"Address both violations and warnings."
|
| 253 |
+
)
|
| 254 |
+
|
| 255 |
+
# Group errors by class to focus the prompt
|
| 256 |
class_results = filter_validation_results_by_class(validation_results, rdf_content)
|
| 257 |
+
if class_results:
|
| 258 |
+
primary_class = max(class_results.keys(), key=lambda k: len(class_results[k]))
|
| 259 |
+
focused_results = class_results[primary_class]
|
| 260 |
+
else:
|
| 261 |
+
primary_class = "Record"
|
| 262 |
+
focused_results = validation_results
|
| 263 |
+
|
| 264 |
+
simplified_summary = parse_shacl_results_for_ai(focused_results)
|
| 265 |
relevant_rdf = extract_relevant_rdf_section(rdf_content, primary_class)
|
| 266 |
+
|
| 267 |
+
prompt = f"""
|
| 268 |
+
You are a helpful metadata librarian. Write in plain language (no RDF/SHACL jargon). Analyze the validation errors for the {primary_class} and provide concise, actionable fixes.
|
| 269 |
|
| 270 |
{severity_instruction}
|
| 271 |
|
| 272 |
Validation Errors for {primary_class}:
|
| 273 |
{focused_results[:1500]}
|
| 274 |
|
| 275 |
+
Validation Summary (plain language):
|
| 276 |
+
{simplified_summary}
|
| 277 |
+
|
| 278 |
Relevant RDF Section:
|
| 279 |
{relevant_rdf[:800]}
|
| 280 |
|
|
|
|
| 296 |
```
|
| 297 |
|
| 298 |
Be ultra-concise. Show the fix, not explanations."""
|
| 299 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
chat_completion = client.chat.completions.create(
|
| 301 |
model=HF_MODEL,
|
| 302 |
messages=[
|
| 303 |
+
{
|
| 304 |
+
"role": "system",
|
| 305 |
+
"content": "You are a friendly librarian helping fix catalog records. Never use technical RDF or SHACL terminology."
|
| 306 |
+
},
|
| 307 |
{
|
| 308 |
"role": "user",
|
| 309 |
"content": prompt
|
| 310 |
}
|
| 311 |
],
|
| 312 |
+
max_tokens=800,
|
| 313 |
+
temperature=0.5,
|
| 314 |
top_p=0.9
|
| 315 |
)
|
| 316 |
+
|
|
|
|
| 317 |
generated_text = chat_completion.choices[0].message.content
|
| 318 |
+
generated_text = clean_technical_jargon(generated_text)
|
| 319 |
+
|
| 320 |
other_classes = [k for k in class_results.keys() if k != primary_class]
|
| 321 |
+
class_note = (
|
| 322 |
+
f"\n\nπ **Note:** Focused on {primary_class} errors. " +
|
| 323 |
+
(f"Also found issues in: {', '.join(other_classes)}" if other_classes else "")
|
| 324 |
+
)
|
| 325 |
+
|
| 326 |
return f"π€ **AI-Powered Suggestions ({('Violations + Warnings' if include_warnings else 'Violations Only')}):**\n\n{generated_text}{class_note}"
|
| 327 |
+
|
| 328 |
except Exception as e:
|
| 329 |
logger.error(f"OpenAI/HF Inference Endpoint error: {str(e)}")
|
| 330 |
return f"""
|
|
|
|
| 370 |
|
| 371 |
return rdf_content[:1000] # Fallback
|
| 372 |
|
| 373 |
+
def get_ai_correction(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: int = None, include_warnings: bool = False, enable_validation_loop: bool | None = None, steps_log: Optional[List[str]] = None) -> str:
|
| 374 |
"""
|
| 375 |
Generate AI-powered corrected RDF/XML based on validation errors.
|
| 376 |
|
|
|
|
| 389 |
str: Corrected RDF/XML that should pass validation
|
| 390 |
"""
|
| 391 |
|
| 392 |
+
# Determine whether to iterate based on parameter or global default
|
| 393 |
+
iterate_enabled = ENABLE_VALIDATION_LOOP if enable_validation_loop is None else enable_validation_loop
|
| 394 |
+
if steps_log is not None:
|
| 395 |
+
steps_log.append(f"Planning correction: iterate_enabled={iterate_enabled}, include_warnings={include_warnings}")
|
| 396 |
# Use configuration default if not specified
|
| 397 |
if max_attempts is None:
|
| 398 |
max_attempts = MAX_CORRECTION_ATTEMPTS
|
| 399 |
+
if steps_log is not None:
|
| 400 |
+
steps_log.append(f"Max attempts set to {max_attempts}")
|
| 401 |
+
# If iteration disabled, force single attempt
|
| 402 |
+
if not iterate_enabled:
|
| 403 |
+
max_attempts = 1
|
| 404 |
+
if steps_log is not None:
|
| 405 |
+
steps_log.append("Iteration disabled; forcing single attempt")
|
| 406 |
|
| 407 |
if not OPENAI_AVAILABLE:
|
| 408 |
+
if steps_log is not None:
|
| 409 |
+
steps_log.append("OPENAI client not available; falling back to manual hints")
|
| 410 |
return generate_manual_correction_hints(validation_results, rdf_content)
|
| 411 |
|
| 412 |
# Get API key dynamically at runtime
|
| 413 |
current_api_key = os.getenv('HF_API_KEY', '')
|
| 414 |
if not current_api_key:
|
| 415 |
+
if steps_log is not None:
|
| 416 |
+
steps_log.append("HF_API_KEY not set; cannot call model; returning manual hints")
|
| 417 |
return f"""<!-- AI correction disabled: Set HF_API_KEY as a Secret in your Space settings -->
|
| 418 |
|
| 419 |
{generate_manual_correction_hints(validation_results, rdf_content)}"""
|
|
|
|
| 421 |
try:
|
| 422 |
client = get_openai_client()
|
| 423 |
if not client:
|
| 424 |
+
if steps_log is not None:
|
| 425 |
+
steps_log.append("Failed to initialize OpenAI client; returning manual hints")
|
| 426 |
return f"""<!-- AI correction disabled: HF_API_KEY not configured -->
|
| 427 |
|
| 428 |
{generate_manual_correction_hints(validation_results, rdf_content)}"""
|
|
|
|
| 431 |
import time
|
| 432 |
start_time = time.time()
|
| 433 |
timeout = 60 # 60 second timeout
|
| 434 |
+
if steps_log is not None:
|
| 435 |
+
steps_log.append(f"Timeout budget: {timeout}s total")
|
| 436 |
|
| 437 |
severity_instruction = "Fix only the violations (errors) and ignore any warnings." if not include_warnings else "Fix both violations and warnings."
|
| 438 |
|
|
|
|
| 674 |
# If no code blocks found, return the response as-is
|
| 675 |
return response
|
| 676 |
|
| 677 |
+
def get_ai_correction(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: int = None, include_warnings: bool = False, enable_validation_loop: bool | None = None, steps_log: Optional[List[str]] = None) -> str:
|
| 678 |
"""
|
| 679 |
Generate AI-powered corrected RDF/XML based on validation errors.
|
| 680 |
|
|
|
|
| 693 |
str: Corrected RDF/XML that should pass validation
|
| 694 |
"""
|
| 695 |
|
| 696 |
+
# Determine whether to iterate based on parameter or global default
|
| 697 |
+
iterate_enabled = ENABLE_VALIDATION_LOOP if enable_validation_loop is None else enable_validation_loop
|
| 698 |
+
if steps_log is not None:
|
| 699 |
+
steps_log.append(f"Planning correction: iterate_enabled={iterate_enabled}, include_warnings={include_warnings}")
|
| 700 |
# Use configuration default if not specified
|
| 701 |
if max_attempts is None:
|
| 702 |
max_attempts = MAX_CORRECTION_ATTEMPTS
|
| 703 |
+
if steps_log is not None:
|
| 704 |
+
steps_log.append(f"Max attempts set to {max_attempts}")
|
| 705 |
+
# If iteration disabled, force single attempt
|
| 706 |
+
if not iterate_enabled:
|
| 707 |
+
max_attempts = 1
|
| 708 |
+
if steps_log is not None:
|
| 709 |
+
steps_log.append("Iteration disabled; forcing single attempt")
|
| 710 |
|
| 711 |
if not OPENAI_AVAILABLE:
|
| 712 |
+
if steps_log is not None:
|
| 713 |
+
steps_log.append("OPENAI client not available; falling back to manual hints")
|
| 714 |
return generate_manual_correction_hints(validation_results, rdf_content)
|
| 715 |
|
| 716 |
# Get API key dynamically at runtime
|
| 717 |
current_api_key = os.getenv('HF_API_KEY', '')
|
| 718 |
if not current_api_key:
|
| 719 |
+
if steps_log is not None:
|
| 720 |
+
steps_log.append("HF_API_KEY not set; cannot call model; returning manual hints")
|
| 721 |
return f"""<!-- AI correction disabled: Set HF_API_KEY as a Secret in your Space settings -->
|
| 722 |
|
| 723 |
{generate_manual_correction_hints(validation_results, rdf_content)}"""
|
|
|
|
| 725 |
try:
|
| 726 |
client = get_openai_client()
|
| 727 |
if not client:
|
| 728 |
+
if steps_log is not None:
|
| 729 |
+
steps_log.append("Failed to initialize OpenAI client; returning manual hints")
|
| 730 |
return f"""<!-- AI correction disabled: HF_API_KEY not configured -->
|
| 731 |
|
| 732 |
{generate_manual_correction_hints(validation_results, rdf_content)}"""
|
|
|
|
| 735 |
import time
|
| 736 |
start_time = time.time()
|
| 737 |
timeout = 60 # 60 second timeout
|
| 738 |
+
if steps_log is not None:
|
| 739 |
+
steps_log.append(f"Timeout budget: {timeout}s total")
|
| 740 |
|
| 741 |
severity_instruction = "Fix only the violations (errors) and ignore any warnings." if not include_warnings else "Fix both violations and warnings."
|
| 742 |
|
| 743 |
# Try multiple attempts to generate valid RDF
|
| 744 |
for attempt in range(max_attempts):
|
| 745 |
# Check timeout
|
| 746 |
+
elapsed = time.time() - start_time
|
| 747 |
+
if elapsed > timeout:
|
| 748 |
+
if steps_log is not None:
|
| 749 |
+
steps_log.append(f"Timeout reached after {int(elapsed)}s; stopping attempts")
|
| 750 |
print(f"β° Timeout reached after {timeout} seconds")
|
| 751 |
break
|
| 752 |
|
| 753 |
+
attempt_no = attempt + 1
|
| 754 |
+
if steps_log is not None:
|
| 755 |
+
steps_log.append(f"Attempt {attempt_no}/{max_attempts}: requesting model correction")
|
| 756 |
+
print(f"π Correction attempt {attempt_no}/{max_attempts}")
|
| 757 |
|
| 758 |
prompt = f"""You are an expert in RDF/XML. Fix the following RDF/XML based on the validation errors provided.
|
| 759 |
|
|
|
|
| 789 |
)
|
| 790 |
|
| 791 |
corrected_rdf = chat_completion.choices[0].message.content.strip()
|
| 792 |
+
if steps_log is not None:
|
| 793 |
+
steps_log.append(f"Attempt {attempt_no}: model responded; extracting XML block if present")
|
| 794 |
|
| 795 |
# Extract RDF content if it's wrapped in code blocks
|
| 796 |
corrected_rdf = extract_rdf_from_response(corrected_rdf)
|
|
|
|
| 802 |
conforms, new_results = validate_rdf(corrected_rdf.encode('utf-8'), template)
|
| 803 |
|
| 804 |
if conforms:
|
| 805 |
+
if steps_log is not None:
|
| 806 |
+
steps_log.append(f"Attempt {attempt_no}: correction PASSED validation")
|
| 807 |
+
print(f"β
Correction validated successfully on attempt {attempt_no}")
|
| 808 |
return f"""<!-- AI-generated correction validated successfully -->
|
| 809 |
{corrected_rdf}"""
|
| 810 |
else:
|
| 811 |
+
if steps_log is not None:
|
| 812 |
+
steps_log.append(f"Attempt {attempt_no}: still invalid; will retry with updated errors")
|
| 813 |
+
print(f"β Correction attempt {attempt_no} still has validation errors")
|
| 814 |
# Update validation_results for next attempt
|
| 815 |
validation_results = new_results
|
| 816 |
|
| 817 |
except Exception as e:
|
| 818 |
+
if steps_log is not None:
|
| 819 |
+
steps_log.append(f"Attempt {attempt_no}: error during validation: {str(e)} β returning correction anyway")
|
| 820 |
+
print(f"β οΈ Error validating correction attempt {attempt_no}: {str(e)}")
|
| 821 |
# If validation fails, return the correction anyway
|
| 822 |
return f"""<!-- AI-generated correction (validation check failed) -->
|
| 823 |
{corrected_rdf}"""
|
| 824 |
else:
|
| 825 |
# If validator not available or timeout approaching, return the correction
|
| 826 |
+
if steps_log is not None:
|
| 827 |
+
steps_log.append("Skipping validation check (validator unavailable or timeout)")
|
| 828 |
print("β οΈ Returning correction without validation")
|
| 829 |
return f"""<!-- AI-generated correction (validation skipped) -->
|
| 830 |
{corrected_rdf}"""
|
| 831 |
|
| 832 |
except Exception as api_error:
|
| 833 |
+
if steps_log is not None:
|
| 834 |
+
steps_log.append(f"Attempt {attempt_no}: API error: {str(api_error)}")
|
| 835 |
+
print(f"β API error on attempt {attempt_no}: {str(api_error)}")
|
| 836 |
if attempt == max_attempts - 1: # Last attempt
|
| 837 |
raise api_error
|
| 838 |
continue
|
| 839 |
|
| 840 |
# All attempts failed or timed out
|
| 841 |
+
if steps_log is not None:
|
| 842 |
+
steps_log.append("All attempts failed or timed out; returning manual hints")
|
| 843 |
return f"""<!-- AI correction failed after {max_attempts} attempts or timeout -->
|
| 844 |
<!-- Please correct manually using the validation results as a guide -->
|
| 845 |
|
|
|
|
| 847 |
|
| 848 |
except Exception as e:
|
| 849 |
logger.error(f"LLM API error: {str(e)}")
|
| 850 |
+
if steps_log is not None:
|
| 851 |
+
steps_log.append(f"Fatal error invoking model: {str(e)}")
|
| 852 |
return f"""<!-- Error generating AI correction: {str(e)} -->
|
| 853 |
|
| 854 |
{generate_manual_correction_hints(validation_results, rdf_content)}"""
|
|
|
|
| 1022 |
fenced = re.sub(r"^```[a-zA-Z]*\n|```$", "", text.strip())
|
| 1023 |
return fenced if fenced else text
|
| 1024 |
|
| 1025 |
+
def validate_rdf_interface(rdf_content: str, template: str, use_ai: bool = True, include_warnings: bool = False, iterate_until_valid: bool = False, max_attempts: int = 3, show_steps: bool = False):
|
| 1026 |
"""Main validation function for Gradio interface"""
|
| 1027 |
if not rdf_content.strip():
|
| 1028 |
return "β Error", "No RDF/XML data provided", "", "", "", ""
|
|
|
|
| 1031 |
result = validate_rdf_tool(rdf_content, template)
|
| 1032 |
|
| 1033 |
if "error" in result:
|
| 1034 |
+
return f"β Error: {result['error']}", "", "", "", "", "", ""
|
| 1035 |
|
| 1036 |
status = result["status"]
|
| 1037 |
results_text = result["results"]
|
|
|
|
| 1055 |
|
| 1056 |
filtered_results = '\n'.join(filtered_lines)
|
| 1057 |
|
| 1058 |
+
corrected_status = ""
|
| 1059 |
+
corrected_results = ""
|
| 1060 |
+
steps_log: List[str] = []
|
| 1061 |
+
steps_log.append(f"Initial validation: {'PASSED' if result['conforms'] else 'FAILED'} using template '{template}'")
|
| 1062 |
+
if not include_warnings:
|
| 1063 |
+
steps_log.append("Configured to ignore warnings in AI processing")
|
| 1064 |
+
if iterate_until_valid:
|
| 1065 |
+
steps_log.append(f"Iteration enabled with max_attempts={max_attempts}")
|
| 1066 |
if result["conforms"]:
|
| 1067 |
suggestions = "β
No issues found! Your RDF/XML is valid according to the selected template."
|
| 1068 |
+
corrected_rdf = "<!-- Already valid - no corrections needed -->\n" + rdf_content
|
| 1069 |
+
corrected_status = "β"
|
| 1070 |
+
corrected_results = ""
|
| 1071 |
+
steps_log.append("No correction needed; record already conforms")
|
| 1072 |
else:
|
| 1073 |
if use_ai:
|
| 1074 |
# Pass filtered results to AI functions
|
| 1075 |
suggestions = get_ai_suggestions(filtered_results, rdf_content, include_warnings)
|
| 1076 |
+
steps_log.append("Requested AI suggestions for concise guidance")
|
| 1077 |
+
corrected_rdf = get_ai_correction(
|
| 1078 |
+
filtered_results,
|
| 1079 |
+
rdf_content,
|
| 1080 |
+
template,
|
| 1081 |
+
max_attempts=max_attempts,
|
| 1082 |
+
include_warnings=include_warnings,
|
| 1083 |
+
enable_validation_loop=iterate_until_valid,
|
| 1084 |
+
steps_log=steps_log,
|
| 1085 |
+
)
|
| 1086 |
+
# Attempt re-validation of corrected RDF
|
| 1087 |
+
try:
|
| 1088 |
+
corrected_xml = extract_xml_from_text(corrected_rdf)
|
| 1089 |
+
reval = validate_rdf_tool(corrected_xml, template)
|
| 1090 |
+
if "error" in reval:
|
| 1091 |
+
corrected_status = f"β Re-validation Error: {reval['error']}"
|
| 1092 |
+
corrected_results = ""
|
| 1093 |
+
steps_log.append(f"Re-validation failed with error: {reval['error']}")
|
| 1094 |
+
else:
|
| 1095 |
+
corrected_status = reval.get("status", "")
|
| 1096 |
+
corrected_results = reval.get("results", "")
|
| 1097 |
+
steps_log.append(f"Re-validation: {corrected_status}")
|
| 1098 |
+
except Exception as re_ex:
|
| 1099 |
+
corrected_status = f"β Re-validation Error: {re_ex}"
|
| 1100 |
+
corrected_results = ""
|
| 1101 |
+
steps_log.append(f"Re-validation error: {re_ex}")
|
| 1102 |
else:
|
| 1103 |
suggestions = generate_manual_suggestions(filtered_results)
|
| 1104 |
corrected_rdf = generate_manual_correction_hints(filtered_results, rdf_content)
|
| 1105 |
+
corrected_status = "β"
|
| 1106 |
+
corrected_results = ""
|
| 1107 |
+
steps_log.append("AI disabled; produced manual suggestions and hints")
|
| 1108 |
|
| 1109 |
+
steps_text = "\n".join(steps_log) if show_steps else ""
|
| 1110 |
+
return status, results_text, suggestions, steps_text, corrected_rdf, corrected_status, corrected_results
|
| 1111 |
|
| 1112 |
def get_rdf_examples(example_type: str = "valid") -> str:
|
| 1113 |
"""
|
|
|
|
| 1235 |
value=False,
|
| 1236 |
info="Include warnings in AI corrections (violations only by default)"
|
| 1237 |
)
|
| 1238 |
+
|
| 1239 |
+
with gr.Row():
|
| 1240 |
+
iterate_checkbox = gr.Checkbox(
|
| 1241 |
+
label="Iterate until valid",
|
| 1242 |
+
value=False,
|
| 1243 |
+
info="Try multiple correction attempts until validation passes or attempts run out"
|
| 1244 |
+
)
|
| 1245 |
+
max_attempts_slider = gr.Slider(
|
| 1246 |
+
label="Max attempts",
|
| 1247 |
+
minimum=1,
|
| 1248 |
+
maximum=5,
|
| 1249 |
+
value=3,
|
| 1250 |
+
step=1,
|
| 1251 |
+
info="Maximum number of correction attempts when iterating"
|
| 1252 |
+
)
|
| 1253 |
+
show_steps_checkbox = gr.Checkbox(
|
| 1254 |
+
label="Show steps",
|
| 1255 |
+
value=True,
|
| 1256 |
+
info="Display step-by-step process (turn off for a simpler response)"
|
| 1257 |
+
)
|
| 1258 |
|
| 1259 |
validate_btn = gr.Button("π Validate RDF", variant="primary", size="lg")
|
| 1260 |
|
|
|
|
| 1291 |
lines=8,
|
| 1292 |
show_copy_button=True
|
| 1293 |
)
|
| 1294 |
+
|
| 1295 |
+
steps_output = gr.Textbox(
|
| 1296 |
+
label="π§ Correction Steps",
|
| 1297 |
+
interactive=False,
|
| 1298 |
+
lines=10,
|
| 1299 |
+
show_copy_button=True,
|
| 1300 |
+
placeholder="Step-by-step log of how the system derived the corrected XML"
|
| 1301 |
+
)
|
| 1302 |
|
| 1303 |
# Corrected RDF section
|
| 1304 |
with gr.Row():
|
|
|
|
| 1330 |
# Event handlers
|
| 1331 |
validate_btn.click(
|
| 1332 |
fn=validate_rdf_interface,
|
| 1333 |
+
inputs=[rdf_input, template_dropdown, use_ai_checkbox, include_warnings_checkbox, iterate_checkbox, max_attempts_slider, show_steps_checkbox],
|
| 1334 |
+
outputs=[status_output, results_output, suggestions_output, steps_output, corrected_output, corrected_status_output, corrected_results_output]
|
| 1335 |
)
|
| 1336 |
|
| 1337 |
# Remove auto-validation to prevent processing loops
|
|
|
|
| 1353 |
)
|
| 1354 |
|
| 1355 |
clear_btn.click(
|
| 1356 |
+
lambda: ("", "", "", "", "", "", "", ""),
|
| 1357 |
+
outputs=[rdf_input, status_output, results_output, suggestions_output, steps_output, corrected_output, corrected_status_output, corrected_results_output]
|
| 1358 |
)
|
| 1359 |
|
| 1360 |
# Footer with instructions
|