RDF Validation Deployment committed on
Commit
e0e9b68
·
1 Parent(s): 4f8c53c

AI iteration with re-validation + steps log UI; show steps by default; optional iterate-until-valid with max attempts

Browse files
Files changed (1) hide show
  1. app.py +188 -70
app.py CHANGED
@@ -222,25 +222,13 @@ def filter_validation_results_by_class(validation_results: str, rdf_content: str
222
  return {k: '\n'.join(v) for k, v in class_results.items() if v}
223
 
224
  def get_ai_suggestions(validation_results: str, rdf_content: str, include_warnings: bool = False) -> str:
 
 
 
225
  """
226
- Generate AI-powered fix suggestions for invalid RDF/XML.
227
-
228
- This tool analyzes validation results and provides actionable suggestions
229
- for fixing RDF/XML validation errors using AI or rule-based analysis.
230
-
231
- Args:
232
- validation_results (str): The validation error messages
233
- rdf_content (str): The original RDF/XML content that failed validation
234
- include_warnings (bool): Whether to include warnings in suggestions
235
-
236
- Returns:
237
- str: Detailed suggestions for fixing the RDF validation issues
238
- """
239
-
240
  if not OPENAI_AVAILABLE:
241
  return generate_manual_suggestions(validation_results)
242
-
243
- # Get API key dynamically at runtime
244
  current_api_key = os.getenv('HF_API_KEY', '')
245
  if not current_api_key:
246
  return f"""
@@ -248,9 +236,8 @@ def get_ai_suggestions(validation_results: str, rdf_content: str, include_warnin
248
 
249
  {generate_manual_suggestions(validation_results)}
250
  """
251
-
252
  try:
253
- # Use OpenAI client with your Hugging Face Inference Endpoint
254
  client = get_openai_client()
255
  if not client:
256
  return f"""
@@ -258,26 +245,36 @@ def get_ai_suggestions(validation_results: str, rdf_content: str, include_warnin
258
 
259
  {generate_manual_suggestions(validation_results)}
260
  """
261
-
262
- severity_instruction = "Focus only on violations (errors) and ignore any warnings." if not include_warnings else "Address both violations and warnings."
263
-
264
- # Filter validation results by class to reduce token usage
 
 
 
 
265
  class_results = filter_validation_results_by_class(validation_results, rdf_content)
266
-
267
- # Determine primary class with most errors
268
- primary_class = max(class_results.keys(), key=lambda k: len(class_results[k]))
269
- focused_results = class_results[primary_class]
270
-
271
- # Extract only relevant RDF section for the primary class
 
 
272
  relevant_rdf = extract_relevant_rdf_section(rdf_content, primary_class)
273
-
274
- prompt = f"""You are an expert in RDF/XML and SHACL validation. Analyze the validation errors for the {primary_class} class and provide CONCISE, ACTIONABLE fixes.
 
275
 
276
  {severity_instruction}
277
 
278
  Validation Errors for {primary_class}:
279
  {focused_results[:1500]}
280
 
 
 
 
281
  Relevant RDF Section:
282
  {relevant_rdf[:800]}
283
 
@@ -299,34 +296,35 @@ Format:
299
  ```
300
 
301
  Be ultra-concise. Show the fix, not explanations."""
302
-
303
- # Make API call using OpenAI client
304
- print(f"πŸ”„ Making focused API call for {primary_class} class")
305
- print(f"πŸ”„ Sending {len(focused_results)} chars instead of {len(validation_results)} chars")
306
-
307
  chat_completion = client.chat.completions.create(
308
  model=HF_MODEL,
309
  messages=[
 
 
 
 
310
  {
311
  "role": "user",
312
  "content": prompt
313
  }
314
  ],
315
- max_tokens=800, # Reduced since we're focused on one class
316
- temperature=0.5, # Lower temperature for more focused responses
317
  top_p=0.9
318
  )
319
-
320
- print("βœ… API call successful")
321
  generated_text = chat_completion.choices[0].message.content
322
-
323
- # Add note about other classes if present
324
  other_classes = [k for k in class_results.keys() if k != primary_class]
325
- class_note = f"\n\nπŸ“Œ **Note:** Focused on {primary_class} errors. " + \
326
- (f"Also found issues in: {', '.join(other_classes)}" if other_classes else "")
327
-
 
 
328
  return f"πŸ€– **AI-Powered Suggestions ({('Violations + Warnings' if include_warnings else 'Violations Only')}):**\n\n{generated_text}{class_note}"
329
-
330
  except Exception as e:
331
  logger.error(f"OpenAI/HF Inference Endpoint error: {str(e)}")
332
  return f"""
@@ -372,7 +370,7 @@ def extract_relevant_rdf_section(rdf_content: str, class_name: str) -> str:
372
 
373
  return rdf_content[:1000] # Fallback
374
 
375
- def get_ai_correction(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: int = None, include_warnings: bool = False) -> str:
376
  """
377
  Generate AI-powered corrected RDF/XML based on validation errors.
378
 
@@ -391,20 +389,31 @@ def get_ai_correction(validation_results: str, rdf_content: str, template: str =
391
  str: Corrected RDF/XML that should pass validation
392
  """
393
 
 
 
 
 
394
  # Use configuration default if not specified
395
  if max_attempts is None:
396
  max_attempts = MAX_CORRECTION_ATTEMPTS
397
-
398
- # Check if validation loop is enabled
399
- if not ENABLE_VALIDATION_LOOP:
400
- max_attempts = 1 # Fall back to single attempt if validation loop disabled
 
 
 
401
 
402
  if not OPENAI_AVAILABLE:
 
 
403
  return generate_manual_correction_hints(validation_results, rdf_content)
404
 
405
  # Get API key dynamically at runtime
406
  current_api_key = os.getenv('HF_API_KEY', '')
407
  if not current_api_key:
 
 
408
  return f"""<!-- AI correction disabled: Set HF_API_KEY as a Secret in your Space settings -->
409
 
410
  {generate_manual_correction_hints(validation_results, rdf_content)}"""
@@ -412,6 +421,8 @@ def get_ai_correction(validation_results: str, rdf_content: str, template: str =
412
  try:
413
  client = get_openai_client()
414
  if not client:
 
 
415
  return f"""<!-- AI correction disabled: HF_API_KEY not configured -->
416
 
417
  {generate_manual_correction_hints(validation_results, rdf_content)}"""
@@ -420,6 +431,8 @@ def get_ai_correction(validation_results: str, rdf_content: str, template: str =
420
  import time
421
  start_time = time.time()
422
  timeout = 60 # 60 second timeout
 
 
423
 
424
  severity_instruction = "Fix only the violations (errors) and ignore any warnings." if not include_warnings else "Fix both violations and warnings."
425
 
@@ -661,7 +674,7 @@ def extract_rdf_from_response(response: str) -> str:
661
  # If no code blocks found, return the response as-is
662
  return response
663
 
664
- def get_ai_correction(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: int = None, include_warnings: bool = False) -> str:
665
  """
666
  Generate AI-powered corrected RDF/XML based on validation errors.
667
 
@@ -680,20 +693,31 @@ def get_ai_correction(validation_results: str, rdf_content: str, template: str =
680
  str: Corrected RDF/XML that should pass validation
681
  """
682
 
 
 
 
 
683
  # Use configuration default if not specified
684
  if max_attempts is None:
685
  max_attempts = MAX_CORRECTION_ATTEMPTS
686
-
687
- # Check if validation loop is enabled
688
- if not ENABLE_VALIDATION_LOOP:
689
- max_attempts = 1 # Fall back to single attempt if validation loop disabled
 
 
 
690
 
691
  if not OPENAI_AVAILABLE:
 
 
692
  return generate_manual_correction_hints(validation_results, rdf_content)
693
 
694
  # Get API key dynamically at runtime
695
  current_api_key = os.getenv('HF_API_KEY', '')
696
  if not current_api_key:
 
 
697
  return f"""<!-- AI correction disabled: Set HF_API_KEY as a Secret in your Space settings -->
698
 
699
  {generate_manual_correction_hints(validation_results, rdf_content)}"""
@@ -701,6 +725,8 @@ def get_ai_correction(validation_results: str, rdf_content: str, template: str =
701
  try:
702
  client = get_openai_client()
703
  if not client:
 
 
704
  return f"""<!-- AI correction disabled: HF_API_KEY not configured -->
705
 
706
  {generate_manual_correction_hints(validation_results, rdf_content)}"""
@@ -709,17 +735,25 @@ def get_ai_correction(validation_results: str, rdf_content: str, template: str =
709
  import time
710
  start_time = time.time()
711
  timeout = 60 # 60 second timeout
 
 
712
 
713
  severity_instruction = "Fix only the violations (errors) and ignore any warnings." if not include_warnings else "Fix both violations and warnings."
714
 
715
  # Try multiple attempts to generate valid RDF
716
  for attempt in range(max_attempts):
717
  # Check timeout
718
- if time.time() - start_time > timeout:
 
 
 
719
  print(f"⏰ Timeout reached after {timeout} seconds")
720
  break
721
 
722
- print(f"πŸ”„ Correction attempt {attempt + 1}/{max_attempts}")
 
 
 
723
 
724
  prompt = f"""You are an expert in RDF/XML. Fix the following RDF/XML based on the validation errors provided.
725
 
@@ -755,6 +789,8 @@ Please provide the corrected RDF/XML that addresses all validation issues.
755
  )
756
 
757
  corrected_rdf = chat_completion.choices[0].message.content.strip()
 
 
758
 
759
  # Extract RDF content if it's wrapped in code blocks
760
  corrected_rdf = extract_rdf_from_response(corrected_rdf)
@@ -766,32 +802,44 @@ Please provide the corrected RDF/XML that addresses all validation issues.
766
  conforms, new_results = validate_rdf(corrected_rdf.encode('utf-8'), template)
767
 
768
  if conforms:
769
- print(f"βœ… Correction validated successfully on attempt {attempt + 1}")
 
 
770
  return f"""<!-- AI-generated correction validated successfully -->
771
  {corrected_rdf}"""
772
  else:
773
- print(f"❌ Correction attempt {attempt + 1} still has validation errors")
 
 
774
  # Update validation_results for next attempt
775
  validation_results = new_results
776
 
777
  except Exception as e:
778
- print(f"⚠️ Error validating correction attempt {attempt + 1}: {str(e)}")
 
 
779
  # If validation fails, return the correction anyway
780
  return f"""<!-- AI-generated correction (validation check failed) -->
781
  {corrected_rdf}"""
782
  else:
783
  # If validator not available or timeout approaching, return the correction
 
 
784
  print("⚠️ Returning correction without validation")
785
  return f"""<!-- AI-generated correction (validation skipped) -->
786
  {corrected_rdf}"""
787
 
788
  except Exception as api_error:
789
- print(f"❌ API error on attempt {attempt + 1}: {str(api_error)}")
 
 
790
  if attempt == max_attempts - 1: # Last attempt
791
  raise api_error
792
  continue
793
 
794
  # All attempts failed or timed out
 
 
795
  return f"""<!-- AI correction failed after {max_attempts} attempts or timeout -->
796
  <!-- Please correct manually using the validation results as a guide -->
797
 
@@ -799,6 +847,8 @@ Please provide the corrected RDF/XML that addresses all validation issues.
799
 
800
  except Exception as e:
801
  logger.error(f"LLM API error: {str(e)}")
 
 
802
  return f"""<!-- Error generating AI correction: {str(e)} -->
803
 
804
  {generate_manual_correction_hints(validation_results, rdf_content)}"""
@@ -972,7 +1022,7 @@ def extract_xml_from_text(text: str) -> str:
972
  fenced = re.sub(r"^```[a-zA-Z]*\n|```$", "", text.strip())
973
  return fenced if fenced else text
974
 
975
- def validate_rdf_interface(rdf_content: str, template: str, use_ai: bool = True):
976
  """Main validation function for Gradio interface"""
977
  if not rdf_content.strip():
978
  return "❌ Error", "No RDF/XML data provided", "", "", "", ""
@@ -981,7 +1031,7 @@ def validate_rdf_interface(rdf_content: str, template: str, use_ai: bool = True)
981
  result = validate_rdf_tool(rdf_content, template)
982
 
983
  if "error" in result:
984
- return f"❌ Error: {result['error']}", "", "", "", "", ""
985
 
986
  status = result["status"]
987
  results_text = result["results"]
@@ -1005,19 +1055,59 @@ def validate_rdf_interface(rdf_content: str, template: str, use_ai: bool = True)
1005
 
1006
  filtered_results = '\n'.join(filtered_lines)
1007
 
 
 
 
 
 
 
 
 
1008
  if result["conforms"]:
1009
  suggestions = "βœ… No issues found! Your RDF/XML is valid according to the selected template."
1010
- corrected_rdf = "βœ… Your RDF/XML is already valid - no corrections needed!"
 
 
 
1011
  else:
1012
  if use_ai:
1013
  # Pass filtered results to AI functions
1014
  suggestions = get_ai_suggestions(filtered_results, rdf_content, include_warnings)
1015
- corrected_rdf = get_ai_correction(filtered_results, rdf_content, template, include_warnings=include_warnings)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1016
  else:
1017
  suggestions = generate_manual_suggestions(filtered_results)
1018
  corrected_rdf = generate_manual_correction_hints(filtered_results, rdf_content)
 
 
 
1019
 
1020
- return status, results_text, suggestions, corrected_rdf
 
1021
 
1022
  def get_rdf_examples(example_type: str = "valid") -> str:
1023
  """
@@ -1145,6 +1235,26 @@ def create_interface():
1145
  value=False,
1146
  info="Include warnings in AI corrections (violations only by default)"
1147
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1148
 
1149
  validate_btn = gr.Button("πŸ” Validate RDF", variant="primary", size="lg")
1150
 
@@ -1181,6 +1291,14 @@ def create_interface():
1181
  lines=8,
1182
  show_copy_button=True
1183
  )
 
 
 
 
 
 
 
 
1184
 
1185
  # Corrected RDF section
1186
  with gr.Row():
@@ -1212,8 +1330,8 @@ def create_interface():
1212
  # Event handlers
1213
  validate_btn.click(
1214
  fn=validate_rdf_interface,
1215
- inputs=[rdf_input, template_dropdown, use_ai_checkbox, include_warnings_checkbox],
1216
- outputs=[status_output, results_output, suggestions_output, corrected_output]
1217
  )
1218
 
1219
  # Remove auto-validation to prevent processing loops
@@ -1235,8 +1353,8 @@ def create_interface():
1235
  )
1236
 
1237
  clear_btn.click(
1238
- lambda: ("", "", "", "", "", "", ""),
1239
- outputs=[rdf_input, status_output, results_output, suggestions_output, corrected_output, corrected_status_output, corrected_results_output]
1240
  )
1241
 
1242
  # Footer with instructions
 
222
  return {k: '\n'.join(v) for k, v in class_results.items() if v}
223
 
224
  def get_ai_suggestions(validation_results: str, rdf_content: str, include_warnings: bool = False) -> str:
225
+ """Generate AI-powered, plain-language suggestions based on validation results.
226
+
227
+ Avoids RDF/SHACL jargon and focuses on actionable fixes.
228
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  if not OPENAI_AVAILABLE:
230
  return generate_manual_suggestions(validation_results)
231
+
 
232
  current_api_key = os.getenv('HF_API_KEY', '')
233
  if not current_api_key:
234
  return f"""
 
236
 
237
  {generate_manual_suggestions(validation_results)}
238
  """
239
+
240
  try:
 
241
  client = get_openai_client()
242
  if not client:
243
  return f"""
 
245
 
246
  {generate_manual_suggestions(validation_results)}
247
  """
248
+
249
+ severity_instruction = (
250
+ "Focus only on violations (errors) and ignore any warnings."
251
+ if not include_warnings else
252
+ "Address both violations and warnings."
253
+ )
254
+
255
+ # Group errors by class to focus the prompt
256
  class_results = filter_validation_results_by_class(validation_results, rdf_content)
257
+ if class_results:
258
+ primary_class = max(class_results.keys(), key=lambda k: len(class_results[k]))
259
+ focused_results = class_results[primary_class]
260
+ else:
261
+ primary_class = "Record"
262
+ focused_results = validation_results
263
+
264
+ simplified_summary = parse_shacl_results_for_ai(focused_results)
265
  relevant_rdf = extract_relevant_rdf_section(rdf_content, primary_class)
266
+
267
+ prompt = f"""
268
+ You are a helpful metadata librarian. Write in plain language (no RDF/SHACL jargon). Analyze the validation errors for the {primary_class} and provide concise, actionable fixes.
269
 
270
  {severity_instruction}
271
 
272
  Validation Errors for {primary_class}:
273
  {focused_results[:1500]}
274
 
275
+ Validation Summary (plain language):
276
+ {simplified_summary}
277
+
278
  Relevant RDF Section:
279
  {relevant_rdf[:800]}
280
 
 
296
  ```
297
 
298
  Be ultra-concise. Show the fix, not explanations."""
299
+
 
 
 
 
300
  chat_completion = client.chat.completions.create(
301
  model=HF_MODEL,
302
  messages=[
303
+ {
304
+ "role": "system",
305
+ "content": "You are a friendly librarian helping fix catalog records. Never use technical RDF or SHACL terminology."
306
+ },
307
  {
308
  "role": "user",
309
  "content": prompt
310
  }
311
  ],
312
+ max_tokens=800,
313
+ temperature=0.5,
314
  top_p=0.9
315
  )
316
+
 
317
  generated_text = chat_completion.choices[0].message.content
318
+ generated_text = clean_technical_jargon(generated_text)
319
+
320
  other_classes = [k for k in class_results.keys() if k != primary_class]
321
+ class_note = (
322
+ f"\n\nπŸ“Œ **Note:** Focused on {primary_class} errors. " +
323
+ (f"Also found issues in: {', '.join(other_classes)}" if other_classes else "")
324
+ )
325
+
326
  return f"πŸ€– **AI-Powered Suggestions ({('Violations + Warnings' if include_warnings else 'Violations Only')}):**\n\n{generated_text}{class_note}"
327
+
328
  except Exception as e:
329
  logger.error(f"OpenAI/HF Inference Endpoint error: {str(e)}")
330
  return f"""
 
370
 
371
  return rdf_content[:1000] # Fallback
372
 
373
+ def get_ai_correction(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: int = None, include_warnings: bool = False, enable_validation_loop: bool | None = None, steps_log: Optional[List[str]] = None) -> str:
374
  """
375
  Generate AI-powered corrected RDF/XML based on validation errors.
376
 
 
389
  str: Corrected RDF/XML that should pass validation
390
  """
391
 
392
+ # Determine whether to iterate based on parameter or global default
393
+ iterate_enabled = ENABLE_VALIDATION_LOOP if enable_validation_loop is None else enable_validation_loop
394
+ if steps_log is not None:
395
+ steps_log.append(f"Planning correction: iterate_enabled={iterate_enabled}, include_warnings={include_warnings}")
396
  # Use configuration default if not specified
397
  if max_attempts is None:
398
  max_attempts = MAX_CORRECTION_ATTEMPTS
399
+ if steps_log is not None:
400
+ steps_log.append(f"Max attempts set to {max_attempts}")
401
+ # If iteration disabled, force single attempt
402
+ if not iterate_enabled:
403
+ max_attempts = 1
404
+ if steps_log is not None:
405
+ steps_log.append("Iteration disabled; forcing single attempt")
406
 
407
  if not OPENAI_AVAILABLE:
408
+ if steps_log is not None:
409
+ steps_log.append("OPENAI client not available; falling back to manual hints")
410
  return generate_manual_correction_hints(validation_results, rdf_content)
411
 
412
  # Get API key dynamically at runtime
413
  current_api_key = os.getenv('HF_API_KEY', '')
414
  if not current_api_key:
415
+ if steps_log is not None:
416
+ steps_log.append("HF_API_KEY not set; cannot call model; returning manual hints")
417
  return f"""<!-- AI correction disabled: Set HF_API_KEY as a Secret in your Space settings -->
418
 
419
  {generate_manual_correction_hints(validation_results, rdf_content)}"""
 
421
  try:
422
  client = get_openai_client()
423
  if not client:
424
+ if steps_log is not None:
425
+ steps_log.append("Failed to initialize OpenAI client; returning manual hints")
426
  return f"""<!-- AI correction disabled: HF_API_KEY not configured -->
427
 
428
  {generate_manual_correction_hints(validation_results, rdf_content)}"""
 
431
  import time
432
  start_time = time.time()
433
  timeout = 60 # 60 second timeout
434
+ if steps_log is not None:
435
+ steps_log.append(f"Timeout budget: {timeout}s total")
436
 
437
  severity_instruction = "Fix only the violations (errors) and ignore any warnings." if not include_warnings else "Fix both violations and warnings."
438
 
 
674
  # If no code blocks found, return the response as-is
675
  return response
676
 
677
+ def get_ai_correction(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: int = None, include_warnings: bool = False, enable_validation_loop: bool | None = None, steps_log: Optional[List[str]] = None) -> str:
678
  """
679
  Generate AI-powered corrected RDF/XML based on validation errors.
680
 
 
693
  str: Corrected RDF/XML that should pass validation
694
  """
695
 
696
+ # Determine whether to iterate based on parameter or global default
697
+ iterate_enabled = ENABLE_VALIDATION_LOOP if enable_validation_loop is None else enable_validation_loop
698
+ if steps_log is not None:
699
+ steps_log.append(f"Planning correction: iterate_enabled={iterate_enabled}, include_warnings={include_warnings}")
700
  # Use configuration default if not specified
701
  if max_attempts is None:
702
  max_attempts = MAX_CORRECTION_ATTEMPTS
703
+ if steps_log is not None:
704
+ steps_log.append(f"Max attempts set to {max_attempts}")
705
+ # If iteration disabled, force single attempt
706
+ if not iterate_enabled:
707
+ max_attempts = 1
708
+ if steps_log is not None:
709
+ steps_log.append("Iteration disabled; forcing single attempt")
710
 
711
  if not OPENAI_AVAILABLE:
712
+ if steps_log is not None:
713
+ steps_log.append("OPENAI client not available; falling back to manual hints")
714
  return generate_manual_correction_hints(validation_results, rdf_content)
715
 
716
  # Get API key dynamically at runtime
717
  current_api_key = os.getenv('HF_API_KEY', '')
718
  if not current_api_key:
719
+ if steps_log is not None:
720
+ steps_log.append("HF_API_KEY not set; cannot call model; returning manual hints")
721
  return f"""<!-- AI correction disabled: Set HF_API_KEY as a Secret in your Space settings -->
722
 
723
  {generate_manual_correction_hints(validation_results, rdf_content)}"""
 
725
  try:
726
  client = get_openai_client()
727
  if not client:
728
+ if steps_log is not None:
729
+ steps_log.append("Failed to initialize OpenAI client; returning manual hints")
730
  return f"""<!-- AI correction disabled: HF_API_KEY not configured -->
731
 
732
  {generate_manual_correction_hints(validation_results, rdf_content)}"""
 
735
  import time
736
  start_time = time.time()
737
  timeout = 60 # 60 second timeout
738
+ if steps_log is not None:
739
+ steps_log.append(f"Timeout budget: {timeout}s total")
740
 
741
  severity_instruction = "Fix only the violations (errors) and ignore any warnings." if not include_warnings else "Fix both violations and warnings."
742
 
743
  # Try multiple attempts to generate valid RDF
744
  for attempt in range(max_attempts):
745
  # Check timeout
746
+ elapsed = time.time() - start_time
747
+ if elapsed > timeout:
748
+ if steps_log is not None:
749
+ steps_log.append(f"Timeout reached after {int(elapsed)}s; stopping attempts")
750
  print(f"⏰ Timeout reached after {timeout} seconds")
751
  break
752
 
753
+ attempt_no = attempt + 1
754
+ if steps_log is not None:
755
+ steps_log.append(f"Attempt {attempt_no}/{max_attempts}: requesting model correction")
756
+ print(f"πŸ”„ Correction attempt {attempt_no}/{max_attempts}")
757
 
758
  prompt = f"""You are an expert in RDF/XML. Fix the following RDF/XML based on the validation errors provided.
759
 
 
789
  )
790
 
791
  corrected_rdf = chat_completion.choices[0].message.content.strip()
792
+ if steps_log is not None:
793
+ steps_log.append(f"Attempt {attempt_no}: model responded; extracting XML block if present")
794
 
795
  # Extract RDF content if it's wrapped in code blocks
796
  corrected_rdf = extract_rdf_from_response(corrected_rdf)
 
802
  conforms, new_results = validate_rdf(corrected_rdf.encode('utf-8'), template)
803
 
804
  if conforms:
805
+ if steps_log is not None:
806
+ steps_log.append(f"Attempt {attempt_no}: correction PASSED validation")
807
+ print(f"βœ… Correction validated successfully on attempt {attempt_no}")
808
  return f"""<!-- AI-generated correction validated successfully -->
809
  {corrected_rdf}"""
810
  else:
811
+ if steps_log is not None:
812
+ steps_log.append(f"Attempt {attempt_no}: still invalid; will retry with updated errors")
813
+ print(f"❌ Correction attempt {attempt_no} still has validation errors")
814
  # Update validation_results for next attempt
815
  validation_results = new_results
816
 
817
  except Exception as e:
818
+ if steps_log is not None:
819
+ steps_log.append(f"Attempt {attempt_no}: error during validation: {str(e)} β€” returning correction anyway")
820
+ print(f"⚠️ Error validating correction attempt {attempt_no}: {str(e)}")
821
  # If validation fails, return the correction anyway
822
  return f"""<!-- AI-generated correction (validation check failed) -->
823
  {corrected_rdf}"""
824
  else:
825
  # If validator not available or timeout approaching, return the correction
826
+ if steps_log is not None:
827
+ steps_log.append("Skipping validation check (validator unavailable or timeout)")
828
  print("⚠️ Returning correction without validation")
829
  return f"""<!-- AI-generated correction (validation skipped) -->
830
  {corrected_rdf}"""
831
 
832
  except Exception as api_error:
833
+ if steps_log is not None:
834
+ steps_log.append(f"Attempt {attempt_no}: API error: {str(api_error)}")
835
+ print(f"❌ API error on attempt {attempt_no}: {str(api_error)}")
836
  if attempt == max_attempts - 1: # Last attempt
837
  raise api_error
838
  continue
839
 
840
  # All attempts failed or timed out
841
+ if steps_log is not None:
842
+ steps_log.append("All attempts failed or timed out; returning manual hints")
843
  return f"""<!-- AI correction failed after {max_attempts} attempts or timeout -->
844
  <!-- Please correct manually using the validation results as a guide -->
845
 
 
847
 
848
  except Exception as e:
849
  logger.error(f"LLM API error: {str(e)}")
850
+ if steps_log is not None:
851
+ steps_log.append(f"Fatal error invoking model: {str(e)}")
852
  return f"""<!-- Error generating AI correction: {str(e)} -->
853
 
854
  {generate_manual_correction_hints(validation_results, rdf_content)}"""
 
1022
  fenced = re.sub(r"^```[a-zA-Z]*\n|```$", "", text.strip())
1023
  return fenced if fenced else text
1024
 
1025
+ def validate_rdf_interface(rdf_content: str, template: str, use_ai: bool = True, include_warnings: bool = False, iterate_until_valid: bool = False, max_attempts: int = 3, show_steps: bool = False):
1026
  """Main validation function for Gradio interface"""
1027
  if not rdf_content.strip():
1028
  return "❌ Error", "No RDF/XML data provided", "", "", "", ""
 
1031
  result = validate_rdf_tool(rdf_content, template)
1032
 
1033
  if "error" in result:
1034
+ return f"❌ Error: {result['error']}", "", "", "", "", "", ""
1035
 
1036
  status = result["status"]
1037
  results_text = result["results"]
 
1055
 
1056
  filtered_results = '\n'.join(filtered_lines)
1057
 
1058
+ corrected_status = ""
1059
+ corrected_results = ""
1060
+ steps_log: List[str] = []
1061
+ steps_log.append(f"Initial validation: {'PASSED' if result['conforms'] else 'FAILED'} using template '{template}'")
1062
+ if not include_warnings:
1063
+ steps_log.append("Configured to ignore warnings in AI processing")
1064
+ if iterate_until_valid:
1065
+ steps_log.append(f"Iteration enabled with max_attempts={max_attempts}")
1066
  if result["conforms"]:
1067
  suggestions = "βœ… No issues found! Your RDF/XML is valid according to the selected template."
1068
+ corrected_rdf = "<!-- Already valid - no corrections needed -->\n" + rdf_content
1069
+ corrected_status = "β€”"
1070
+ corrected_results = ""
1071
+ steps_log.append("No correction needed; record already conforms")
1072
  else:
1073
  if use_ai:
1074
  # Pass filtered results to AI functions
1075
  suggestions = get_ai_suggestions(filtered_results, rdf_content, include_warnings)
1076
+ steps_log.append("Requested AI suggestions for concise guidance")
1077
+ corrected_rdf = get_ai_correction(
1078
+ filtered_results,
1079
+ rdf_content,
1080
+ template,
1081
+ max_attempts=max_attempts,
1082
+ include_warnings=include_warnings,
1083
+ enable_validation_loop=iterate_until_valid,
1084
+ steps_log=steps_log,
1085
+ )
1086
+ # Attempt re-validation of corrected RDF
1087
+ try:
1088
+ corrected_xml = extract_xml_from_text(corrected_rdf)
1089
+ reval = validate_rdf_tool(corrected_xml, template)
1090
+ if "error" in reval:
1091
+ corrected_status = f"❌ Re-validation Error: {reval['error']}"
1092
+ corrected_results = ""
1093
+ steps_log.append(f"Re-validation failed with error: {reval['error']}")
1094
+ else:
1095
+ corrected_status = reval.get("status", "")
1096
+ corrected_results = reval.get("results", "")
1097
+ steps_log.append(f"Re-validation: {corrected_status}")
1098
+ except Exception as re_ex:
1099
+ corrected_status = f"❌ Re-validation Error: {re_ex}"
1100
+ corrected_results = ""
1101
+ steps_log.append(f"Re-validation error: {re_ex}")
1102
  else:
1103
  suggestions = generate_manual_suggestions(filtered_results)
1104
  corrected_rdf = generate_manual_correction_hints(filtered_results, rdf_content)
1105
+ corrected_status = "β€”"
1106
+ corrected_results = ""
1107
+ steps_log.append("AI disabled; produced manual suggestions and hints")
1108
 
1109
+ steps_text = "\n".join(steps_log) if show_steps else ""
1110
+ return status, results_text, suggestions, steps_text, corrected_rdf, corrected_status, corrected_results
1111
 
1112
  def get_rdf_examples(example_type: str = "valid") -> str:
1113
  """
 
1235
  value=False,
1236
  info="Include warnings in AI corrections (violations only by default)"
1237
  )
1238
+
1239
+ with gr.Row():
1240
+ iterate_checkbox = gr.Checkbox(
1241
+ label="Iterate until valid",
1242
+ value=False,
1243
+ info="Try multiple correction attempts until validation passes or attempts run out"
1244
+ )
1245
+ max_attempts_slider = gr.Slider(
1246
+ label="Max attempts",
1247
+ minimum=1,
1248
+ maximum=5,
1249
+ value=3,
1250
+ step=1,
1251
+ info="Maximum number of correction attempts when iterating"
1252
+ )
1253
+ show_steps_checkbox = gr.Checkbox(
1254
+ label="Show steps",
1255
+ value=True,
1256
+ info="Display step-by-step process (turn off for a simpler response)"
1257
+ )
1258
 
1259
  validate_btn = gr.Button("πŸ” Validate RDF", variant="primary", size="lg")
1260
 
 
1291
  lines=8,
1292
  show_copy_button=True
1293
  )
1294
+
1295
+ steps_output = gr.Textbox(
1296
+ label="🧭 Correction Steps",
1297
+ interactive=False,
1298
+ lines=10,
1299
+ show_copy_button=True,
1300
+ placeholder="Step-by-step log of how the system derived the corrected XML"
1301
+ )
1302
 
1303
  # Corrected RDF section
1304
  with gr.Row():
 
1330
  # Event handlers
1331
  validate_btn.click(
1332
  fn=validate_rdf_interface,
1333
+ inputs=[rdf_input, template_dropdown, use_ai_checkbox, include_warnings_checkbox, iterate_checkbox, max_attempts_slider, show_steps_checkbox],
1334
+ outputs=[status_output, results_output, suggestions_output, steps_output, corrected_output, corrected_status_output, corrected_results_output]
1335
  )
1336
 
1337
  # Remove auto-validation to prevent processing loops
 
1353
  )
1354
 
1355
  clear_btn.click(
1356
+ lambda: ("", "", "", "", "", "", "", ""),
1357
+ outputs=[rdf_input, status_output, results_output, suggestions_output, steps_output, corrected_output, corrected_status_output, corrected_results_output]
1358
  )
1359
 
1360
  # Footer with instructions