RDF Validation Deployment committed on
Commit
1f1dd7e
·
1 Parent(s): af9e2c1

streamline

Browse files
Files changed (1) hide show
  1. app.py +71 -4
app.py CHANGED
@@ -12,11 +12,12 @@ import json
12
  import sys
13
  import asyncio
14
  import logging
15
- import requests
16
  import re
17
- from typing import Any, Dict, List, Optional
18
  import threading
19
  import time
 
 
20
 
21
  # Add current directory to path
22
  sys.path.append(os.path.dirname(os.path.abspath(__file__)))
@@ -75,6 +76,49 @@ MCP4BIBFRAME_DOCS_ENABLED = True # Set to False to disable doc integration
75
  BIBFRAME_DOCS_CACHE: Dict[str, tuple[Any, float]] = {}
76
  BIBFRAME_DOCS_CACHE_TTL = 3600 # seconds
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  def test_validator_functionality():
79
  """Test if the validator is actually working"""
80
  if not VALIDATOR_AVAILABLE:
@@ -1231,7 +1275,7 @@ def generate_property_specific_fix(property_name: str, guidance: Optional[dict]
1231
  # Fallback: simple literal placeholder
1232
  return f"<bf:{property_name}>PLACEHOLDER_VALUE</bf:{property_name}>"
1233
 
1234
- def get_ai_correction(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: int = None, include_warnings: bool = False, enable_validation_loop: bool | None = None, steps_log: Optional[List[str]] = None) -> str:
1235
  """
1236
  Generate AI-powered corrected RDF/XML based on validation errors.
1237
 
@@ -1264,6 +1308,13 @@ def get_ai_correction(validation_results: str, rdf_content: str, template: str =
1264
  max_attempts = 1
1265
  if steps_log is not None:
1266
  steps_log.append("Iteration disabled; forcing single attempt")
 
 
 
 
 
 
 
1267
 
1268
  if not OPENAI_AVAILABLE:
1269
  if steps_log is not None:
@@ -1449,8 +1500,11 @@ Output ONLY valid RDF/XML following these rules:
1449
  if steps_log is not None:
1450
  steps_log.append(f"Attempt {attempt_no}: correction PASSED validation")
1451
  print(f"✅ Correction validated successfully on attempt {attempt_no}")
1452
- return f"""<!-- AI-generated correction validated successfully -->
1453
  {corrected_rdf}"""
 
 
 
1454
  else:
1455
  if steps_log is not None:
1456
  steps_log.append(f"Attempt {attempt_no}: still invalid; will retry with updated errors")
@@ -1501,6 +1555,13 @@ Output ONLY valid RDF/XML following these rules:
1501
  def get_ai_correction_targeted(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: int = None, include_warnings: bool = False, enable_validation_loop: bool | None = None, steps_log: Optional[List[str]] = None) -> str:
1502
  """Fast path that attempts structured quick fixes before invoking the full AI loop."""
1503
 
 
 
 
 
 
 
 
1504
  focus_points = extract_error_focus_points(validation_results)
1505
  missing_props = focus_points.get("missing_properties", [])
1506
 
@@ -1555,6 +1616,8 @@ def get_ai_correction_targeted(validation_results: str, rdf_content: str, templa
1555
  if conforms:
1556
  if steps_log is not None:
1557
  steps_log.append("Quick fix succeeded; validation now passes")
 
 
1558
  return working_rdf
1559
  else:
1560
  if steps_log is not None:
@@ -1564,6 +1627,9 @@ def get_ai_correction_targeted(validation_results: str, rdf_content: str, templa
1564
  if steps_log is not None:
1565
  steps_log.append(f"Quick fix validation error: {quick_err}; using AI fallback")
1566
 
 
 
 
1567
  return get_ai_correction(
1568
  validation_results,
1569
  working_rdf,
@@ -1571,6 +1637,7 @@ def get_ai_correction_targeted(validation_results: str, rdf_content: str, templa
1571
  max_attempts=max_attempts,
1572
  include_warnings=include_warnings,
1573
  enable_validation_loop=enable_validation_loop,
 
1574
  steps_log=steps_log,
1575
  )
1576
 
 
12
  import sys
13
  import asyncio
14
  import logging
 
15
  import re
16
+ import hashlib
17
  import threading
18
  import time
19
+ from collections import OrderedDict
20
+ from typing import Any, Dict, List, Optional
21
 
22
  # Add current directory to path
23
  sys.path.append(os.path.dirname(os.path.abspath(__file__)))
 
76
  BIBFRAME_DOCS_CACHE: Dict[str, tuple[Any, float]] = {}
77
  BIBFRAME_DOCS_CACHE_TTL = 3600 # seconds
78
 
79
+ # Cache successful correction outputs to accelerate repeated error patterns
80
+ FIX_CACHE: OrderedDict[str, str] = OrderedDict()
81
+ FIX_CACHE_MAX_SIZE = 100
82
+
83
+
84
def _make_fix_cache_key(validation_results: str, rdf_content: str, template: str) -> str:
    """Generate a deterministic cache key for correction attempts.

    The key is the SHA-256 hex digest of the whitespace-stripped template name,
    validation report and RDF content, joined by the ASCII unit separator
    (0x1F) so that text cannot shift from one field into the next and
    produce the same digest.

    Args:
        validation_results: Validation report text the correction responds to.
        rdf_content: RDF/XML document being corrected.
        template: Template name the document is validated against.

    Returns:
        A 64-character lowercase hexadecimal digest.
    """
    fields = (template, validation_results, rdf_content)
    # Every field is encoded with errors="ignore": un-encodable code points
    # (e.g. lone surrogates from a lossy decode) are dropped rather than
    # raising, so key generation never aborts a correction request.
    encoded = [field.strip().encode("utf-8", errors="ignore") for field in fields]
    return hashlib.sha256(b"\x1f".join(encoded)).hexdigest()
93
+
94
+
95
def _get_cached_correction(cache_key: str, steps_log: Optional[List[str]] = None) -> Optional[str]:
    """Look up a previously stored correction for ``cache_key``.

    On a hit the entry is moved to the end of ``FIX_CACHE`` (marking it as
    the most recently used) and, when ``steps_log`` is supplied, a note is
    appended to it.

    Returns:
        The cached correction text, or ``None`` when the key is not cached.
    """
    hit = FIX_CACHE.get(cache_key)
    if hit is None:
        return None

    # Refresh recency so this entry is evicted last.
    FIX_CACHE.move_to_end(cache_key)
    if steps_log is not None:
        steps_log.append("Using cached correction for repeated validation errors")
    return hit
103
+
104
+
105
def _store_correction_in_cache(cache_key: str, corrected_rdf: str, steps_log: Optional[List[str]] = None) -> None:
    """Remember ``corrected_rdf`` under ``cache_key``.

    Empty corrections are not stored. The entry is placed at the end of
    ``FIX_CACHE``; if that pushes the cache past ``FIX_CACHE_MAX_SIZE`` the
    entry at the front (the least recently used) is dropped. When
    ``steps_log`` is supplied, one message describing the outcome is appended.
    """
    if corrected_rdf:
        FIX_CACHE[cache_key] = corrected_rdf
        FIX_CACHE.move_to_end(cache_key)

        over_capacity = len(FIX_CACHE) > FIX_CACHE_MAX_SIZE
        if over_capacity:
            # Drop the entry at the front of the ordering.
            FIX_CACHE.popitem(last=False)

        if steps_log is not None:
            if over_capacity:
                steps_log.append("Cache full; evicted oldest correction entry")
            else:
                steps_log.append("Cached correction for future reuse")
117
+
118
+ # Cache successful correction outputs to accelerate repeated error patterns
119
+ FIX_CACHE: OrderedDict[str, str] = OrderedDict()
120
+ FIX_CACHE_MAX_SIZE = 100
121
+
122
  def test_validator_functionality():
123
  """Test if the validator is actually working"""
124
  if not VALIDATOR_AVAILABLE:
 
1275
  # Fallback: simple literal placeholder
1276
  return f"<bf:{property_name}>PLACEHOLDER_VALUE</bf:{property_name}>"
1277
 
1278
+ def get_ai_correction(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: int = None, include_warnings: bool = False, enable_validation_loop: bool | None = None, cache_key: Optional[str] = None, steps_log: Optional[List[str]] = None) -> str:
1279
  """
1280
  Generate AI-powered corrected RDF/XML based on validation errors.
1281
 
 
1308
  max_attempts = 1
1309
  if steps_log is not None:
1310
  steps_log.append("Iteration disabled; forcing single attempt")
1311
+
1312
+ if cache_key is None and validation_results and rdf_content:
1313
+ cache_key = _make_fix_cache_key(validation_results, rdf_content, template)
1314
+ if cache_key:
1315
+ cached_result = _get_cached_correction(cache_key, steps_log)
1316
+ if cached_result is not None:
1317
+ return cached_result
1318
 
1319
  if not OPENAI_AVAILABLE:
1320
  if steps_log is not None:
 
1500
  if steps_log is not None:
1501
  steps_log.append(f"Attempt {attempt_no}: correction PASSED validation")
1502
  print(f"✅ Correction validated successfully on attempt {attempt_no}")
1503
+ result_text = f"""<!-- AI-generated correction validated successfully -->
1504
  {corrected_rdf}"""
1505
+ if cache_key:
1506
+ _store_correction_in_cache(cache_key, result_text, steps_log)
1507
+ return result_text
1508
  else:
1509
  if steps_log is not None:
1510
  steps_log.append(f"Attempt {attempt_no}: still invalid; will retry with updated errors")
 
1555
  def get_ai_correction_targeted(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: int = None, include_warnings: bool = False, enable_validation_loop: bool | None = None, steps_log: Optional[List[str]] = None) -> str:
1556
  """Fast path that attempts structured quick fixes before invoking the full AI loop."""
1557
 
1558
+ cache_key: Optional[str] = None
1559
+ if validation_results and rdf_content:
1560
+ cache_key = _make_fix_cache_key(validation_results, rdf_content, template)
1561
+ cached = _get_cached_correction(cache_key, steps_log)
1562
+ if cached is not None:
1563
+ return cached
1564
+
1565
  focus_points = extract_error_focus_points(validation_results)
1566
  missing_props = focus_points.get("missing_properties", [])
1567
 
 
1616
  if conforms:
1617
  if steps_log is not None:
1618
  steps_log.append("Quick fix succeeded; validation now passes")
1619
+ if cache_key:
1620
+ _store_correction_in_cache(cache_key, working_rdf, steps_log)
1621
  return working_rdf
1622
  else:
1623
  if steps_log is not None:
 
1627
  if steps_log is not None:
1628
  steps_log.append(f"Quick fix validation error: {quick_err}; using AI fallback")
1629
 
1630
+ if validation_results and working_rdf:
1631
+ cache_key = _make_fix_cache_key(validation_results, working_rdf, template)
1632
+
1633
  return get_ai_correction(
1634
  validation_results,
1635
  working_rdf,
 
1637
  max_attempts=max_attempts,
1638
  include_warnings=include_warnings,
1639
  enable_validation_loop=enable_validation_loop,
1640
+ cache_key=cache_key,
1641
  steps_log=steps_log,
1642
  )
1643