RDF Validation Deployment
committed on
Commit
·
1f1dd7e
1
Parent(s):
af9e2c1
streamline
Browse files
app.py
CHANGED
|
@@ -12,11 +12,12 @@ import json
|
|
| 12 |
import sys
|
| 13 |
import asyncio
|
| 14 |
import logging
|
| 15 |
-
import requests
|
| 16 |
import re
|
| 17 |
-
|
| 18 |
import threading
|
| 19 |
import time
|
|
|
|
|
|
|
| 20 |
|
| 21 |
# Add current directory to path
|
| 22 |
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
|
@@ -75,6 +76,49 @@ MCP4BIBFRAME_DOCS_ENABLED = True # Set to False to disable doc integration
|
|
| 75 |
BIBFRAME_DOCS_CACHE: Dict[str, tuple[Any, float]] = {}
|
| 76 |
BIBFRAME_DOCS_CACHE_TTL = 3600 # seconds
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
def test_validator_functionality():
|
| 79 |
"""Test if the validator is actually working"""
|
| 80 |
if not VALIDATOR_AVAILABLE:
|
|
@@ -1231,7 +1275,7 @@ def generate_property_specific_fix(property_name: str, guidance: Optional[dict]
|
|
| 1231 |
# Fallback: simple literal placeholder
|
| 1232 |
return f"<bf:{property_name}>PLACEHOLDER_VALUE</bf:{property_name}>"
|
| 1233 |
|
| 1234 |
-
def get_ai_correction(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: int = None, include_warnings: bool = False, enable_validation_loop: bool | None = None, steps_log: Optional[List[str]] = None) -> str:
|
| 1235 |
"""
|
| 1236 |
Generate AI-powered corrected RDF/XML based on validation errors.
|
| 1237 |
|
|
@@ -1264,6 +1308,13 @@ def get_ai_correction(validation_results: str, rdf_content: str, template: str =
|
|
| 1264 |
max_attempts = 1
|
| 1265 |
if steps_log is not None:
|
| 1266 |
steps_log.append("Iteration disabled; forcing single attempt")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1267 |
|
| 1268 |
if not OPENAI_AVAILABLE:
|
| 1269 |
if steps_log is not None:
|
|
@@ -1449,8 +1500,11 @@ Output ONLY valid RDF/XML following these rules:
|
|
| 1449 |
if steps_log is not None:
|
| 1450 |
steps_log.append(f"Attempt {attempt_no}: correction PASSED validation")
|
| 1451 |
print(f"✅ Correction validated successfully on attempt {attempt_no}")
|
| 1452 |
-
|
| 1453 |
{corrected_rdf}"""
|
|
|
|
|
|
|
|
|
|
| 1454 |
else:
|
| 1455 |
if steps_log is not None:
|
| 1456 |
steps_log.append(f"Attempt {attempt_no}: still invalid; will retry with updated errors")
|
|
@@ -1501,6 +1555,13 @@ Output ONLY valid RDF/XML following these rules:
|
|
| 1501 |
def get_ai_correction_targeted(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: int = None, include_warnings: bool = False, enable_validation_loop: bool | None = None, steps_log: Optional[List[str]] = None) -> str:
|
| 1502 |
"""Fast path that attempts structured quick fixes before invoking the full AI loop."""
|
| 1503 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1504 |
focus_points = extract_error_focus_points(validation_results)
|
| 1505 |
missing_props = focus_points.get("missing_properties", [])
|
| 1506 |
|
|
@@ -1555,6 +1616,8 @@ def get_ai_correction_targeted(validation_results: str, rdf_content: str, templa
|
|
| 1555 |
if conforms:
|
| 1556 |
if steps_log is not None:
|
| 1557 |
steps_log.append("Quick fix succeeded; validation now passes")
|
|
|
|
|
|
|
| 1558 |
return working_rdf
|
| 1559 |
else:
|
| 1560 |
if steps_log is not None:
|
|
@@ -1564,6 +1627,9 @@ def get_ai_correction_targeted(validation_results: str, rdf_content: str, templa
|
|
| 1564 |
if steps_log is not None:
|
| 1565 |
steps_log.append(f"Quick fix validation error: {quick_err}; using AI fallback")
|
| 1566 |
|
|
|
|
|
|
|
|
|
|
| 1567 |
return get_ai_correction(
|
| 1568 |
validation_results,
|
| 1569 |
working_rdf,
|
|
@@ -1571,6 +1637,7 @@ def get_ai_correction_targeted(validation_results: str, rdf_content: str, templa
|
|
| 1571 |
max_attempts=max_attempts,
|
| 1572 |
include_warnings=include_warnings,
|
| 1573 |
enable_validation_loop=enable_validation_loop,
|
|
|
|
| 1574 |
steps_log=steps_log,
|
| 1575 |
)
|
| 1576 |
|
|
|
|
| 12 |
import sys
|
| 13 |
import asyncio
|
| 14 |
import logging
|
|
|
|
| 15 |
import re
|
| 16 |
+
import hashlib
|
| 17 |
import threading
|
| 18 |
import time
|
| 19 |
+
from collections import OrderedDict
|
| 20 |
+
from typing import Any, Dict, List, Optional
|
| 21 |
|
| 22 |
# Add current directory to path
|
| 23 |
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
| 76 |
BIBFRAME_DOCS_CACHE: Dict[str, tuple[Any, float]] = {}
|
| 77 |
BIBFRAME_DOCS_CACHE_TTL = 3600 # seconds
|
| 78 |
|
| 79 |
+
# Cache successful correction outputs to accelerate repeated error patterns
|
| 80 |
+
FIX_CACHE: OrderedDict[str, str] = OrderedDict()
|
| 81 |
+
FIX_CACHE_MAX_SIZE = 100
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def _make_fix_cache_key(validation_results: str, rdf_content: str, template: str) -> str:
    """Return a deterministic SHA-256 hex digest identifying a correction request.

    The digest covers, in this order, the template name, the validation
    report and the RDF content. Each field is stripped of surrounding
    whitespace and the fields are joined with the ASCII unit separator
    (0x1F), so inputs that differ only in leading/trailing whitespace map
    to the same key.

    Args:
        validation_results: Text of the validation report.
        rdf_content: RDF document the report refers to.
        template: Name of the template the correction targets.

    Returns:
        The hexadecimal SHA-256 digest of the combined fields.
    """
    # "surrogatepass" never raises on lone surrogates and, unlike "ignore",
    # keeps them in the hashed bytes, so two inputs that differ only in such
    # characters cannot collide on the same cache entry. All three fields use
    # the same handler so none of them can raise where the others would not.
    fields = (template, validation_results, rdf_content)
    payload = b"\x1f".join(
        field.strip().encode("utf-8", errors="surrogatepass") for field in fields
    )
    return hashlib.sha256(payload).hexdigest()
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def _get_cached_correction(cache_key: str, steps_log: Optional[List[str]] = None) -> Optional[str]:
    """Look up a previously stored correction and mark it as most recently used.

    Args:
        cache_key: Key to look up in FIX_CACHE.
        steps_log: Optional list that receives a note when a cached entry is used.

    Returns:
        The cached correction text, or None when the key has no entry.
    """
    hit = FIX_CACHE.get(cache_key)
    if hit is None:
        return None

    # Move the entry to the end so it is the last candidate for eviction.
    FIX_CACHE.move_to_end(cache_key)
    if steps_log is not None:
        steps_log.append("Using cached correction for repeated validation errors")
    return hit
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def _store_correction_in_cache(cache_key: str, corrected_rdf: str, steps_log: Optional[List[str]] = None) -> None:
    """Store a correction in FIX_CACHE and keep the cache within FIX_CACHE_MAX_SIZE.

    Falsy corrections (empty string or None) are not stored. The stored
    entry becomes the most recently used one; entries are evicted from the
    oldest end until the cache is no larger than FIX_CACHE_MAX_SIZE.

    Args:
        cache_key: Key under which the correction is stored.
        corrected_rdf: Correction text to cache.
        steps_log: Optional list that receives one note describing whether
            the entry was simply cached or an eviction took place.
    """
    if not corrected_rdf:
        return

    # Assigning to an existing key keeps its old position, so the entry is
    # moved to the end explicitly to mark it as most recently used.
    FIX_CACHE[cache_key] = corrected_rdf
    FIX_CACHE.move_to_end(cache_key)

    # Loop instead of evicting once so the bound is restored even when the
    # cache is more than one entry over the limit (e.g. the limit was lowered).
    evicted = False
    while FIX_CACHE and len(FIX_CACHE) > FIX_CACHE_MAX_SIZE:
        FIX_CACHE.popitem(last=False)
        evicted = True

    if steps_log is None:
        return
    if evicted:
        steps_log.append("Cache full; evicted oldest correction entry")
    else:
        steps_log.append("Cached correction for future reuse")
|
| 117 |
+
|
| 118 |
+
# FIX_CACHE and FIX_CACHE_MAX_SIZE are defined once, above the cache helper
# functions. Repeating the assignments here would only rebind FIX_CACHE to a
# fresh, empty OrderedDict at import time, so the duplicate has been removed.
|
| 121 |
+
|
| 122 |
def test_validator_functionality():
|
| 123 |
"""Test if the validator is actually working"""
|
| 124 |
if not VALIDATOR_AVAILABLE:
|
|
|
|
| 1275 |
# Fallback: simple literal placeholder
|
| 1276 |
return f"<bf:{property_name}>PLACEHOLDER_VALUE</bf:{property_name}>"
|
| 1277 |
|
| 1278 |
+
def get_ai_correction(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: int = None, include_warnings: bool = False, enable_validation_loop: bool | None = None, cache_key: Optional[str] = None, steps_log: Optional[List[str]] = None) -> str:
|
| 1279 |
"""
|
| 1280 |
Generate AI-powered corrected RDF/XML based on validation errors.
|
| 1281 |
|
|
|
|
| 1308 |
max_attempts = 1
|
| 1309 |
if steps_log is not None:
|
| 1310 |
steps_log.append("Iteration disabled; forcing single attempt")
|
| 1311 |
+
|
| 1312 |
+
if cache_key is None and validation_results and rdf_content:
|
| 1313 |
+
cache_key = _make_fix_cache_key(validation_results, rdf_content, template)
|
| 1314 |
+
if cache_key:
|
| 1315 |
+
cached_result = _get_cached_correction(cache_key, steps_log)
|
| 1316 |
+
if cached_result is not None:
|
| 1317 |
+
return cached_result
|
| 1318 |
|
| 1319 |
if not OPENAI_AVAILABLE:
|
| 1320 |
if steps_log is not None:
|
|
|
|
| 1500 |
if steps_log is not None:
|
| 1501 |
steps_log.append(f"Attempt {attempt_no}: correction PASSED validation")
|
| 1502 |
print(f"✅ Correction validated successfully on attempt {attempt_no}")
|
| 1503 |
+
result_text = f"""<!-- AI-generated correction validated successfully -->
|
| 1504 |
{corrected_rdf}"""
|
| 1505 |
+
if cache_key:
|
| 1506 |
+
_store_correction_in_cache(cache_key, result_text, steps_log)
|
| 1507 |
+
return result_text
|
| 1508 |
else:
|
| 1509 |
if steps_log is not None:
|
| 1510 |
steps_log.append(f"Attempt {attempt_no}: still invalid; will retry with updated errors")
|
|
|
|
| 1555 |
def get_ai_correction_targeted(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: int = None, include_warnings: bool = False, enable_validation_loop: bool | None = None, steps_log: Optional[List[str]] = None) -> str:
|
| 1556 |
"""Fast path that attempts structured quick fixes before invoking the full AI loop."""
|
| 1557 |
|
| 1558 |
+
cache_key: Optional[str] = None
|
| 1559 |
+
if validation_results and rdf_content:
|
| 1560 |
+
cache_key = _make_fix_cache_key(validation_results, rdf_content, template)
|
| 1561 |
+
cached = _get_cached_correction(cache_key, steps_log)
|
| 1562 |
+
if cached is not None:
|
| 1563 |
+
return cached
|
| 1564 |
+
|
| 1565 |
focus_points = extract_error_focus_points(validation_results)
|
| 1566 |
missing_props = focus_points.get("missing_properties", [])
|
| 1567 |
|
|
|
|
| 1616 |
if conforms:
|
| 1617 |
if steps_log is not None:
|
| 1618 |
steps_log.append("Quick fix succeeded; validation now passes")
|
| 1619 |
+
if cache_key:
|
| 1620 |
+
_store_correction_in_cache(cache_key, working_rdf, steps_log)
|
| 1621 |
return working_rdf
|
| 1622 |
else:
|
| 1623 |
if steps_log is not None:
|
|
|
|
| 1627 |
if steps_log is not None:
|
| 1628 |
steps_log.append(f"Quick fix validation error: {quick_err}; using AI fallback")
|
| 1629 |
|
| 1630 |
+
if validation_results and working_rdf:
|
| 1631 |
+
cache_key = _make_fix_cache_key(validation_results, working_rdf, template)
|
| 1632 |
+
|
| 1633 |
return get_ai_correction(
|
| 1634 |
validation_results,
|
| 1635 |
working_rdf,
|
|
|
|
| 1637 |
max_attempts=max_attempts,
|
| 1638 |
include_warnings=include_warnings,
|
| 1639 |
enable_validation_loop=enable_validation_loop,
|
| 1640 |
+
cache_key=cache_key,
|
| 1641 |
steps_log=steps_log,
|
| 1642 |
)
|
| 1643 |
|