Spaces:

jimfhahn
/

mcp4rdf

Sleeping

App Files Files Community

RDF Validation Deployment committed on Oct 4

Commit

1f1dd7e

1 Parent(s): af9e2c1

streamline

Browse files

Files changed (1) hide show

app.py +71 -4

app.py CHANGED Viewed

@@ -12,11 +12,12 @@ import json
 import sys
 import asyncio
 import logging
-import requests
 import re
-from typing import Any, Dict, List, Optional
 import threading
 import time
 # Add current directory to path
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
@@ -75,6 +76,49 @@ MCP4BIBFRAME_DOCS_ENABLED = True  # Set to False to disable doc integration
 BIBFRAME_DOCS_CACHE: Dict[str, tuple[Any, float]] = {}
 BIBFRAME_DOCS_CACHE_TTL = 3600  # seconds
 def test_validator_functionality():
     """Test if the validator is actually working"""
     if not VALIDATOR_AVAILABLE:
@@ -1231,7 +1275,7 @@ def generate_property_specific_fix(property_name: str, guidance: Optional[dict]
     # Fallback: simple literal placeholder
     return f"<bf:{property_name}>PLACEHOLDER_VALUE</bf:{property_name}>"
-def get_ai_correction(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: int = None, include_warnings: bool = False, enable_validation_loop: bool | None = None, steps_log: Optional[List[str]] = None) -> str:
     """
     Generate AI-powered corrected RDF/XML based on validation errors.
@@ -1264,6 +1308,13 @@ def get_ai_correction(validation_results: str, rdf_content: str, template: str =
         max_attempts = 1
         if steps_log is not None:
             steps_log.append("Iteration disabled; forcing single attempt")
     if not OPENAI_AVAILABLE:
         if steps_log is not None:
@@ -1449,8 +1500,11 @@ Output ONLY valid RDF/XML following these rules:
                             if steps_log is not None:
                                 steps_log.append(f"Attempt {attempt_no}: correction PASSED validation")
                             print(f"✅ Correction validated successfully on attempt {attempt_no}")
-                            return f"""<!-- AI-generated correction validated successfully -->
 {corrected_rdf}"""
                         else:
                             if steps_log is not None:
                                 steps_log.append(f"Attempt {attempt_no}: still invalid; will retry with updated errors")
@@ -1501,6 +1555,13 @@ Output ONLY valid RDF/XML following these rules:
 def get_ai_correction_targeted(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: int = None, include_warnings: bool = False, enable_validation_loop: bool | None = None, steps_log: Optional[List[str]] = None) -> str:
     """Fast path that attempts structured quick fixes before invoking the full AI loop."""
     focus_points = extract_error_focus_points(validation_results)
     missing_props = focus_points.get("missing_properties", [])
@@ -1555,6 +1616,8 @@ def get_ai_correction_targeted(validation_results: str, rdf_content: str, templa
                 if conforms:
                     if steps_log is not None:
                         steps_log.append("Quick fix succeeded; validation now passes")
                     return working_rdf
                 else:
                     if steps_log is not None:
@@ -1564,6 +1627,9 @@ def get_ai_correction_targeted(validation_results: str, rdf_content: str, templa
                 if steps_log is not None:
                     steps_log.append(f"Quick fix validation error: {quick_err}; using AI fallback")
     return get_ai_correction(
         validation_results,
         working_rdf,
@@ -1571,6 +1637,7 @@ def get_ai_correction_targeted(validation_results: str, rdf_content: str, templa
         max_attempts=max_attempts,
         include_warnings=include_warnings,
         enable_validation_loop=enable_validation_loop,
         steps_log=steps_log,
     )

 import sys
 import asyncio
 import logging
 import re
+import hashlib
 import threading
 import time
+from collections import OrderedDict
+from typing import Any, Dict, List, Optional
 # Add current directory to path
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
 BIBFRAME_DOCS_CACHE: Dict[str, tuple[Any, float]] = {}
 BIBFRAME_DOCS_CACHE_TTL = 3600  # seconds
+# Cache successful correction outputs to accelerate repeated error patterns
+FIX_CACHE: OrderedDict[str, str] = OrderedDict()
+FIX_CACHE_MAX_SIZE = 100
+def _make_fix_cache_key(validation_results: str, rdf_content: str, template: str) -> str:
+    """Generate a deterministic cache key for correction attempts."""
+    hasher = hashlib.sha256()
+    hasher.update(template.strip().encode("utf-8"))
+    hasher.update(b"\x1f")
+    hasher.update(validation_results.strip().encode("utf-8", errors="ignore"))
+    hasher.update(b"\x1f")
+    hasher.update(rdf_content.strip().encode("utf-8", errors="ignore"))
+    return hasher.hexdigest()
def _get_cached_correction(cache_key: str, steps_log: Optional[List[str]] = None) -> Optional[str]:
    """Look up a stored correction and mark it as most recently used.

    Args:
        cache_key: Key to look up in ``FIX_CACHE``.
        steps_log: Optional list that receives a note when a cached entry
            is returned.

    Returns:
        The cached correction text, or ``None`` when the key is not present.
    """
    hit = FIX_CACHE.get(cache_key)
    if hit is None:
        return None

    # Move the entry to the end so it is the last candidate for eviction.
    FIX_CACHE.move_to_end(cache_key)
    if steps_log is not None:
        steps_log.append("Using cached correction for repeated validation errors")
    return hit
def _store_correction_in_cache(cache_key: str, corrected_rdf: str, steps_log: Optional[List[str]] = None) -> None:
    """Remember a correction under ``cache_key``, evicting the oldest entry on overflow.

    Args:
        cache_key: Key under which the correction is stored in ``FIX_CACHE``.
        corrected_rdf: Correction text to store; falsy values are not cached.
        steps_log: Optional list that receives a note describing whether the
            entry was simply cached or an older entry had to be evicted.
    """
    if not corrected_rdf:
        # Nothing to store for an empty correction.
        return

    FIX_CACHE[cache_key] = corrected_rdf
    # Re-assigning an existing key keeps its old position, so move it to the end.
    FIX_CACHE.move_to_end(cache_key)

    evicted = len(FIX_CACHE) > FIX_CACHE_MAX_SIZE
    if evicted:
        # last=False removes the entry at the front, i.e. the oldest one.
        FIX_CACHE.popitem(last=False)

    if steps_log is not None:
        steps_log.append(
            "Cache full; evicted oldest correction entry"
            if evicted
            else "Cached correction for future reuse"
        )
+# Cache successful correction outputs to accelerate repeated error patterns
+FIX_CACHE: OrderedDict[str, str] = OrderedDict()
+FIX_CACHE_MAX_SIZE = 100
 def test_validator_functionality():
     """Test if the validator is actually working"""
     if not VALIDATOR_AVAILABLE:
     # Fallback: simple literal placeholder
     return f"<bf:{property_name}>PLACEHOLDER_VALUE</bf:{property_name}>"
+def get_ai_correction(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: int = None, include_warnings: bool = False, enable_validation_loop: bool | None = None, cache_key: Optional[str] = None, steps_log: Optional[List[str]] = None) -> str:
     """
     Generate AI-powered corrected RDF/XML based on validation errors.
         max_attempts = 1
         if steps_log is not None:
             steps_log.append("Iteration disabled; forcing single attempt")
+    if cache_key is None and validation_results and rdf_content:
+        cache_key = _make_fix_cache_key(validation_results, rdf_content, template)
+    if cache_key:
+        cached_result = _get_cached_correction(cache_key, steps_log)
+        if cached_result is not None:
+            return cached_result
     if not OPENAI_AVAILABLE:
         if steps_log is not None:
                             if steps_log is not None:
                                 steps_log.append(f"Attempt {attempt_no}: correction PASSED validation")
                             print(f"✅ Correction validated successfully on attempt {attempt_no}")
+                            result_text = f"""<!-- AI-generated correction validated successfully -->
 {corrected_rdf}"""
+                            if cache_key:
+                                _store_correction_in_cache(cache_key, result_text, steps_log)
+                            return result_text
                         else:
                             if steps_log is not None:
                                 steps_log.append(f"Attempt {attempt_no}: still invalid; will retry with updated errors")
 def get_ai_correction_targeted(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: int = None, include_warnings: bool = False, enable_validation_loop: bool | None = None, steps_log: Optional[List[str]] = None) -> str:
     """Fast path that attempts structured quick fixes before invoking the full AI loop."""
+    cache_key: Optional[str] = None
+    if validation_results and rdf_content:
+        cache_key = _make_fix_cache_key(validation_results, rdf_content, template)
+        cached = _get_cached_correction(cache_key, steps_log)
+        if cached is not None:
+            return cached
     focus_points = extract_error_focus_points(validation_results)
     missing_props = focus_points.get("missing_properties", [])
                 if conforms:
                     if steps_log is not None:
                         steps_log.append("Quick fix succeeded; validation now passes")
+                    if cache_key:
+                        _store_correction_in_cache(cache_key, working_rdf, steps_log)
                     return working_rdf
                 else:
                     if steps_log is not None:
                 if steps_log is not None:
                     steps_log.append(f"Quick fix validation error: {quick_err}; using AI fallback")
+    if validation_results and working_rdf:
+        cache_key = _make_fix_cache_key(validation_results, working_rdf, template)
     return get_ai_correction(
         validation_results,
         working_rdf,
         max_attempts=max_attempts,
         include_warnings=include_warnings,
         enable_validation_loop=enable_validation_loop,
+        cache_key=cache_key,
         steps_log=steps_log,
     )