Spaces:

jimfhahn
/

mcp4rdf

Sleeping

App Files Files Community

jimfhahn committed on Aug 2

Commit

7c7b0c4

verified ·

1 Parent(s): 62f2727

Upload app.py

Browse files

Files changed (1) hide show

app.py +379 -7

app.py CHANGED Viewed

@@ -13,6 +13,7 @@ import sys
 import asyncio
 import logging
 import requests
 from typing import Any, Dict, List, Optional
 import threading
 import time
@@ -168,6 +169,58 @@ def validate_rdf_tool(rdf_content: str, template: str = "monograph") -> dict:
             "conforms": False
         }
 def get_ai_suggestions(validation_results: str, rdf_content: str, include_warnings: bool = False) -> str:
     """
     Generate AI-powered fix suggestions for invalid RDF/XML.
@@ -208,7 +261,309 @@ def get_ai_suggestions(validation_results: str, rdf_content: str, include_warnin
         severity_instruction = "Focus only on violations (errors) and ignore any warnings." if not include_warnings else "Address both violations and warnings."
-        prompt = f"""You are an expert in RDF/XML and SHACL validation. Analyze the following validation results and provide clear, actionable suggestions for fixing the RDF issues.
 {severity_instruction}
@@ -218,13 +573,30 @@ Validation Results:
 Original RDF (first 1000 chars):
 {rdf_content[:1000]}...
-Please provide:
-1. A clear summary of what's wrong
-2. Specific step-by-step instructions to fix each issue
-3. Example corrections where applicable
-4. Best practices to prevent similar issues
-Format your response in a helpful, structured way using markdown."""
         # Make API call using OpenAI client
         print(f"🔄 Making API call to: {HF_ENDPOINT_URL}")

 import asyncio
 import logging
 import requests
+import re
 from typing import Any, Dict, List, Optional
 import threading
 import time
             "conforms": False
         }
def filter_validation_results_by_class(validation_results: str, rdf_content: str) -> dict:
    """
    Group the lines of a validation report by the RDF class they refer to.

    The report is cut into sections, each starting at a line that contains
    'Constraint Violation'. Non-blank lines before the first such line form
    a leading section. A section is filed under the class named by the last
    class marker seen inside that section (header line included); a section
    without any marker is filed under 'Other'.

    Args:
        validation_results (str): Full validation results
        rdf_content (str): Original RDF content (not read by this function;
            kept so existing callers keep working)
    Returns:
        dict: Class name -> newline-joined report lines for that class.
            Classes that received no lines are omitted.
    """
    class_results = {
        'Work': [],
        'Instance': [],
        'Title': [],
        'Contribution': [],
        'Other': []
    }
    if not validation_results:
        return {}

    # Checked in this order, so 'Work' wins over 'Instance' on the same line,
    # and so on down the table.
    class_markers = (
        ('Work', ('bf:Work', '/work/')),
        ('Instance', ('bf:Instance', '/instance/')),
        ('Title', ('bf:Title',)),
        ('Contribution', ('bf:Contribution',)),
    )

    def detect_class(text):
        """Return the class a line mentions, or None when it mentions none."""
        for name, needles in class_markers:
            if any(needle in text for needle in needles):
                return name
        return None

    current_section = []
    current_class = 'Other'
    for line in validation_results.split('\n'):
        if 'Constraint Violation' in line:
            # Flush the finished section BEFORE inspecting the new header, so
            # a class named on this header cannot re-file the previous one.
            if current_section:
                class_results[current_class].extend(current_section)
            current_section = [line]
            # Each violation starts unclassified instead of inheriting the
            # class of whatever violation happened to precede it.
            current_class = 'Other'
        elif line.strip():
            current_section.append(line)

        detected = detect_class(line)
        if detected is not None:
            current_class = detected

    # Add last section
    if current_section:
        class_results[current_class].extend(current_section)

    # Remove empty classes
    return {name: '\n'.join(found) for name, found in class_results.items() if found}
 def get_ai_suggestions(validation_results: str, rdf_content: str, include_warnings: bool = False) -> str:
     """
     Generate AI-powered fix suggestions for invalid RDF/XML.
         severity_instruction = "Focus only on violations (errors) and ignore any warnings." if not include_warnings else "Address both violations and warnings."
+        # Filter validation results by class to reduce token usage
+        class_results = filter_validation_results_by_class(validation_results, rdf_content)
+        # Determine primary class with most errors
+        primary_class = max(class_results.keys(), key=lambda k: len(class_results[k]))
+        focused_results = class_results[primary_class]
+        # Extract only relevant RDF section for the primary class
+        relevant_rdf = extract_relevant_rdf_section(rdf_content, primary_class)
+        prompt = f"""You are an expert in RDF/XML and SHACL validation. Analyze the validation errors for the {primary_class} class and provide CONCISE, ACTIONABLE fixes.
+{severity_instruction}
+Validation Errors for {primary_class}:
+{focused_results[:1500]}
+Relevant RDF Section:
+{relevant_rdf[:800]}
+Instructions:
+1. ONE sentence: What's wrong with this {primary_class}?
+2. List errors (max 3 words each)
+3. Show exact XML fixes
+Format:
+**Issue:** [One sentence about the {primary_class} problem]
+**Errors:**
+• Error 1
+• Error 2
+**Fix:**
+```xml
+[Complete corrected {primary_class} section]
+```
+Be ultra-concise. Show the fix, not explanations."""
+        # Make API call using OpenAI client
+        print(f"🔄 Making focused API call for {primary_class} class")
+        print(f"🔄 Sending {len(focused_results)} chars instead of {len(validation_results)} chars")
+        chat_completion = client.chat.completions.create(
+            model=HF_MODEL,
+            messages=[
+                {
+                    "role": "user",
+                    "content": prompt
+                }
+            ],
+            max_tokens=800,  # Reduced since we're focused on one class
+            temperature=0.5,  # Lower temperature for more focused responses
+            top_p=0.9
+        )
+        print("✅ API call successful")
+        generated_text = chat_completion.choices[0].message.content
+        # Add note about other classes if present
+        other_classes = [k for k in class_results.keys() if k != primary_class]
+        class_note = f"\n\n📌 **Note:** Focused on {primary_class} errors. " + \
+                     (f"Also found issues in: {', '.join(other_classes)}" if other_classes else "")
+        return f"🤖 **AI-Powered Suggestions ({('Violations + Warnings' if include_warnings else 'Violations Only')}):**\n\n{generated_text}{class_note}"
+    except Exception as e:
+        logger.error(f"OpenAI/HF Inference Endpoint error: {str(e)}")
+        return f"""
+❌ **AI suggestions error**: {str(e)}
+{generate_manual_suggestions(validation_results)}
+"""
def extract_relevant_rdf_section(rdf_content: str, class_name: str) -> str:
    """
    Pull out the first RDF/XML element for one class from a document.

    Args:
        rdf_content (str): Full RDF content
        class_name (str): Class name to extract (Work, Instance, Title,
            Contribution); any other name yields the fallback
    Returns:
        str: The first matching element, prefixed with an XML comment listing
            up to three namespace declarations found in the first 500
            characters of the document (when any are found). When the class
            is unknown or no element matches, the first 1000 characters of
            the document are returned instead.
    """
    import re

    # One non-greedy pattern per supported class: opening tag through the
    # first closing tag that follows it.
    section_regex = {
        'Work': r'<bf:Work.*?</bf:Work>',
        'Instance': r'<bf:Instance.*?</bf:Instance>',
        'Title': r'<bf:Title.*?</bf:Title>',
        'Contribution': r'<bf:Contribution.*?</bf:Contribution>'
    }.get(class_name)

    if section_regex:
        found = re.search(section_regex, rdf_content, re.DOTALL)
        if found:
            element = found.group(0)
            # Only the head of the document is scanned for namespace
            # declarations, and only the first three are reported.
            ns_decls = re.findall(r'xmlns:\w+="[^"]*"', rdf_content[:500])
            if not ns_decls:
                return element
            return f"<!-- Namespaces: {' '.join(ns_decls[:3])} -->\n{element}"

    # Unknown class or nothing matched: hand back the head of the document.
    return rdf_content[:1000]
def get_ai_correction(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: Optional[int] = None, include_warnings: bool = False) -> str:
    """
    Generate AI-powered corrected RDF/XML based on validation errors.

    The validation results are grouped by RDF class, the matching RDF section
    for each class is sent to the LLM together with that class's errors, and
    the returned sections are merged back into the original RDF.

    The merged result is NOT re-validated inside this function; callers that
    need a guarantee should run validation on the returned RDF themselves.

    Args:
        validation_results (str): The validation error messages
        rdf_content (str): The original invalid RDF/XML content
        template (str): The validation template to use (not read by this
            function; kept for interface compatibility)
        max_attempts (int): Maximum number of attempts to generate valid RDF
            (uses MAX_CORRECTION_ATTEMPTS if None)
        include_warnings (bool): Whether to fix warnings in addition to violations
    Returns:
        str: An XML comment header followed by either the merged, corrected
            RDF/XML or manual correction hints when no AI correction could be
            produced.
    """
    # Use configuration default if not specified
    if max_attempts is None:
        max_attempts = MAX_CORRECTION_ATTEMPTS
    # Check if validation loop is enabled
    if not ENABLE_VALIDATION_LOOP:
        max_attempts = 1  # Fall back to single attempt if validation loop disabled
    # NOTE(review): max_attempts is resolved above but nothing below reads it;
    # the class-based flow makes exactly one LLM call per class.

    if not OPENAI_AVAILABLE:
        return generate_manual_correction_hints(validation_results, rdf_content)

    # Get API key dynamically at runtime
    current_api_key = os.getenv('HF_API_KEY', '')
    if not current_api_key:
        return f"""<!-- AI correction disabled: Set HF_API_KEY as a Secret in your Space settings -->
{generate_manual_correction_hints(validation_results, rdf_content)}"""

    try:
        client = get_openai_client()
        if not client:
            return f"""<!-- AI correction disabled: HF_API_KEY not configured -->
{generate_manual_correction_hints(validation_results, rdf_content)}"""

        # Add timeout protection. A monotonic clock is used so that system
        # clock adjustments cannot shorten or extend the budget.
        import time
        start_time = time.monotonic()
        timeout = 60  # 60 second timeout

        severity_instruction = "Fix only the violations (errors) and ignore any warnings." if not include_warnings else "Fix both violations and warnings."

        # Filter validation results by class
        class_results = filter_validation_results_by_class(validation_results, rdf_content)

        # Only these classes have a section that merge_corrected_sections can
        # put back. Anything else (e.g. 'Other') would cost an LLM call whose
        # output is discarded, and would make an unchanged document look like
        # a successful correction.
        mergeable_classes = ('Work', 'Instance', 'Title', 'Contribution')

        # Process each class separately to avoid overwhelming the LLM
        corrected_sections = {}
        for class_name, class_errors in class_results.items():
            if not class_errors:
                continue
            if class_name not in mergeable_classes:
                print(f"⏭️ Skipping {class_name}: no RDF section to merge a correction into")
                continue

            # Check timeout (stop 10 seconds early to leave room for merging)
            if time.monotonic() - start_time > timeout - 10:
                print(f"⏰ Approaching timeout, skipping {class_name}")
                break

            print(f"🔄 Correcting {class_name} section")

            # Extract relevant section
            relevant_section = extract_relevant_rdf_section(rdf_content, class_name)

            prompt = f"""Fix this {class_name} RDF section based on these specific errors.
{severity_instruction}
Errors for {class_name}:
{class_errors[:800]}
Current {class_name} RDF:
{relevant_section[:800]}
Return ONLY the corrected {class_name} XML section. No explanations."""

            try:
                chat_completion = client.chat.completions.create(
                    model=HF_MODEL,
                    messages=[
                        {
                            "role": "user",
                            "content": prompt
                        }
                    ],
                    max_tokens=1000,
                    temperature=0.3,
                    timeout=20  # Shorter timeout per section
                )
                corrected_section = chat_completion.choices[0].message.content.strip()
                corrected_sections[class_name] = extract_rdf_from_response(corrected_section)
            except Exception as e:
                # Best effort per class: one failed section must not abort
                # the corrections for the remaining classes.
                print(f"❌ Error correcting {class_name}: {str(e)}")
                continue

        # Merge corrections back into original RDF
        if corrected_sections:
            corrected_rdf = merge_corrected_sections(rdf_content, corrected_sections)
            return f"""<!-- AI-generated correction (class-based processing) -->
{corrected_rdf}"""
        else:
            return f"""<!-- AI correction failed - timeout or errors -->
{generate_manual_correction_hints(validation_results, rdf_content)}"""

    except Exception as e:
        logger.error(f"LLM API error: {str(e)}")
        return f"""<!-- Error generating AI correction: {str(e)} -->
{generate_manual_correction_hints(validation_results, rdf_content)}"""
def merge_corrected_sections(original_rdf: str, corrected_sections: dict) -> str:
    """
    Merge corrected class sections back into the original RDF.

    For each supported class (Work, Instance, Title, Contribution) the first
    matching element in the document is replaced with the corrected text.
    Entries for other class names, and empty corrections, are ignored so the
    original element is left in place.

    Args:
        original_rdf (str): Original RDF content
        corrected_sections (dict): Corrected sections by class
    Returns:
        str: Merged RDF with corrections
    """
    import re

    # Built once rather than on every loop iteration.
    patterns = {
        'Work': r'<bf:Work.*?</bf:Work>',
        'Instance': r'<bf:Instance.*?</bf:Instance>',
        'Title': r'<bf:Title.*?</bf:Title>',
        'Contribution': r'<bf:Contribution.*?</bf:Contribution>'
    }

    result = original_rdf
    # Replace each corrected section
    for class_name, corrected_section in corrected_sections.items():
        pattern = patterns.get(class_name)
        if not pattern:
            continue
        if not corrected_section:
            # An empty correction would delete the element outright; keep the
            # original section instead.
            continue
        # Pass the replacement through a function so it is inserted
        # literally. A plain string would be parsed as a template, and any
        # backslash or group reference in the model output would raise
        # re.error or be rewritten.
        result = re.sub(
            pattern,
            lambda _match, text=corrected_section: text,
            result,
            count=1,
            flags=re.DOTALL,
        )
    return result
+# Sample RDF data for examples
+# MCP Server Tools (can be used independently)
+# Note: This section exists earlier in the file, we're removing the duplicates
+    """
+    Generate AI-powered fix suggestions for invalid RDF/XML.
+    This tool analyzes validation results and provides actionable suggestions
+    for fixing RDF/XML validation errors using AI or rule-based analysis.
+    Args:
+        validation_results (str): The validation error messages
+        rdf_content (str): The original RDF/XML content that failed validation
+        include_warnings (bool): Whether to include warnings in suggestions
+    Returns:
+        str: Detailed suggestions for fixing the RDF validation issues
+    """
+    if not OPENAI_AVAILABLE:
+        return generate_manual_suggestions(validation_results)
+    # Get API key dynamically at runtime
+    current_api_key = os.getenv('HF_API_KEY', '')
+    if not current_api_key:
+        return f"""
+🔑 **AI suggestions disabled**: Please set your Hugging Face API key as a Secret in your Space settings.
+{generate_manual_suggestions(validation_results)}
+"""
+    try:
+        # Use OpenAI client with your Hugging Face Inference Endpoint
+        client = get_openai_client()
+        if not client:
+            return f"""
+🔑 **AI suggestions disabled**: HF_API_KEY not configured.
+{generate_manual_suggestions(validation_results)}
+"""
+        severity_instruction = "Focus only on violations (errors) and ignore any warnings." if not include_warnings else "Address both violations and warnings."
+        prompt = f"""You are an expert in RDF/XML and SHACL validation. Analyze the validation errors and provide CONCISE, ACTIONABLE fix suggestions.
 {severity_instruction}
 Original RDF (first 1000 chars):
 {rdf_content[:1000]}...
+Instructions:
+1. Start with a ONE-SENTENCE summary of the main issue
+2. List the specific errors in bullet points (max 5 words per error)
+3. Provide the exact fix for each error with code snippets
+4. Keep explanations minimal - focus on solutions
+Format:
+**Main Issue:** [One sentence]
+**Errors Found:**
+• Error 1 name
+• Error 2 name
+**Fixes:**
+1. **Error 1**:
+   ```xml
+   [exact code to add/fix]
+   ```
+2. **Error 2**:
+   ```xml
+   [exact code to add/fix]
+   ```
+Be direct and solution-focused. No lengthy explanations."""
         # Make API call using OpenAI client
         print(f"🔄 Making API call to: {HF_ENDPOINT_URL}")