RDF Validation Deployment committed on
Commit
4f8c53c
·
1 Parent(s): 7c7b0c4

WIP before syncing with hf-https/main

Browse files
APIarchitecture.mmd ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ graph TB
2
+ subgraph "Client Applications"
3
+ APP1[Python App]
4
+ APP2[Node.js App]
5
+ APP3[Claude Desktop]
6
+ APP4[Web App]
7
+ end
8
+
9
+ subgraph "API Endpoints"
10
+ REST[REST API<br/>HTTP/JSON]
11
+ SSE[SSE Endpoint<br/>MCP Protocol]
12
+ GRAD[Gradio API<br/>WebSocket]
13
+ end
14
+
15
+ subgraph "Integration Methods"
16
+ M1[Direct HTTP Calls]
17
+ M2[MCP Client SDK]
18
+ M3[Gradio Client]
19
+ end
20
+
21
+ subgraph "API Functions"
22
+ direction TB
23
+ FUNC[Available Functions]
24
+ F1[validate_rdf_tool<br/>POST /api/validate]
25
+ F2[get_ai_suggestions<br/>POST /api/suggest]
26
+ F3[get_ai_correction<br/>POST /api/correct]
27
+ F4[get_rdf_examples<br/>GET /api/examples]
28
+
29
+ FUNC --> F1
30
+ FUNC --> F2
31
+ FUNC --> F3
32
+ FUNC --> F4
33
+ end
34
+
35
+ APP1 --> M1
36
+ APP2 --> M1
37
+ APP3 --> M2
38
+ APP4 --> M3
39
+
40
+ M1 --> REST
41
+ M2 --> SSE
42
+ M3 --> GRAD
43
+
44
+ REST --> FUNC
45
+ SSE --> FUNC
46
+ GRAD --> FUNC
RDFandLLMcorrectionflow.mmd ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ sequenceDiagram
2
+ participant User
3
+ participant App as MCP4RDF Service
4
+ participant Val as Validator
5
+ participant SHACL as SHACL Engine
6
+ participant LLM as LLM (Llama-3.3)
7
+
8
+ User->>App: Submit RDF/XML
9
+ App->>Val: validate_rdf_tool()
10
+ Val->>SHACL: Load template & validate
11
+
12
+ alt Valid RDF
13
+ SHACL-->>Val: Conforms
14
+ Val-->>App: {conforms: true, status: "Valid"}
15
+ App-->>User: Display success
16
+ else Invalid RDF
17
+ SHACL-->>Val: Violations found
18
+ Val-->>App: {conforms: false, results: "errors..."}
19
+
20
+ App->>LLM: get_ai_suggestions(errors, rdf)
21
+ Note over LLM: Analyze errors<br/>Generate fix steps
22
+ LLM-->>App: Structured suggestions
23
+
24
+ App->>LLM: get_ai_correction(errors, rdf)
25
+ Note over LLM: Generate corrected<br/>RDF/XML
26
+ LLM-->>App: Fixed RDF/XML
27
+
28
+ App-->>User: Display errors +<br/>suggestions +<br/>corrected RDF
29
+ end
app.py CHANGED
@@ -606,13 +606,17 @@ Be direct and solution-focused. No lengthy explanations."""
606
  chat_completion = client.chat.completions.create(
607
  model=HF_MODEL,
608
  messages=[
 
 
 
 
609
  {
610
  "role": "user",
611
  "content": prompt
612
  }
613
  ],
614
  max_tokens=1500,
615
- temperature=0.7,
616
  top_p=0.9
617
  )
618
 
@@ -800,48 +804,136 @@ Please provide the corrected RDF/XML that addresses all validation issues.
800
  {generate_manual_correction_hints(validation_results, rdf_content)}"""
801
 
802
  def generate_manual_suggestions(validation_results: str) -> str:
803
- """Generate rule-based suggestions when AI is not available"""
804
- suggestions = []
805
-
806
- if "Constraint Violation" in validation_results:
807
- suggestions.append("• Fix SHACL constraint violations by ensuring required properties are present")
808
-
809
- if "Missing property" in validation_results or "missing" in validation_results.lower():
810
- suggestions.append("• Add missing required properties (check template requirements)")
811
-
812
- if "datatype" in validation_results.lower():
813
- suggestions.append("• Correct data type mismatches (ensure proper literal types)")
814
-
815
- if "namespace" in validation_results.lower() or "prefix" in validation_results.lower():
816
- suggestions.append(" Add missing namespace declarations at the top of your RDF")
817
-
818
- if "XML" in validation_results or "syntax" in validation_results.lower():
819
- suggestions.append("• Fix XML syntax errors (check for unclosed tags, invalid characters)")
820
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
821
  if not suggestions:
822
- suggestions.append("• Review detailed validation results for specific issues")
823
- suggestions.append("• Ensure your RDF follows the selected template requirements")
824
-
825
  suggestions_text = "\n".join(suggestions)
826
-
827
  return f"""
828
- 📋 **Manual Analysis:**
829
 
830
  {suggestions_text}
831
 
832
- 💡 **General Tips:**
833
- Check namespace declarations at the top of your RDF
834
- Ensure all required properties are present
835
- Verify data types match expected formats
836
- Make sure XML structure is well-formed
837
-
838
- 🔧 **Common Fixes:**
839
- Add missing namespace prefixes
840
- • Include required properties like rdf:type
841
- • Fix malformed URIs or literals
842
- • Ensure proper XML syntax
843
  """
844
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
845
  def generate_manual_correction_hints(validation_results: str, rdf_content: str) -> str:
846
  """Generate manual correction hints when AI is not available"""
847
  return f"""<!-- Manual correction hints based on validation results -->
@@ -861,35 +953,35 @@ MANUAL CORRECTION STEPS:
861
  5. Validate data types
862
  -->"""
863
 
864
- def validate_rdf_interface(rdf_content: str, template: str, use_ai: bool = True, include_warnings: bool = False):
865
- """
866
- Main validation function for Gradio interface and MCP server.
867
-
868
- This function provides comprehensive RDF/XML validation with AI-powered
869
- suggestions and corrections. It serves as the primary interface for both
870
- the Gradio web UI and MCP client tools.
871
-
872
- Args:
873
- rdf_content (str): The RDF/XML content to validate
874
- template (str): Validation template to use ('monograph' or 'custom')
875
- use_ai (bool): Whether to enable AI-powered suggestions and corrections
876
- include_warnings (bool): Whether to include warnings in AI corrections (violations only by default)
877
-
878
- Returns:
879
- tuple: (status, results_text, suggestions, corrected_rdf) containing:
880
- - status: Validation status message
881
- - results_text: Detailed validation results
882
- - suggestions: AI or manual fix suggestions
883
- - corrected_rdf: AI-generated corrections or success message
884
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
885
  if not rdf_content.strip():
886
- return "❌ Error", "No RDF/XML data provided", "", ""
887
-
888
  # Validate RDF
889
  result = validate_rdf_tool(rdf_content, template)
890
 
891
  if "error" in result:
892
- return f"❌ Error: {result['error']}", "", "", ""
893
 
894
  status = result["status"]
895
  results_text = result["results"]
@@ -1102,6 +1194,20 @@ def create_interface():
1102
  show_copy_button=True,
1103
  placeholder="Corrected RDF will appear here after validation..."
1104
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1105
 
1106
  # Event handlers
1107
  validate_btn.click(
@@ -1129,8 +1235,8 @@ def create_interface():
1129
  )
1130
 
1131
  clear_btn.click(
1132
- lambda: ("", "", "", "", ""),
1133
- outputs=[rdf_input, status_output, results_output, suggestions_output, corrected_output]
1134
  )
1135
 
1136
  # Footer with instructions
 
606
  chat_completion = client.chat.completions.create(
607
  model=HF_MODEL,
608
  messages=[
609
+ {
610
+ "role": "system",
611
+ "content": "You are a friendly librarian helping fix catalog records. Never use technical RDF or SHACL terminology."
612
+ },
613
  {
614
  "role": "user",
615
  "content": prompt
616
  }
617
  ],
618
  max_tokens=1500,
619
+ temperature=0.6,
620
  top_p=0.9
621
  )
622
 
 
804
  {generate_manual_correction_hints(validation_results, rdf_content)}"""
805
 
806
  def generate_manual_suggestions(validation_results: str) -> str:
807
+ """Generate generic, pattern-based suggestions when AI is not available.
808
+
809
+ Note: Avoid hardcoding SHACL rules or specific property requirements; rely only on
810
+ patterns present in the validation output text.
811
+ """
812
+ vr_lower = validation_results.lower() if validation_results else ""
813
+ suggestions: List[str] = []
814
+
815
+ # Missing/required
816
+ if ("mincount" in vr_lower) or ("missing" in vr_lower) or ("required" in vr_lower):
817
+ suggestions.append("• Some required fields are missing. Add the missing information where indicated.")
818
+
819
+ # Too many values
820
+ if ("maxcount" in vr_lower) or ("too many" in vr_lower) or ("more than allowed" in vr_lower):
821
+ suggestions.append("• Some fields have too many values. Keep only the main/one value as required.")
822
+
823
+ # Datatype/format issues
824
+ if ("datatype" in vr_lower) or ("type mismatch" in vr_lower) or ("expected" in vr_lower and "datatype" in vr_lower):
825
+ suggestions.append("• Some values are in the wrong format. Use the expected format (e.g., dates like YYYY-MM-DD).")
826
+
827
+ # URI/identifier issues
828
+ if ("iri" in vr_lower) or ("uri" in vr_lower) or ("identifier" in vr_lower and "invalid" in vr_lower):
829
+ suggestions.append("• Some identifiers look malformed. Use complete, valid web addresses or proper identifiers.")
830
+
831
+ # Namespace/prefix issues
832
+ if ("namespace" in vr_lower) or ("prefix" in vr_lower):
833
+ suggestions.append("• Define all XML namespace prefixes at the top and use them consistently.")
834
+
835
+ # XML syntax/structure
836
+ if ("xml" in vr_lower) or ("syntax" in vr_lower) or ("well-formed" in vr_lower):
837
+ suggestions.append("• Fix XML structure issues (unclosed tags, invalid characters, or nesting problems).")
838
+
839
+ # Fallback
840
  if not suggestions:
841
+ suggestions.append("• Review the validation details and update the record where issues are highlighted.")
842
+ suggestions.append("• Follow the selected template; add missing fields and correct formats as needed.")
843
+
844
  suggestions_text = "\n".join(suggestions)
845
+
846
  return f"""
847
+ 📋 **What needs fixing:**
848
 
849
  {suggestions_text}
850
 
851
+ 💡 **Quick tips:**
852
+ Include required fields when noted
853
+ Keep single-value fields to one value
854
+ Use the expected formats (e.g., for dates)
855
+ Declare and use XML namespace prefixes consistently
856
+ • Ensure the XML is well‑formed
857
+
858
+ Need help? Load an example and compare the structure.
 
 
 
859
  """
860
 
861
def clean_technical_jargon(text: str) -> str:
    """Replace technical RDF/SHACL terms with plain language for end users.

    Terms are matched case-sensitively and only as whole words (an optional
    plural "s" is tolerated, so "violations" becomes "issues"). Matching on
    word boundaries keeps ordinary words that merely contain a term intact:
    "paragraph" is not turned into "paradataset" and "publish:" does not lose
    its trailing "sh:".

    Args:
        text: Message text that may contain RDF/SHACL jargon.

    Returns:
        The text with jargon replaced. Falsy input (``None`` or ``""``) is
        returned unchanged.
    """
    if not text:
        return text
    import re

    # Whole-word vocabulary: RDF/SHACL jargon followed by tone softening.
    word_terms = {
        "URIRef": "identifier",
        "URI": "identifier",
        "IRI": "identifier",
        "Literal": "text value",
        "triple": "field entry",
        "graph": "dataset",
        "node": "record",
        "subject": "record",
        "predicate": "field type",
        "object": "value",
        "SHACL": "validation",
        "constraint": "rule",
        "conformance": "compliance",
        "violation": "issue",
        "Invalid": "Incorrect",
        "Failed": "Did not pass",
        "Missing": "Not found",
    }
    # Terms ending in ":" (namespace prefixes and the "Error:" label). They
    # only need a boundary on the left because ":" is itself a delimiter.
    marker_terms = {
        "sh:": "",
        "rdf:": "",
        "rdfs:": "",
        "xsd:": "",
        "Error:": "Issue:",
    }
    replacements = {**word_terms, **marker_terms}

    def _alternation(terms):
        # Longest first so "URIRef" is preferred over its prefix "URI".
        ordered = sorted(terms, key=len, reverse=True)
        return "|".join(re.escape(term) for term in ordered)

    # The lookahead allows a trailing plural "s" without consuming it, so the
    # "s" survives after the replacement.
    pattern = re.compile(
        r"\b(?:(?:%s)(?=s?\b)|(?:%s))"
        % (_alternation(word_terms), _alternation(marker_terms))
    )
    # Single pass: replacement output is never re-scanned for further terms.
    return pattern.sub(lambda match: replacements[match.group(0)], text)
895
+
896
def parse_shacl_results_for_ai(results_text: str) -> str:
    """Simplify SHACL results into clearer sentences for AI processing.

    Pattern-based only; does not depend on any SHACL rule definitions. Each
    non-blank line of the report is mapped to at most one plain-language
    sentence (the first pattern that matches wins). Lines that match no
    pattern but mention "Violation" yield a generic sentence.

    Args:
        results_text: Raw validation report text.

    Returns:
        The distinct sentences in first-seen order, joined by newlines.
        Returns ``""`` for empty input, and the original text unchanged when
        no line could be classified.
    """
    if not results_text:
        return ""
    import re

    # "iri"/"uri" and "xml" are matched as whole words only; as bare
    # substrings they also hit unrelated words such as "during", "security"
    # or the "XMLSchema" part of datatype URLs.
    patterns = [
        (re.compile(r"minCount", re.IGNORECASE),
         "A required field is missing."),
        (re.compile(r"maxCount", re.IGNORECASE),
         "A field has more values than allowed; only one may be permitted."),
        (re.compile(r"datatype", re.IGNORECASE),
         "A field has a value in the wrong format."),
        (re.compile(r"\b(?:iri|uri)\b", re.IGNORECASE),
         "An identifier looks malformed or incomplete."),
        (re.compile(r"namespace|prefix", re.IGNORECASE),
         "A namespace prefix is undefined or inconsistent."),
        (re.compile(r"\bxml\b|syntax|well-formed", re.IGNORECASE),
         "The XML structure has an error (e.g., unclosed tag)."),
    ]

    simplified = []
    for raw_line in results_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        message = next(
            (text for regex, text in patterns if regex.search(line)), None
        )
        # "Constraint Violation" contains "Violation", so one check suffices.
        if message is None and "Violation" in line:
            message = "A record rule was not met."
        if message is not None:
            simplified.append(message)

    # dict preserves insertion order, giving an order-preserving dedup.
    unique = list(dict.fromkeys(simplified))
    return "\n".join(unique) if unique else results_text
936
+
937
  def generate_manual_correction_hints(validation_results: str, rdf_content: str) -> str:
938
  """Generate manual correction hints when AI is not available"""
939
  return f"""<!-- Manual correction hints based on validation results -->
 
953
  5. Validate data types
954
  -->"""
955
 
956
def extract_xml_from_text(text: str) -> str:
    """Extract RDF/XML from model output that may include extra formatting.

    Looks for the first <rdf:RDF ...> ... </rdf:RDF> block and returns it
    verbatim. If no such block exists, a surrounding Markdown code fence
    (an opening fence line with an optional language tag, and a closing
    fence) is removed and the remaining text is returned with surrounding
    whitespace trimmed.

    Args:
        text: Raw model output.

    Returns:
        The extracted RDF/XML block, or the de-fenced text. Falsy input is
        returned unchanged, and so is text that would be empty after the
        fences are stripped.
    """
    if not text:
        return text
    import re

    # [\s\S] spans newlines without needing re.DOTALL; the non-greedy
    # quantifier stops at the first closing tag.
    block = re.search(r"<rdf:RDF[\s\S]*?</rdf:RDF>", text, re.IGNORECASE)
    if block:
        return block.group(0)

    stripped = text.strip()
    # Opening fence line, tolerating trailing spaces and CRLF line endings.
    without_open = re.sub(r"\A```[\w+#.-]*[ \t]*\r?\n", "", stripped)
    # Closing fence, then trim the newline that preceded it.
    without_fences = re.sub(r"```\Z", "", without_open).strip()
    return without_fences if without_fences else text
974
+
975
+ def validate_rdf_interface(rdf_content: str, template: str, use_ai: bool = True):
976
+ """Main validation function for Gradio interface"""
977
  if not rdf_content.strip():
978
+ return "❌ Error", "No RDF/XML data provided", "", "", "", ""
979
+
980
  # Validate RDF
981
  result = validate_rdf_tool(rdf_content, template)
982
 
983
  if "error" in result:
984
+ return f"❌ Error: {result['error']}", "", "", "", "", ""
985
 
986
  status = result["status"]
987
  results_text = result["results"]
 
1194
  show_copy_button=True,
1195
  placeholder="Corrected RDF will appear here after validation..."
1196
  )
1197
+
1198
+ with gr.Row():
1199
+ corrected_status_output = gr.Textbox(
1200
+ label="Re-validation Status (Corrected RDF)",
1201
+ interactive=False,
1202
+ lines=1,
1203
+ elem_classes=["status-box"]
1204
+ )
1205
+ corrected_results_output = gr.Textbox(
1206
+ label="Re-validation Details",
1207
+ interactive=False,
1208
+ lines=6,
1209
+ show_copy_button=True
1210
+ )
1211
 
1212
  # Event handlers
1213
  validate_btn.click(
 
1235
  )
1236
 
1237
  clear_btn.click(
1238
+ lambda: ("", "", "", "", "", "", ""),
1239
+ outputs=[rdf_input, status_output, results_output, suggestions_output, corrected_output, corrected_status_output, corrected_results_output]
1240
  )
1241
 
1242
  # Footer with instructions
copilot-instructions.md ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # GitHub Copilot Instructions for MCP4RDF Project
2
+
3
+ ## Project Context
4
+
5
+ This is an RDF validation tool with AI features, deployed on Hugging Face Spaces. It validates RDF/XML against SHACL schemas and provides AI-powered suggestions for fixing validation errors.
6
+
7
+ ### Key Technologies
8
+ - **Frontend**: Gradio 5.33.0
9
+ - **RDF Processing**: rdflib, pyshacl
10
+ - **AI Integration**: Hugging Face Inference API
11
+ - **Protocol**: MCP (Model Context Protocol)
12
+ - **Deployment**: Hugging Face Spaces
13
+
14
+ ### Project Structure
15
+ ```
16
+ mcp4rdf-hf-space/
17
+ ├── app.py # Main Gradio application
18
+ ├── validator.py # Core SHACL validation logic
19
+ ├── mcp_server_gradio.py # MCP server implementation
20
+ ├── MonographDCTAP/ # TSV files with SHACL definitions
21
+ ├── electronic_MonographDCTAP/ # Electronic format SHACL definitions
22
+ └── requirements.txt # Python dependencies
23
+ ```
24
+
25
+ ## Code Style Guidelines
26
+
27
+ ### Python Standards
28
+ - Use type hints for function parameters and return values
29
+ - Follow PEP 8 naming conventions
30
+ - Add docstrings for all public functions
31
+ - Use logging instead of print statements
32
+
33
+ ### RDF/SHACL Patterns
34
+ ```python
35
+ # Always bind common namespaces
36
+ STANDARD_NAMESPACES = {
37
+ "bf": "http://id.loc.gov/ontologies/bibframe/",
38
+ "bflc": "http://id.loc.gov/ontologies/bflc/",
39
+ "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
40
+ "sh": "http://www.w3.org/ns/shacl#"
41
+ }
42
+
43
+ # Use URIRef for RDF predicates
44
+ from rdflib import URIRef, Literal, Graph
45
+ sh_path = URIRef("http://www.w3.org/ns/shacl#path")
46
+ ```
47
+
48
+ ## Common Tasks and Templates
49
+
50
+ ### 1. Adding New SHACL Validation Rules
51
+ ```python
52
+ # Template for adding a new property constraint
53
+ def add_property_constraint(shape_node, property_id, constraints):
54
+ """
55
+ Add SHACL property constraints to a shape.
56
+
57
+ Args:
58
+ shape_node: RDF node representing the shape
59
+ property_id: Property identifier (e.g., "bf:title")
60
+ constraints: Dict with keys like 'mandatory', 'repeatable', 'datatype'
61
+ """
62
+ # Copilot: implement SHACL property shape creation
63
+ ```
64
+
65
+ ### 2. Parsing TSV to SHACL
66
+ ```python
67
+ # When converting TSV rows to SHACL shapes, use this pattern:
68
+ def tsv_row_to_shacl(row, graph, prefixes):
69
+ """
70
+ Convert a TSV row to SHACL constraints.
71
+ Expected columns: shapeID, propertyID, mandatory, repeatable, valueShape
72
+ """
73
+ # Copilot: handle prefix expansion and constraint mapping
74
+ ```
75
+
76
+ ### 3. Error Message Formatting
77
+ ```python
78
+ # Format validation errors for user display
79
+ def format_validation_error(result):
80
+ """
81
+ Format pyshacl validation result for Gradio display.
82
+ Include: severity, focus node, property path, and message
83
+ """
84
+ # Copilot: create user-friendly error messages with context
85
+ ```
86
+
87
+ ### 4. AI Integration Patterns
88
+ ```python
89
+ # Template for AI API calls
90
+ async def get_ai_suggestion(error_context, rdf_snippet):
91
+ """
92
+ Get AI suggestions for fixing RDF validation errors.
93
+ Uses Hugging Face Inference API with proper error handling.
94
+ """
95
+ # Copilot: implement with retry logic and timeout handling
96
+ ```
97
+
98
+ ## Debugging Helpers
99
+
100
+ ### SHACL Validation Issues
101
+ ```python
102
+ # Debug template for missing validations
103
+ def debug_shacl_targeting():
104
+ """
105
+ Log all target classes and matching nodes in the data graph.
106
+ Helps diagnose why validations aren't triggering.
107
+ """
108
+ # Copilot: implement comprehensive logging of shapes and targets
109
+ ```
110
+
111
+ ### Namespace Resolution
112
+ ```python
113
+ # Helper for namespace issues
114
+ def resolve_prefixed_uri(prefixed_id, namespace_map):
115
+ """
116
+ Resolve prefixed identifiers like 'bf:Work' to full URIs.
117
+ Handle edge cases: no prefix, already full URI, unknown prefix
118
+ """
119
+ # Copilot: implement robust prefix resolution
120
+ ```
121
+
122
+ ## MCP Server Implementation
123
+
124
+ ### Tool Registration Pattern
125
+ ```python
126
+ # MCP tool definition template
127
+ @mcp_server.tool()
128
+ async def new_mcp_tool(param1: str, param2: Optional[str] = None) -> dict:
129
+ """
130
+ MCP tool implementation.
131
+ Returns: {"success": bool, "result": Any, "error": Optional[str]}
132
+ """
133
+ # Copilot: implement with proper error handling and logging
134
+ ```
135
+
136
+ ### SSE Event Formatting
137
+ ```python
138
+ # Server-Sent Events response pattern
139
+ def format_sse_response(tool_name, result):
140
+ """
141
+ Format MCP tool response as SSE event.
142
+ Include proper event type and JSON encoding.
143
+ """
144
+ # Copilot: implement SSE formatting with error states
145
+ ```
146
+
147
+ ## Testing Patterns
148
+
149
+ ### Unit Test Templates
150
+ ```python
151
+ # Test SHACL shape generation
152
+ def test_shape_generation():
153
+ """
154
+ Test that TSV rows correctly generate SHACL shapes.
155
+ Include: basic properties, cardinality, value shapes
156
+ """
157
+ # Copilot: generate comprehensive test cases
158
+
159
+ # Test RDF validation
160
+ def test_rdf_validation():
161
+ """
162
+ Test validation with various RDF inputs.
163
+ Include: valid, invalid, edge cases
164
+ """
165
+ # Copilot: create test data and assertions
166
+ ```
167
+
168
+ ### Integration Test Patterns
169
+ ```python
170
+ # Test MCP server endpoints
171
+ async def test_mcp_endpoints():
172
+ """
173
+ Test all MCP tools with realistic inputs.
174
+ Verify: response format, error handling, performance
175
+ """
176
+ # Copilot: implement async test scenarios
177
+ ```
178
+
179
+ ## Performance Optimization
180
+
181
+ ### Caching Strategies
182
+ ```python
183
+ # Cache compiled SHACL graphs
184
+ @lru_cache(maxsize=10)
185
+ def get_compiled_shacl_graph(template_name):
186
+ """
187
+ Cache parsed SHACL graphs to avoid repeated parsing.
188
+ """
189
+ # Copilot: implement with proper cache invalidation
190
+
191
+ # Cache namespace resolutions
192
+ @lru_cache(maxsize=1000)
193
+ def cached_uri_resolution(prefixed_id, namespace_json):
194
+ """
195
+ Cache URI resolutions to improve performance.
196
+ """
197
+ # Copilot: implement with hashable inputs
198
+ ```
199
+
200
+ ### Batch Processing
201
+ ```python
202
+ # Process multiple RDF documents efficiently
203
+ async def batch_validate_rdf(rdf_documents: List[str]):
204
+ """
205
+ Validate multiple RDF documents in parallel.
206
+ Use asyncio for concurrent processing.
207
+ """
208
+ # Copilot: implement with progress tracking
209
+ ```
210
+
211
+ ## Common Pitfalls to Avoid
212
+
213
+ 1. **Namespace Conflicts**: Always use `override=True` when binding namespaces
214
+ 2. **Graph Parsing**: Specify format explicitly, don't rely on auto-detection
215
+ 3. **SPARQL Queries**: Escape special characters in URIs
216
+ 4. **Async/Await**: Don't mix synchronous and asynchronous code
217
+ 5. **Error Messages**: Always include context for debugging
218
+
219
+ ## Gradio UI Enhancements
220
+
221
+ ### Adding New UI Components
222
+ ```python
223
+ # Template for new Gradio components
224
+ def create_validation_interface():
225
+ """
226
+ Create Gradio interface with:
227
+ - File upload for RDF
228
+ - Template selection
229
+ - Real-time validation
230
+ - Export functionality
231
+ """
232
+ # Copilot: implement with proper event handlers
233
+ ```
234
+
235
+ ### Custom CSS/Theming
236
+ ```python
237
+ # Apply custom styling to Gradio components
238
+ custom_css = """
239
+ .validation-error { color: red; font-weight: bold; }
240
+ .validation-warning { color: orange; }
241
+ .validation-info { color: blue; }
242
+ """
243
+ # Copilot: suggest CSS for better UX
244
+ ```
245
+
246
+ ## Deployment Considerations
247
+
248
+ ### Hugging Face Spaces Configuration
249
+ ```python
250
+ # Environment variable handling
251
+ HF_API_KEY = os.environ.get("HF_API_KEY")
252
+ if not HF_API_KEY:
253
+ logger.warning("HF_API_KEY not set, AI features disabled")
254
+
255
+ # Gradio launch configuration for Spaces
256
+ demo.launch(
257
+ server_name="0.0.0.0",
258
+ server_port=7860,
259
+ share=False # Don't use share=True on Spaces
260
+ )
261
+ ```
262
+
263
+ ### Error Recovery
264
+ ```python
265
+ # Implement graceful degradation
266
+ def safe_ai_call(func):
267
+ """
268
+ Decorator for AI calls that falls back gracefully.
269
+ """
270
+ # Copilot: implement with fallback behavior
271
+ ```
272
+
273
+ ## Quick Reference
274
+
275
+ ### Essential Imports
276
+ ```python
277
+ import gradio as gr
278
+ import rdflib
279
+ from rdflib import Graph, URIRef, Literal, Namespace, RDF, RDFS
280
+ from pyshacl import validate
281
+ import pandas as pd
282
+ import logging
283
+ import asyncio
284
+ from typing import Optional, Dict, List, Any
285
+ ```
286
+
287
+ ### Debugging Commands
288
+ ```python
289
+ # Log graph contents
290
+ logger.debug(f"Graph has {len(graph)} triples")
291
+ logger.debug(graph.serialize(format='turtle'))
292
+
293
+ # Log validation details
294
+ conforms, results_graph, results_text = validate(
295
+ data_graph,
296
+ shacl_graph=shapes,
297
+ debug=True,
298
+ inference='rdfs'
299
+ )
300
+ ```
301
+
302
+ ### Common SHACL Properties
303
+ - `sh:targetClass` - Define which RDF types to validate
304
+ - `sh:path` - Property to validate
305
+ - `sh:minCount` - Minimum occurrences (1 for mandatory)
306
+ - `sh:maxCount` - Maximum occurrences (1 for non-repeatable)
307
+ - `sh:datatype` - Expected datatype
308
+ - `sh:node` - Link to another shape (valueShape)
309
+ - `sh:severity` - sh:Violation, sh:Warning, or sh:Info
310
+
311
+ Remember: Always test with real BIBFRAME data and verify MCP endpoints are accessible!
diagram1.png ADDED
diagram2.png ADDED
diagram3.png ADDED
system-architecture.mmd ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ graph TB
2
+ subgraph "User Interfaces"
3
+ UI1[Web Browser<br/>Gradio UI]
4
+ UI2[Claude Desktop<br/>MCP Client]
5
+ UI3[External Apps<br/>via API]
6
+ end
7
+
8
+ subgraph "MCP4RDF Service Layer"
9
+ APP[app.py<br/>Main Application]
10
+ API[API Endpoints]
11
+ MCP[MCP Server<br/>SSE Protocol]
12
+
13
+ APP --> API
14
+ APP --> MCP
15
+ end
16
+
17
+ subgraph "Core Functions"
18
+ F1[validate_rdf_tool]
19
+ F2[get_ai_suggestions]
20
+ F3[get_ai_correction]
21
+ F4[get_rdf_examples]
22
+ end
23
+
24
+ subgraph "Validation Engine"
25
+ VAL[validator.py]
26
+ SHACL[SHACL Templates]
27
+ PY[pyshacl Library]
28
+
29
+ VAL --> PY
30
+ PY --> SHACL
31
+ end
32
+
33
+ subgraph "AI Enhancement"
34
+ OAI[OpenAI Client]
35
+ HF[HF Inference Endpoint]
36
+ LLM[Llama-3.3-70B]
37
+
38
+ OAI --> HF
39
+ HF --> LLM
40
+ end
41
+
42
+ UI1 --> APP
43
+ UI2 --> MCP
44
+ UI3 --> API
45
+
46
+ API --> F1
47
+ API --> F2
48
+ API --> F3
49
+ API --> F4
50
+
51
+ F1 --> VAL
52
+ F2 --> OAI
53
+ F3 --> OAI