RDF Validation Deployment committed on
Commit
3804b9b
·
1 Parent(s): 1a6e667

Add comprehensive RDF debugging and improved property extraction - 2025-10-04 15:23:17

Browse files
Files changed (1) hide show
  1. app.py +74 -8
app.py CHANGED
@@ -71,9 +71,32 @@ class BIBFRAMEKnowledgeBase:
71
  format="xml"
72
  )
73
 
74
- # Extract properties - try multiple RDF property types
75
- print("🔍 Looking for properties...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  property_types = [RDF.Property, RDFS.Property]
 
 
77
  for prop_type in property_types:
78
  prop_count = 0
79
  for prop in self.ontology_graph.subjects(RDF.type, prop_type):
@@ -90,6 +113,32 @@ class BIBFRAMEKnowledgeBase:
90
  prop_count += 1
91
  print(f"🔍 Found {prop_count} properties of type {prop_type}")
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  # Also try owl:ObjectProperty and owl:DatatypeProperty
94
  from rdflib import OWL
95
  owl_property_types = [OWL.ObjectProperty, OWL.DatatypeProperty, OWL.AnnotationProperty]
@@ -111,6 +160,8 @@ class BIBFRAMEKnowledgeBase:
111
  pass # OWL might not be available
112
 
113
  # Extract classes
 
 
114
  for cls in self.ontology_graph.subjects(RDF.type, RDFS.Class):
115
  if str(cls).startswith(str(BF)):
116
  local_name = str(cls).replace(str(BF), "")
@@ -120,9 +171,21 @@ class BIBFRAMEKnowledgeBase:
120
  "definition": self._get_comment(cls),
121
  "subClassOf": self._get_super_classes(cls)
122
  }
 
 
123
 
124
  self._loaded = True
125
- print(f"✅ Loaded {len(self.properties)} properties and {len(self.classes)} classes")
 
 
 
 
 
 
 
 
 
 
126
 
127
  except Exception as e:
128
  print(f"⚠️ Error loading ontology: {e}")
@@ -132,7 +195,9 @@ class BIBFRAMEKnowledgeBase:
132
 
133
  def _load_minimal_fallback(self):
134
  """Minimal fallback data if ontology loading fails"""
135
- self.properties = {
 
 
136
  "bf:assigner": {
137
  "uri": "http://id.loc.gov/ontologies/bibframe/assigner",
138
  "label": "Assigner",
@@ -149,9 +214,10 @@ class BIBFRAMEKnowledgeBase:
149
  "range": ["http://id.loc.gov/ontologies/bibframe/Title"],
150
  "subPropertyOf": []
151
  }
152
- }
153
 
154
- self.classes = {
 
155
  "bf:Work": {
156
  "uri": "http://id.loc.gov/ontologies/bibframe/Work",
157
  "label": "Work",
@@ -164,10 +230,10 @@ class BIBFRAMEKnowledgeBase:
164
  "definition": "Individual exemplar of a Work",
165
  "subClassOf": ["http://id.loc.gov/ontologies/bibframe/Resource"]
166
  }
167
- }
168
 
169
  self._loaded = True
170
- print("📦 Loaded fallback data: 2 properties, 2 classes")
171
 
172
  def _get_label(self, resource):
173
  return str(self.ontology_graph.value(resource, RDFS.label) or "")
 
71
  format="xml"
72
  )
73
 
74
+ # Debug: Let's see what's actually in the ontology
75
+ print("🔍 Analyzing ontology structure...")
76
+
77
+ # Check what namespaces are used
78
+ namespaces = list(self.ontology_graph.namespaces())
79
+ print(f"🔍 Found namespaces: {[f'{prefix}: {ns}' for prefix, ns in namespaces[:5]]}")
80
+
81
+ # Check what types are actually used
82
+ all_types = set()
83
+ for s, p, o in self.ontology_graph.triples((None, RDF.type, None)):
84
+ all_types.add(str(o))
85
+ print(f"🔍 Found types: {list(all_types)[:5]}...")
86
+
87
+ # Try to find ALL properties regardless of type
88
+ print("🔍 Looking for all BIBFRAME properties...")
89
+ bf_subjects = set()
90
+ for subj in self.ontology_graph.subjects():
91
+ if str(subj).startswith(str(BF)):
92
+ bf_subjects.add(subj)
93
+
94
+ print(f"🔍 Found {len(bf_subjects)} BIBFRAME subjects total")
95
+
96
+ # Extract properties - try multiple approaches
97
  property_types = [RDF.Property, RDFS.Property]
98
+
99
+ # First, try standard property types
100
  for prop_type in property_types:
101
  prop_count = 0
102
  for prop in self.ontology_graph.subjects(RDF.type, prop_type):
 
113
  prop_count += 1
114
  print(f"🔍 Found {prop_count} properties of type {prop_type}")
115
 
116
+ # Try to extract properties that have domain/range but no explicit type
117
+ print("🔍 Looking for properties with domain/range...")
118
+ domain_props = set()
119
+ for prop in self.ontology_graph.subjects(RDFS.domain, None):
120
+ if str(prop).startswith(str(BF)):
121
+ domain_props.add(prop)
122
+
123
+ for prop in self.ontology_graph.subjects(RDFS.range, None):
124
+ if str(prop).startswith(str(BF)):
125
+ domain_props.add(prop)
126
+
127
+ print(f"🔍 Found {len(domain_props)} properties with domain/range")
128
+
129
+ # Add these as properties
130
+ for prop in domain_props:
131
+ local_name = str(prop).replace(str(BF), "")
132
+ if f"bf:{local_name}" not in self.properties:
133
+ self.properties[f"bf:{local_name}"] = {
134
+ "uri": str(prop),
135
+ "label": self._get_label(prop),
136
+ "definition": self._get_comment(prop),
137
+ "domain": self._get_domains(prop),
138
+ "range": self._get_ranges(prop),
139
+ "subPropertyOf": self._get_super_properties(prop)
140
+ }
141
+
142
  # Also try owl:ObjectProperty and owl:DatatypeProperty
143
  from rdflib import OWL
144
  owl_property_types = [OWL.ObjectProperty, OWL.DatatypeProperty, OWL.AnnotationProperty]
 
160
  pass # OWL might not be available
161
 
162
  # Extract classes
163
+ print("🔍 Looking for classes...")
164
+ class_count = 0
165
  for cls in self.ontology_graph.subjects(RDF.type, RDFS.Class):
166
  if str(cls).startswith(str(BF)):
167
  local_name = str(cls).replace(str(BF), "")
 
171
  "definition": self._get_comment(cls),
172
  "subClassOf": self._get_super_classes(cls)
173
  }
174
+ class_count += 1
175
+ print(f"🔍 Found {class_count} classes")
176
 
177
  self._loaded = True
178
+ total_props = len(self.properties)
179
+ total_classes = len(self.classes)
180
+ print(f"✅ Loaded {total_props} properties and {total_classes} classes from ontology")
181
+
182
+ # If we didn't find any properties, something went wrong
183
+ if total_props == 0 and total_classes == 0:
184
+ print("⚠️ No properties or classes found - this seems wrong!")
185
+ print("📦 Adding minimal fallback data...")
186
+ self._load_minimal_fallback()
187
+ else:
188
+ print("🎉 Successfully loaded BIBFRAME ontology data!")
189
 
190
  except Exception as e:
191
  print(f"⚠️ Error loading ontology: {e}")
 
195
 
196
  def _load_minimal_fallback(self):
197
  """Minimal fallback data if ontology loading fails"""
198
+ # Don't overwrite existing data
199
+ if not self.properties:
200
+ self.properties = {
201
  "bf:assigner": {
202
  "uri": "http://id.loc.gov/ontologies/bibframe/assigner",
203
  "label": "Assigner",
 
214
  "range": ["http://id.loc.gov/ontologies/bibframe/Title"],
215
  "subPropertyOf": []
216
  }
217
+ }
218
 
219
+ if not self.classes:
220
+ self.classes = {
221
  "bf:Work": {
222
  "uri": "http://id.loc.gov/ontologies/bibframe/Work",
223
  "label": "Work",
 
230
  "definition": "Individual exemplar of a Work",
231
  "subClassOf": ["http://id.loc.gov/ontologies/bibframe/Resource"]
232
  }
233
+ }
234
 
235
  self._loaded = True
236
+ print(f"📦 Loaded fallback data: {len(self.properties)} properties, {len(self.classes)} classes")
237
 
238
  def _get_label(self, resource):
239
  return str(self.ontology_graph.value(resource, RDFS.label) or "")