Spaces:
Sleeping
Sleeping
RDF Validation Deployment committed on
Commit ·
3804b9b
1
Parent(s): 1a6e667
Add comprehensive RDF debugging and improved property extraction - 2025-10-04 15:23:17
Browse files
app.py
CHANGED
|
@@ -71,9 +71,32 @@ class BIBFRAMEKnowledgeBase:
|
|
| 71 |
format="xml"
|
| 72 |
)
|
| 73 |
|
| 74 |
-
#
|
| 75 |
-
print("π
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
property_types = [RDF.Property, RDFS.Property]
|
|
|
|
|
|
|
| 77 |
for prop_type in property_types:
|
| 78 |
prop_count = 0
|
| 79 |
for prop in self.ontology_graph.subjects(RDF.type, prop_type):
|
|
@@ -90,6 +113,32 @@ class BIBFRAMEKnowledgeBase:
|
|
| 90 |
prop_count += 1
|
| 91 |
print(f"π Found {prop_count} properties of type {prop_type}")
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
# Also try owl:ObjectProperty and owl:DatatypeProperty
|
| 94 |
from rdflib import OWL
|
| 95 |
owl_property_types = [OWL.ObjectProperty, OWL.DatatypeProperty, OWL.AnnotationProperty]
|
|
@@ -111,6 +160,8 @@ class BIBFRAMEKnowledgeBase:
|
|
| 111 |
pass # OWL might not be available
|
| 112 |
|
| 113 |
# Extract classes
|
|
|
|
|
|
|
| 114 |
for cls in self.ontology_graph.subjects(RDF.type, RDFS.Class):
|
| 115 |
if str(cls).startswith(str(BF)):
|
| 116 |
local_name = str(cls).replace(str(BF), "")
|
|
@@ -120,9 +171,21 @@ class BIBFRAMEKnowledgeBase:
|
|
| 120 |
"definition": self._get_comment(cls),
|
| 121 |
"subClassOf": self._get_super_classes(cls)
|
| 122 |
}
|
|
|
|
|
|
|
| 123 |
|
| 124 |
self._loaded = True
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
except Exception as e:
|
| 128 |
print(f"⚠️ Error loading ontology: {e}")
|
|
@@ -132,7 +195,9 @@ class BIBFRAMEKnowledgeBase:
|
|
| 132 |
|
| 133 |
def _load_minimal_fallback(self):
|
| 134 |
"""Minimal fallback data if ontology loading fails"""
|
| 135 |
-
|
|
|
|
|
|
|
| 136 |
"bf:assigner": {
|
| 137 |
"uri": "http://id.loc.gov/ontologies/bibframe/assigner",
|
| 138 |
"label": "Assigner",
|
|
@@ -149,9 +214,10 @@ class BIBFRAMEKnowledgeBase:
|
|
| 149 |
"range": ["http://id.loc.gov/ontologies/bibframe/Title"],
|
| 150 |
"subPropertyOf": []
|
| 151 |
}
|
| 152 |
-
|
| 153 |
|
| 154 |
-
self.classes
|
|
|
|
| 155 |
"bf:Work": {
|
| 156 |
"uri": "http://id.loc.gov/ontologies/bibframe/Work",
|
| 157 |
"label": "Work",
|
|
@@ -164,10 +230,10 @@ class BIBFRAMEKnowledgeBase:
|
|
| 164 |
"definition": "Individual exemplar of a Work",
|
| 165 |
"subClassOf": ["http://id.loc.gov/ontologies/bibframe/Resource"]
|
| 166 |
}
|
| 167 |
-
|
| 168 |
|
| 169 |
self._loaded = True
|
| 170 |
-
print("📦 Loaded fallback data:
|
| 171 |
|
| 172 |
def _get_label(self, resource):
|
| 173 |
return str(self.ontology_graph.value(resource, RDFS.label) or "")
|
|
|
|
| 71 |
format="xml"
|
| 72 |
)
|
| 73 |
|
| 74 |
+
# Debug: Let's see what's actually in the ontology
|
| 75 |
+
print("π Analyzing ontology structure...")
|
| 76 |
+
|
| 77 |
+
# Check what namespaces are used
|
| 78 |
+
namespaces = list(self.ontology_graph.namespaces())
|
| 79 |
+
print(f"π Found namespaces: {[f'{prefix}: {ns}' for prefix, ns in namespaces[:5]]}")
|
| 80 |
+
|
| 81 |
+
# Check what types are actually used
|
| 82 |
+
all_types = set()
|
| 83 |
+
for s, p, o in self.ontology_graph.triples((None, RDF.type, None)):
|
| 84 |
+
all_types.add(str(o))
|
| 85 |
+
print(f"π Found types: {list(all_types)[:5]}...")
|
| 86 |
+
|
| 87 |
+
# Try to find ALL properties regardless of type
|
| 88 |
+
print("π Looking for all BIBFRAME properties...")
|
| 89 |
+
bf_subjects = set()
|
| 90 |
+
for subj in self.ontology_graph.subjects():
|
| 91 |
+
if str(subj).startswith(str(BF)):
|
| 92 |
+
bf_subjects.add(subj)
|
| 93 |
+
|
| 94 |
+
print(f"π Found {len(bf_subjects)} BIBFRAME subjects total")
|
| 95 |
+
|
| 96 |
+
# Extract properties - try multiple approaches
|
| 97 |
property_types = [RDF.Property, RDFS.Property]
|
| 98 |
+
|
| 99 |
+
# First, try standard property types
|
| 100 |
for prop_type in property_types:
|
| 101 |
prop_count = 0
|
| 102 |
for prop in self.ontology_graph.subjects(RDF.type, prop_type):
|
|
|
|
| 113 |
prop_count += 1
|
| 114 |
print(f"π Found {prop_count} properties of type {prop_type}")
|
| 115 |
|
| 116 |
+
# Try to extract properties that have domain/range but no explicit type
|
| 117 |
+
print("π Looking for properties with domain/range...")
|
| 118 |
+
domain_props = set()
|
| 119 |
+
for prop in self.ontology_graph.subjects(RDFS.domain, None):
|
| 120 |
+
if str(prop).startswith(str(BF)):
|
| 121 |
+
domain_props.add(prop)
|
| 122 |
+
|
| 123 |
+
for prop in self.ontology_graph.subjects(RDFS.range, None):
|
| 124 |
+
if str(prop).startswith(str(BF)):
|
| 125 |
+
domain_props.add(prop)
|
| 126 |
+
|
| 127 |
+
print(f"π Found {len(domain_props)} properties with domain/range")
|
| 128 |
+
|
| 129 |
+
# Add these as properties
|
| 130 |
+
for prop in domain_props:
|
| 131 |
+
local_name = str(prop).replace(str(BF), "")
|
| 132 |
+
if f"bf:{local_name}" not in self.properties:
|
| 133 |
+
self.properties[f"bf:{local_name}"] = {
|
| 134 |
+
"uri": str(prop),
|
| 135 |
+
"label": self._get_label(prop),
|
| 136 |
+
"definition": self._get_comment(prop),
|
| 137 |
+
"domain": self._get_domains(prop),
|
| 138 |
+
"range": self._get_ranges(prop),
|
| 139 |
+
"subPropertyOf": self._get_super_properties(prop)
|
| 140 |
+
}
|
| 141 |
+
|
| 142 |
# Also try owl:ObjectProperty and owl:DatatypeProperty
|
| 143 |
from rdflib import OWL
|
| 144 |
owl_property_types = [OWL.ObjectProperty, OWL.DatatypeProperty, OWL.AnnotationProperty]
|
|
|
|
| 160 |
pass # OWL might not be available
|
| 161 |
|
| 162 |
# Extract classes
|
| 163 |
+
print("π Looking for classes...")
|
| 164 |
+
class_count = 0
|
| 165 |
for cls in self.ontology_graph.subjects(RDF.type, RDFS.Class):
|
| 166 |
if str(cls).startswith(str(BF)):
|
| 167 |
local_name = str(cls).replace(str(BF), "")
|
|
|
|
| 171 |
"definition": self._get_comment(cls),
|
| 172 |
"subClassOf": self._get_super_classes(cls)
|
| 173 |
}
|
| 174 |
+
class_count += 1
|
| 175 |
+
print(f"π Found {class_count} classes")
|
| 176 |
|
| 177 |
self._loaded = True
|
| 178 |
+
total_props = len(self.properties)
|
| 179 |
+
total_classes = len(self.classes)
|
| 180 |
+
print(f"✅ Loaded {total_props} properties and {total_classes} classes from ontology")
|
| 181 |
+
|
| 182 |
+
# If we didn't find any properties, something went wrong
|
| 183 |
+
if total_props == 0 and total_classes == 0:
|
| 184 |
+
print("⚠️ No properties or classes found - this seems wrong!")
|
| 185 |
+
print("📦 Adding minimal fallback data...")
|
| 186 |
+
self._load_minimal_fallback()
|
| 187 |
+
else:
|
| 188 |
+
print("π Successfully loaded BIBFRAME ontology data!")
|
| 189 |
|
| 190 |
except Exception as e:
|
| 191 |
print(f"⚠️ Error loading ontology: {e}")
|
|
|
|
| 195 |
|
| 196 |
def _load_minimal_fallback(self):
|
| 197 |
"""Minimal fallback data if ontology loading fails"""
|
| 198 |
+
# Don't overwrite existing data
|
| 199 |
+
if not self.properties:
|
| 200 |
+
self.properties = {
|
| 201 |
"bf:assigner": {
|
| 202 |
"uri": "http://id.loc.gov/ontologies/bibframe/assigner",
|
| 203 |
"label": "Assigner",
|
|
|
|
| 214 |
"range": ["http://id.loc.gov/ontologies/bibframe/Title"],
|
| 215 |
"subPropertyOf": []
|
| 216 |
}
|
| 217 |
+
}
|
| 218 |
|
| 219 |
+
if not self.classes:
|
| 220 |
+
self.classes = {
|
| 221 |
"bf:Work": {
|
| 222 |
"uri": "http://id.loc.gov/ontologies/bibframe/Work",
|
| 223 |
"label": "Work",
|
|
|
|
| 230 |
"definition": "Individual exemplar of a Work",
|
| 231 |
"subClassOf": ["http://id.loc.gov/ontologies/bibframe/Resource"]
|
| 232 |
}
|
| 233 |
+
}
|
| 234 |
|
| 235 |
self._loaded = True
|
| 236 |
+
print(f"📦 Loaded fallback data: {len(self.properties)} properties, {len(self.classes)} classes")
|
| 237 |
|
| 238 |
def _get_label(self, resource):
|
| 239 |
return str(self.ontology_graph.value(resource, RDFS.label) or "")
|