DominiqueLoyer committed on
Commit
23d087d
·
1 Parent(s): 020a2c1

Add RDF import script for Supabase

Browse files
Files changed (1) hide show
  1. import_rdf_to_supabase.py +108 -0
import_rdf_to_supabase.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Import Turtle RDF data into Supabase
4
+ """
5
+ import requests
6
+ from rdflib import Graph, Literal, URIRef, Namespace
7
+ from pathlib import Path
8
+
9
import os
import sys

SUPABASE_URL = "https://zmluirvqfkmfazqitqgi.supabase.co"

# SECURITY: the service-role key used for insert operations is a privileged
# credential and must not live in source control. It is read from the
# environment instead (SUPABASE_SERVICE_ROLE_KEY, falling back to SUPABASE_KEY).
# NOTE(review): the key that was previously hard-coded on this line has been
# published with the commit and should be rotated.
SUPABASE_KEY = (
    os.environ.get("SUPABASE_SERVICE_ROLE_KEY")
    or os.environ.get("SUPABASE_KEY", "")
)
if not SUPABASE_KEY:
    # Keep going so the parsing step can still be exercised; the HTTP calls
    # will be rejected by the server and reported by their own error paths.
    print(
        "Warning: SUPABASE_SERVICE_ROLE_KEY is not set; "
        "Supabase requests will be rejected",
        file=sys.stderr,
    )

# Headers sent with every insert request.
headers = {
    'apikey': SUPABASE_KEY,
    'Authorization': f'Bearer {SUPABASE_KEY}',
    'Content-Type': 'application/json',
    # Upsert semantics: rows that collide with an existing row are merged.
    'Prefer': 'resolution=merge-duplicates',
}
19
+
20
# Ontology namespaces: URIs under the old GitHub-hosted namespace are rewritten
# to the syscred.uqam.ca namespace.
OLD_NS = "https://github.com/DominiqueLoyer/systemFactChecking#"
NEW_NS = "https://syscred.uqam.ca/ontology#"


def transform_uri(uri):
    """Return ``str(uri)`` with each occurrence of OLD_NS swapped for NEW_NS.

    Values that do not contain the old namespace come back unchanged apart
    from the conversion to ``str``.
    """
    # str.replace leaves the text untouched when OLD_NS is absent, so no
    # separate membership test is required.
    as_text = str(uri)
    return as_text.replace(OLD_NS, NEW_NS)
30
+
31
# Parse Turtle files
print("=== Parsing Turtle files ===")
g = Graph()

# The base ontology is loaded first, then the data ontology; both are merged
# into the single graph ``g``.
base_file = Path("ontology/sysCRED_onto26avrtil.ttl")
data_file = Path("ontology/sysCRED_data.ttl")

for ttl_file in (base_file, data_file):
    if not ttl_file.exists():
        # A missing input used to be skipped without any output, which made a
        # mistyped or relocated path look like a successful (smaller) import.
        print(f"Warning: {ttl_file} not found, skipping")
        continue
    print(f"Loading: {ttl_file}")
    g.parse(str(ttl_file), format='turtle')

print(f"Total triples parsed: {len(g)}")
48
+
49
# Transform and insert
print("\n=== Inserting into Supabase (rdf_triples table) ===")

batch_size = 100
# Seconds to wait for each HTTP call; without a timeout a stalled connection
# would hang the import indefinitely.
REQUEST_TIMEOUT = 30


def _triple_to_row(s, p, o):
    """Build the rdf_triples row dict for one (subject, predicate, object) triple."""
    return {
        # Subject and predicate are capped at 500 characters.
        'subject': transform_uri(s)[:500],
        'predicate': transform_uri(p)[:500],
        # Only URIRef objects get the namespace rewrite; everything else is
        # stored as its plain string form.
        'object': transform_uri(o) if isinstance(o, URIRef) else str(o),
        # Anything that is not a Literal is labelled 'uri'.
        'object_type': 'literal' if isinstance(o, Literal) else 'uri',
        # Classified on the ORIGINAL subject, before the namespace rewrite.
        'graph_name': 'base' if OLD_NS in str(s) or 'Expert' in str(s) else 'data',
    }


def _post_batch(rows, label):
    """POST *rows* to rdf_triples.

    Returns True on HTTP 200/201. On any other status, or on a transport
    error, prints a diagnostic and returns False so the caller can keep
    going with the next batch.
    """
    try:
        response = requests.post(
            f"{SUPABASE_URL}/rest/v1/rdf_triples",
            headers=headers,
            json=rows,
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException as exc:
        print(f"  Error: {label} request failed - {exc}")
        return False
    if response.status_code in (200, 201):
        print(f"  Inserted {label}: {len(rows)} triples")
        return True
    print(f"  Error: {response.status_code} - {response.text[:100]}")
    return False


batch = []
batch_number = 0
total_inserted = 0
total_failed = 0

for s, p, o in g:
    batch.append(_triple_to_row(s, p, o))

    # Insert in batches
    if len(batch) >= batch_size:
        batch_number += 1
        if _post_batch(batch, f"batch {batch_number}"):
            total_inserted += len(batch)
        else:
            total_failed += len(batch)
        batch = []

# Insert remaining
if batch:
    if _post_batch(batch, "final batch"):
        total_inserted += len(batch)
    else:
        total_failed += len(batch)

print(f"\n✓ Total inserted: {total_inserted}")
if total_failed:
    # Rows from rejected batches are not retried; surface the count so a
    # partial import is not mistaken for a complete one.
    print(f"✗ Total failed: {total_failed}")
100
+
101
# Verify
print("\n=== Verifying ===")
# Ask the server for an exact row count instead of downloading every id and
# counting client-side: a plain SELECT is subject to the server's max-rows cap,
# so len(response.json()) under-reports once the table grows past that cap.
try:
    response = requests.head(
        f"{SUPABASE_URL}/rest/v1/rdf_triples?select=id",
        headers={
            'apikey': SUPABASE_KEY,
            'Authorization': f'Bearer {SUPABASE_KEY}',
            'Prefer': 'count=exact',
        },
        timeout=30,
    )
except requests.RequestException as exc:
    print(f"  Error: verification request failed - {exc}")
else:
    # 206 is returned when the row cap truncates the result; 200 otherwise.
    if response.status_code in (200, 206):
        # Content-Range has the form "0-24/3573458" (or "*/0" for an empty
        # table); the total is whatever follows the last slash.
        total = response.headers.get('Content-Range', '').rpartition('/')[2]
        if total.isdigit():
            print(f"Total in rdf_triples: {total}")
        else:
            print("  Error: server did not report a row count")
    else:
        print(f"  Error: {response.status_code} - verification failed")