umer6016 committed on
Commit
1d12e97
·
0 Parent(s):

Fresh Deploy

Browse files
Files changed (12) hide show
  1. .gitignore +10 -0
  2. Dockerfile +19 -0
  3. README.md +67 -0
  4. app.py +133 -0
  5. cars_knowledge_graph.ttl +0 -0
  6. cars_ontology.ttl +106 -0
  7. requirements.txt +3 -0
  8. src/app.py +133 -0
  9. src/convert_data.py +208 -0
  10. src/ontology.py +68 -0
  11. src/publish.py +74 -0
  12. src/validate.py +129 -0
.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Ignore PDF files
3
+ *.pdf
4
+
5
+ # Ignore Python cache
6
+ __pycache__/
7
+ *.pyc
8
+
9
+ # Ignore simple artifacts
10
+ *.csv
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ WORKDIR /app
4
+
5
+ # Install dependencies
6
+ COPY requirements.txt .
7
+ RUN pip install --no-cache-dir -r requirements.txt
8
+
9
+ # Copy application code and data
10
+ COPY app.py .
11
+ COPY cars_knowledge_graph.ttl .
12
+ COPY cars_ontology.ttl .
13
+ COPY src/ src/
14
+
15
+ # Expose Streamlit port
16
+ EXPOSE 7860
17
+
18
+ # Run the application
19
+ CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
README.md ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Cars Knowledge Graph
3
+ emoji: 🚗
4
+ colorFrom: blue
5
+ colorTo: red
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ # Knowledge Representation Project: Cars Knowledge Graph
11
+
12
+ ## 1. Introduction & Motivation
13
+ This project transforms a flat CSV dataset of **Car Specifications (2025)** into a semantic **Knowledge Graph (RDF/OWL)**.
14
+ **Domain**: Automotive Industry (Cars, Manufacturers, Engines, Performance).
15
+ **Motivation**: To enable complex querying of car data that is not possible with simple tabular lookups, such as inferring "High Performance" vehicles or categorizing cars by complex criteria (e.g., specific engine types + price ranges).
16
+
17
+ ## 2. Ontology Design (Conceptual Model)
18
+ The Ontology is designed using **RDFLib** and conforms to the project requirements (20+ Classes, 7+ Properties).
19
+
20
+ ### Core Classes
21
+ - **Car**: The central entity.
22
+ - **Subclasses**: `SportsCar`, `HyperCar`, `Sedan`, `Coupe`, `ElectricCar`, `PetrolCar`, `LuxuryCar`.
23
+ - **Manufacturer**: Companies like Ferrari, Tesla, Bugatti.
24
+ - **Component**: `Engine`, `V8Engine`, `V12Engine`.
25
+ - **Defined Classes**:
26
+ - `SuperCar`: Cars with Top Speed > 300 km/h.
27
+
28
+ ### Properties
29
+ - **Object Properties**: `hasManufacturer`, `hasEngine`, `usesFuel`, `manufactures`.
30
+ - **Datatype Properties**: `hasPriceValue`, `hasHorsePowerValue`, `hasSeatCount`, `hasTopSpeedKMH`.
31
+
32
+ ## 3. Knowledge Graph Construction
33
+ - **Source**: `Cars Datasets 2025.csv`
34
+ - **Output**: `cars_knowledge_graph.ttl` (Turtle Syntax)
35
+ - **Triples Generated**: ~9,842
36
+
37
+ ## 4. Competency Questions & Validation
38
+ The following questions guided the design and were validated via SPARQL:
39
+
40
+ 1. **"List all cars manufactured by Ferrari"**
41
+ - Verified: Returns models like `SF90 STRADALE`, `ROMA`, `812 GTS`.
42
+ 2. **"Which cars have > 800 HorsePower?"**
43
+ - Verified: Returns `Bugatti Chiron`, `Tesla Roadster 2`, etc.
44
+ 3. **"Count of 2-Seater Coupes"**
45
+ - Result: 147 vehicles.
46
+ 4. **"What is the average price of all cars?"**
47
+ - Result: ~$137,193 USD.
48
+
49
+ ## 5. How to Run
50
+ ### Prerequisites
51
+ - Python 3.x
52
+ - Libraries: `rdflib`, `pandas`
53
+
54
+ ### Steps
55
+ 1. **Install Dependencies**:
56
+ ```bash
57
+ pip install rdflib pandas
58
+ ```
59
+ 2. **Generate Knowledge Graph**:
60
+ ```bash
61
+ python src/ontology.py # Generates Ontology Schema
62
+ python src/convert_data.py # Generates Graph from CSV
63
+ ```
64
+ 3. **Run Validation Queries**:
65
+ ```bash
66
+ python src/validate.py
67
+ ```
app.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from rdflib import Graph, Namespace
4
+ import os
5
+
6
# Page Config
st.set_page_config(page_title="Cars Knowledge Graph Demo", layout="wide")


# Load Graph
@st.cache_resource
def load_graph():
    """Parse the Turtle knowledge graph from the current working directory.

    Decorated with ``st.cache_resource`` so the parsed graph is reused instead
    of being re-parsed on every script rerun.  When the file does not exist an
    empty graph is returned; the caller treats that as "not generated yet".
    """
    g = Graph()
    graph_path = "cars_knowledge_graph.ttl"
    if os.path.exists(graph_path):
        g.parse(graph_path, format="turtle")
    return g


def _sparql_string(text):
    """Return *text* as a double-quoted SPARQL string literal.

    Backslashes, double quotes and line breaks are escaped so that a label
    such as ``Foo "Bar"`` cannot terminate the literal early and corrupt (or
    extend) the surrounding query.
    """
    escaped = (
        str(text)
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    return f'"{escaped}"'


def _numeric_bounds(graph, query):
    """Run a ``SELECT (MIN(..) as ?min) (MAX(..) as ?max)`` query.

    Returns the ``(min, max)`` pair, or ``None`` when the query produced no
    row or either aggregate is unbound (no matching values in the graph).
    """
    rows = list(graph.query(query))
    if not rows:
        return None
    low, high = rows[0][0], rows[0][1]
    if low is None or high is None:
        return None
    return low, high


try:
    g = load_graph()
except Exception as e:
    st.error(f"Failed to load graph: {e}")
    st.stop()

if len(g) == 0:
    st.warning("Graph is empty or not found. Please run 'src/convert_data.py' first.")
    st.stop()

# Namespaces
EX = Namespace("http://example.org/cars/")

# Sidebar Filters
st.sidebar.header("Filter Cars")

# 1. Manufacturer Filter
manu_query = """
PREFIX ex: <http://example.org/cars/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT DISTINCT ?name WHERE {
    ?m a ex:Manufacturer ; rdfs:label ?name .
} ORDER BY ?name
"""
manufacturers = ["All"] + [str(row.name) for row in g.query(manu_query)]
selected_manu = st.sidebar.selectbox("Manufacturer", manufacturers)

# 2. Price Range
price_query = """
PREFIX ex: <http://example.org/cars/>
SELECT (MIN(?p) as ?min) (MAX(?p) as ?max) WHERE { ?s ex:hasPriceValue ?p }
"""
price_bounds = _numeric_bounds(g, price_query)
if price_bounds is None:
    # Without this guard float(None) would raise a TypeError below.
    st.warning("The graph contains no price values, so the filters cannot be built.")
    st.stop()
min_price, max_price = float(price_bounds[0]), float(price_bounds[1])
selected_price = st.sidebar.slider("Max Price (USD)", min_price, max_price, max_price)

# 3. Min Horsepower
hp_query = """
PREFIX ex: <http://example.org/cars/>
SELECT (MIN(?hp) as ?min) (MAX(?hp) as ?max) WHERE { ?s ex:hasHorsePowerValue ?hp }
"""
hp_bounds = _numeric_bounds(g, hp_query)
if hp_bounds is None:
    st.warning("The graph contains no horsepower values, so the filters cannot be built.")
    st.stop()
min_hp, max_hp = int(hp_bounds[0]), int(hp_bounds[1])
selected_hp = st.sidebar.slider("Min Horsepower", min_hp, max_hp, min_hp)

# Main Area
st.title("🚗 Cars Knowledge Graph Explorer")
st.markdown("This application queries the RDF Knowledge Graph directly using **SPARQL**.")

# Construct Query based on filters.  The two numeric filters come from
# sliders; the manufacturer label is free text from the graph and is
# therefore escaped before being embedded in the query.
manu_filter = (
    f"FILTER (?manuName = {_sparql_string(selected_manu)})"
    if selected_manu != "All"
    else ""
)
sparql_query = f"""
PREFIX ex: <http://example.org/cars/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?carName ?manuName ?price ?hp ?topSpeed ?seats
WHERE {{
    ?car a ex:Car ;
         rdfs:label ?carName ;
         ex:hasManufacturer ?manu ;
         ex:hasPriceValue ?price ;
         ex:hasHorsePowerValue ?hp ;
         ex:hasTopSpeedKMH ?topSpeed ;
         ex:hasSeatCount ?seats .

    ?manu rdfs:label ?manuName .

    FILTER (?price <= {selected_price})
    FILTER (?hp >= {selected_hp})
    {manu_filter}
}}
ORDER BY DESC(?price)
LIMIT 100
"""

# Run Query
results = g.query(sparql_query)

# Display Results
data = [
    {
        "Car Model": str(row.carName),
        "Manufacturer": str(row.manuName),
        "Price ($)": f"${float(row.price):,.2f}",
        "Horsepower": int(row.hp),
        "Top Speed (km/h)": int(row.topSpeed),
        "Seats": int(row.seats),
    }
    for row in results
]

df = pd.DataFrame(data)

col1, col2, col3 = st.columns(3)
col1.metric("Total Cars Found", len(df))
col2.metric("Graph Triples", len(g))
col3.metric("Selected Manufacturer", selected_manu)

if not df.empty:
    st.dataframe(df, use_container_width=True)
else:
    st.info("No cars match your filters.")

# Advanced: Raw SPARQL
with st.expander("Run Custom SPARQL Query"):
    custom_query = st.text_area("SPARQL Query", """
PREFIX ex: <http://example.org/cars/>
SELECT ?name ?price WHERE {
    ?c ex:hasPriceValue ?price ;
       rdfs:label ?name .
} LIMIT 5
""")
    if st.button("Run Query"):
        # Any parse or evaluation failure is shown to the user rather than
        # crashing the app, since the query text is typed in by hand.
        try:
            raw_res = g.query(custom_query)
            st.write(list(raw_res))
        except Exception as e:
            st.error(f"Error: {e}")
cars_knowledge_graph.ttl ADDED
The diff for this file is too large to render. See raw diff
 
cars_ontology.ttl ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @prefix ex: <http://example.org/cars/> .
2
+ @prefix owl: <http://www.w3.org/2002/07/owl#> .
3
+ @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
4
+
5
+ ex:Color a owl:Class .
6
+
7
+ ex:Coupe a owl:Class .
8
+
9
+ ex:DieselCar a owl:Class .
10
+
11
+ ex:EcoFriendlyCar a owl:Class .
12
+
13
+ ex:EconomyCar a owl:Class .
14
+
15
+ ex:ElectricCar a owl:Class ;
16
+ rdfs:subClassOf ex:Car .
17
+
18
+ ex:ElectricMotor a owl:Class .
19
+
20
+ ex:Engine a owl:Class ;
21
+ rdfs:subClassOf ex:Component .
22
+
23
+ ex:FiveSeaterCar a owl:Class .
24
+
25
+ ex:FourSeaterCar a owl:Class .
26
+
27
+ ex:FuelType a owl:Class .
28
+
29
+ ex:HybridCar a owl:Class .
30
+
31
+ ex:HyperCar a owl:Class .
32
+
33
+ ex:ICECar a owl:Class .
34
+
35
+ ex:LuxuryCar a owl:Class .
36
+
37
+ ex:LuxurySportCar a owl:Class .
38
+
39
+ ex:PassengerCar a owl:Class ;
40
+ rdfs:subClassOf ex:Car .
41
+
42
+ ex:PetrolCar a owl:Class ;
43
+ rdfs:subClassOf ex:Car .
44
+
45
+ ex:SUV a owl:Class .
46
+
47
+ ex:Sedan a owl:Class .
48
+
49
+ ex:SportsCar a owl:Class ;
50
+ rdfs:subClassOf ex:Car .
51
+
52
+ ex:SuperCar a owl:Class .
53
+
54
+ ex:TwoSeaterCar a owl:Class .
55
+
56
+ ex:V10Engine a owl:Class .
57
+
58
+ ex:V12Engine a owl:Class .
59
+
60
+ ex:V8Engine a owl:Class .
61
+
62
+ ex:VEngine a owl:Class .
63
+
64
+ ex:has0to100Sec a owl:DatatypeProperty ;
65
+ rdfs:domain ex:Car .
66
+
67
+ ex:hasCompetitor a owl:ObjectProperty .
68
+
69
+ ex:hasEngine a owl:ObjectProperty .
70
+
71
+ ex:hasEngineDisplacement a owl:DatatypeProperty ;
72
+ rdfs:domain ex:Car .
73
+
74
+ ex:hasHorsePowerValue a owl:DatatypeProperty ;
75
+ rdfs:domain ex:Car .
76
+
77
+ ex:hasManufacturer a owl:ObjectProperty ;
78
+ rdfs:domain ex:Car ;
79
+ rdfs:range ex:Manufacturer .
80
+
81
+ ex:hasPriceValue a owl:DatatypeProperty ;
82
+ rdfs:domain ex:Car .
83
+
84
+ ex:hasSeatCount a owl:DatatypeProperty ;
85
+ rdfs:domain ex:Car .
86
+
87
+ ex:hasTopSpeedKMH a owl:DatatypeProperty ;
88
+ rdfs:domain ex:Car .
89
+
90
+ ex:hasTorqueNm a owl:DatatypeProperty ;
91
+ rdfs:domain ex:Car .
92
+
93
+ ex:hasTrimLevel a owl:ObjectProperty .
94
+
95
+ ex:isModelVariantOf a owl:ObjectProperty .
96
+
97
+ ex:manufactures a owl:ObjectProperty .
98
+
99
+ ex:usesFuel a owl:ObjectProperty .
100
+
101
+ ex:Component a owl:Class .
102
+
103
+ ex:Manufacturer a owl:Class .
104
+
105
+ ex:Car a owl:Class .
106
+
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ rdflib
src/app.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from rdflib import Graph, Namespace
4
+ import os
5
+
6
# Page Config
st.set_page_config(page_title="Cars Knowledge Graph Demo", layout="wide")


# Load Graph
@st.cache_resource
def load_graph():
    """Parse the Turtle knowledge graph from the current working directory.

    Decorated with ``st.cache_resource`` so the parsed graph is reused instead
    of being re-parsed on every script rerun.  When the file does not exist an
    empty graph is returned; the caller treats that as "not generated yet".
    """
    g = Graph()
    graph_path = "cars_knowledge_graph.ttl"
    if os.path.exists(graph_path):
        g.parse(graph_path, format="turtle")
    return g


def _sparql_string(text):
    """Return *text* as a double-quoted SPARQL string literal.

    Backslashes, double quotes and line breaks are escaped so that a label
    such as ``Foo "Bar"`` cannot terminate the literal early and corrupt (or
    extend) the surrounding query.
    """
    escaped = (
        str(text)
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    return f'"{escaped}"'


def _numeric_bounds(graph, query):
    """Run a ``SELECT (MIN(..) as ?min) (MAX(..) as ?max)`` query.

    Returns the ``(min, max)`` pair, or ``None`` when the query produced no
    row or either aggregate is unbound (no matching values in the graph).
    """
    rows = list(graph.query(query))
    if not rows:
        return None
    low, high = rows[0][0], rows[0][1]
    if low is None or high is None:
        return None
    return low, high


try:
    g = load_graph()
except Exception as e:
    st.error(f"Failed to load graph: {e}")
    st.stop()

if len(g) == 0:
    st.warning("Graph is empty or not found. Please run 'src/convert_data.py' first.")
    st.stop()

# Namespaces
EX = Namespace("http://example.org/cars/")

# Sidebar Filters
st.sidebar.header("Filter Cars")

# 1. Manufacturer Filter
manu_query = """
PREFIX ex: <http://example.org/cars/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT DISTINCT ?name WHERE {
    ?m a ex:Manufacturer ; rdfs:label ?name .
} ORDER BY ?name
"""
manufacturers = ["All"] + [str(row.name) for row in g.query(manu_query)]
selected_manu = st.sidebar.selectbox("Manufacturer", manufacturers)

# 2. Price Range
price_query = """
PREFIX ex: <http://example.org/cars/>
SELECT (MIN(?p) as ?min) (MAX(?p) as ?max) WHERE { ?s ex:hasPriceValue ?p }
"""
price_bounds = _numeric_bounds(g, price_query)
if price_bounds is None:
    # Without this guard float(None) would raise a TypeError below.
    st.warning("The graph contains no price values, so the filters cannot be built.")
    st.stop()
min_price, max_price = float(price_bounds[0]), float(price_bounds[1])
selected_price = st.sidebar.slider("Max Price (USD)", min_price, max_price, max_price)

# 3. Min Horsepower
hp_query = """
PREFIX ex: <http://example.org/cars/>
SELECT (MIN(?hp) as ?min) (MAX(?hp) as ?max) WHERE { ?s ex:hasHorsePowerValue ?hp }
"""
hp_bounds = _numeric_bounds(g, hp_query)
if hp_bounds is None:
    st.warning("The graph contains no horsepower values, so the filters cannot be built.")
    st.stop()
min_hp, max_hp = int(hp_bounds[0]), int(hp_bounds[1])
selected_hp = st.sidebar.slider("Min Horsepower", min_hp, max_hp, min_hp)

# Main Area
st.title("🚗 Cars Knowledge Graph Explorer")
st.markdown("This application queries the RDF Knowledge Graph directly using **SPARQL**.")

# Construct Query based on filters.  The two numeric filters come from
# sliders; the manufacturer label is free text from the graph and is
# therefore escaped before being embedded in the query.
manu_filter = (
    f"FILTER (?manuName = {_sparql_string(selected_manu)})"
    if selected_manu != "All"
    else ""
)
sparql_query = f"""
PREFIX ex: <http://example.org/cars/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?carName ?manuName ?price ?hp ?topSpeed ?seats
WHERE {{
    ?car a ex:Car ;
         rdfs:label ?carName ;
         ex:hasManufacturer ?manu ;
         ex:hasPriceValue ?price ;
         ex:hasHorsePowerValue ?hp ;
         ex:hasTopSpeedKMH ?topSpeed ;
         ex:hasSeatCount ?seats .

    ?manu rdfs:label ?manuName .

    FILTER (?price <= {selected_price})
    FILTER (?hp >= {selected_hp})
    {manu_filter}
}}
ORDER BY DESC(?price)
LIMIT 100
"""

# Run Query
results = g.query(sparql_query)

# Display Results
data = [
    {
        "Car Model": str(row.carName),
        "Manufacturer": str(row.manuName),
        "Price ($)": f"${float(row.price):,.2f}",
        "Horsepower": int(row.hp),
        "Top Speed (km/h)": int(row.topSpeed),
        "Seats": int(row.seats),
    }
    for row in results
]

df = pd.DataFrame(data)

col1, col2, col3 = st.columns(3)
col1.metric("Total Cars Found", len(df))
col2.metric("Graph Triples", len(g))
col3.metric("Selected Manufacturer", selected_manu)

if not df.empty:
    st.dataframe(df, use_container_width=True)
else:
    st.info("No cars match your filters.")

# Advanced: Raw SPARQL
with st.expander("Run Custom SPARQL Query"):
    custom_query = st.text_area("SPARQL Query", """
PREFIX ex: <http://example.org/cars/>
SELECT ?name ?price WHERE {
    ?c ex:hasPriceValue ?price ;
       rdfs:label ?name .
} LIMIT 5
""")
    if st.button("Run Query"):
        # Any parse or evaluation failure is shown to the user rather than
        # crashing the app, since the query text is typed in by hand.
        try:
            raw_res = g.query(custom_query)
            st.write(list(raw_res))
        except Exception as e:
            st.error(f"Error: {e}")
src/convert_data.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from rdflib import Graph, Namespace, RDF, RDFS, OWL, Literal, XSD, URIRef
3
+ import re
4
+ import os
5
+
6
+ # Define Namespace
7
+ EX = Namespace("http://example.org/cars/")
8
+
9
def clean_price(value):
    """Parse a price cell such as ``"$1,100,000"`` into a float.

    Dollar signs and commas are removed, then the first number in the
    remaining text is returned, so a range like ``"$12,000-$15,000"`` yields
    its lower bound.  Missing (NaN/None) or non-numeric cells yield ``0.0``.
    """
    if pd.isna(value):
        return 0.0
    val_str = str(value).replace('$', '').replace(',', '').strip()
    # Match exactly one well-formed decimal number.  The previous character
    # class [\d\.]+ also captured text such as "." or "1.2.3", which made
    # float() raise ValueError and aborted the whole conversion run.
    match = re.search(r'\d+(?:\.\d+)?|\.\d+', val_str)
    return float(match.group(0)) if match else 0.0
14
+
15
def clean_number(value):
    """Extract the first number from a free-text cell such as ``"1,200 hp"``.

    Commas are treated as thousands separators and dropped.  Returns the
    number as a float when one is found, and ``0`` for missing (NaN/None) or
    non-numeric cells.
    """
    if pd.isna(value):
        return 0
    text = str(value).replace(',', '')
    # Require at least one digit.  The previous character class [\d\.,]+
    # could capture a lone "," or "." (e.g. in "n.a." or "approx., 500"),
    # after which float("") / float(".") raised ValueError.
    match = re.search(r'\d+(?:\.\d+)?|\.\d+', text)
    if match:
        return float(match.group(0))
    return 0
21
+
22
def clean_seats(value):
    """Return the first integer found in a seats cell.

    Missing (NaN/None) cells and cells without any digit fall back to 2.
    For a value such as ``"2+2"`` only the leading ``2`` is used.
    """
    fallback = 2
    if pd.isna(value):
        return fallback
    found = re.search(r'(\d+)', str(value))
    if found is None:
        return fallback
    return int(found.group(1))
26
+
27
def _uri_suffix(text):
    """Turn free text into a local name that is safe to append to ``EX``.

    Spaces become underscores.  The remaining characters that make an IRI
    unserialisable in Turtle (``< > " { } | \\ ^ `` and the backtick) are
    percent-encoded; every other character is left untouched so URIs that
    were already valid keep their previous spelling.
    """
    from urllib.parse import quote  # function-scope import keeps the block self-contained

    unsafe = '<>"{}|\\^`'
    return "".join(
        quote(ch, safe="") if ch in unsafe else ch
        for ch in text.replace(" ", "_")
    )


def _classify_fuel(fuel_raw):
    """Map a raw 'Fuel Types' cell to one of the canonical fuel labels.

    Checks are ordered: diesel first, then pure electric (no "hybrid" in the
    text), plug-in, hybrid, hydrogen and CNG.  Anything else is PETROL.
    """
    text = fuel_raw.lower()
    if "diesel" in text:
        return "DIESEL"
    if "electric" in text and "hybrid" not in text:
        return "ELECTRIC"
    if "plug" in text:
        return "PLUG-IN HYBRID"
    if "hybrid" in text:
        return "HYBRID"
    if "hydrogen" in text:
        return "HYDROGEN"
    if "cng" in text:
        return "CNG"
    return "PETROL"


def _classify_engine(engine_raw):
    """Map a raw 'Engines' cell to an engine layout label, or ``"Engine"``.

    The first marker found wins, in this order: V8, V12, V10, V6, W12, W16.
    """
    text = engine_raw.lower()
    for marker in ("v8", "v12", "v10", "v6", "w12", "w16"):
        if marker in text:
            return marker.upper()
    return "Engine"


def convert_data():
    """Build the cars knowledge graph from the CSV and write it as Turtle.

    Loads ``cars_ontology.ttl`` when present, reads ``Cars Datasets 2025.csv``
    (parent directory first, then the working directory), adds one car node
    per row together with its manufacturer, fuel type, engine, body-type
    classes, numeric properties and DBpedia ``owl:sameAs`` links, and finally
    serialises everything to ``cars_knowledge_graph.ttl``.

    Returns ``None``.  If the CSV cannot be read, an error is printed and no
    file is written.
    """
    # Load or Create Graph
    g = Graph()
    g.bind("ex", EX)
    g.bind("owl", OWL)
    g.bind("rdfs", RDFS)

    # Load Ontology T-Box (if exists, to keep definitions)
    if os.path.exists("cars_ontology.ttl"):
        g.parse("cars_ontology.ttl", format="turtle")

    # DBpedia Mappings
    dbpedia_manufacturers = {
        "FERRARI": "http://dbpedia.org/resource/Ferrari",
        "ROLLS ROYCE": "http://dbpedia.org/resource/Rolls-Royce_Motor_Cars",
        "FORD": "http://dbpedia.org/resource/Ford_Motor_Company",
        "MERCEDES": "http://dbpedia.org/resource/Mercedes-Benz",
        "AUDI": "http://dbpedia.org/resource/Audi",
        "BMW": "http://dbpedia.org/resource/BMW",
        "ASTON MARTIN": "http://dbpedia.org/resource/Aston_Martin",
        "BENTLEY": "http://dbpedia.org/resource/Bentley",
        "LAMBORGHINI": "http://dbpedia.org/resource/Lamborghini",
        "TOYOTA": "http://dbpedia.org/resource/Toyota",
        "NISSAN": "http://dbpedia.org/resource/Nissan",
        "VOLVO": "http://dbpedia.org/resource/Volvo_Cars",
        "KIA": "http://dbpedia.org/resource/Kia",
        "HONDA": "http://dbpedia.org/resource/Honda",
        "HYUNDAI": "http://dbpedia.org/resource/Hyundai_Motor_Company",
        "MAHINDRA": "http://dbpedia.org/resource/Mahindra_&_Mahindra",
        "MARUTI SUZUKI": "http://dbpedia.org/resource/Maruti_Suzuki",
        "VOLKSWAGEN": "http://dbpedia.org/resource/Volkswagen",
        "PORSCHE": "http://dbpedia.org/resource/Porsche",
        "CADILLAC": "http://dbpedia.org/resource/Cadillac",
        "TATA MOTORS": "http://dbpedia.org/resource/Tata_Motors",
        "TESLA": "http://dbpedia.org/resource/Tesla,_Inc.",
        "JEEP": "http://dbpedia.org/resource/Jeep",
        "MAZDA": "http://dbpedia.org/resource/Mazda",
        "CHEVROLET": "http://dbpedia.org/resource/Chevrolet",
        "GMC": "http://dbpedia.org/resource/GMC_(automobile)",
        "PEUGEOT": "http://dbpedia.org/resource/Peugeot",
        "BUGATTI": "http://dbpedia.org/resource/Bugatti_Automobiles",
        "JAGUAR LAND ROVER": "http://dbpedia.org/resource/Jaguar_Land_Rover",
        "ACURA": "http://dbpedia.org/resource/Acura",
        "MITSUBISHI": "http://dbpedia.org/resource/Mitsubishi_Motors",
    }

    dbpedia_body = {
        "Coupe": "http://dbpedia.org/resource/Coupe",
        "Sedan": "http://dbpedia.org/resource/Sedan_(automobile)",
        "SUV": "http://dbpedia.org/resource/Sport_utility_vehicle",
        "SuperCar": "http://dbpedia.org/resource/Supercar",
        "Car": "http://dbpedia.org/resource/Car",
    }

    # Fuel Mappings
    dbpedia_fuels = {
        "PETROL": "http://dbpedia.org/resource/Gasoline",
        "DIESEL": "http://dbpedia.org/resource/Diesel_fuel",
        "ELECTRIC": "http://dbpedia.org/resource/Electric_vehicle",  # Linking to EV concept for fuel type context
        "HYBRID": "http://dbpedia.org/resource/Hybrid_vehicle",
        "PLUG-IN HYBRID": "http://dbpedia.org/resource/Plug-in_hybrid",
        "HYDROGEN": "http://dbpedia.org/resource/Hydrogen_fuel",
        "CNG": "http://dbpedia.org/resource/Compressed_natural_gas",
    }

    # Engine Mappings (Common types).
    # NOTE(review): "I4" and "ELECTRIC" are never produced by the engine
    # classifier, so those two entries are currently unused.  An "ELECTRIC"
    # engine node would also share its URI with the ELECTRIC fuel node.
    dbpedia_engines = {
        "V8": "http://dbpedia.org/resource/V8_engine",
        "V10": "http://dbpedia.org/resource/V10_engine",
        "V12": "http://dbpedia.org/resource/V12_engine",
        "V6": "http://dbpedia.org/resource/V6_engine",
        "W12": "http://dbpedia.org/resource/W12_engine",
        "W16": "http://dbpedia.org/resource/W16_engine",
        "I4": "http://dbpedia.org/resource/Inline-four_engine",
        "ELECTRIC": "http://dbpedia.org/resource/Electric_motor",
    }

    # Load CSV
    csv_path = "../Cars Datasets 2025.csv"
    if not os.path.exists(csv_path):
        csv_path = "Cars Datasets 2025.csv"

    try:
        df = pd.read_csv(csv_path, encoding='latin1')
    except (OSError, ValueError) as e:
        # OSError covers a missing/unreadable file; pandas' parser and
        # empty-data errors derive from ValueError.
        print(f"Error reading CSV: {e}")
        return

    print(f"Processing {len(df)} rows...")

    for _, row in df.iterrows():
        # Clean Data
        car_name = str(row['Cars Names']).strip()
        comp_name_raw = str(row['Company Names']).strip()
        comp_name_upper = comp_name_raw.upper()

        # Normalize names for URIs.  Without the escaping done by
        # _uri_suffix, a name containing e.g. a double quote produces an IRI
        # that rdflib refuses to serialise, losing the whole run at the end.
        comp_uri = EX[_uri_suffix(comp_name_upper)]
        car_uri = EX[_uri_suffix(
            car_name.replace("/", "-").replace("(", "").replace(")", "")
        )]

        # Add Type
        g.add((car_uri, RDF.type, EX.Car))
        g.add((comp_uri, RDF.type, EX.Manufacturer))

        # Interlinking: Manufacturer
        if comp_name_upper in dbpedia_manufacturers:
            g.add((comp_uri, OWL.sameAs, URIRef(dbpedia_manufacturers[comp_name_upper])))

        # Fuel Type
        fuel_clean = _classify_fuel(str(row['Fuel Types']).strip())
        fuel_uri = EX[fuel_clean.replace(" ", "_").replace("-", "_")]
        g.add((fuel_uri, RDF.type, EX.FuelType))
        g.add((car_uri, EX.usesFuel, fuel_uri))

        if fuel_clean in dbpedia_fuels:
            g.add((fuel_uri, OWL.sameAs, URIRef(dbpedia_fuels[fuel_clean])))

        # Engine
        engine_clean = _classify_engine(str(row['Engines']).strip())
        engine_uri = EX[engine_clean.replace(" ", "_")]
        g.add((engine_uri, RDF.type, EX.Engine))
        g.add((car_uri, EX.hasEngine, engine_uri))

        if engine_clean in dbpedia_engines:
            g.add((engine_uri, OWL.sameAs, URIRef(dbpedia_engines[engine_clean])))

        # Determine Car Subclass & Interlinking
        seats = clean_seats(row['Seats'])
        price = clean_price(row['Cars Prices'])
        top_speed = clean_number(row['Total Speed'])

        # Two seats -> Coupe, four or more -> Sedan; other counts stay plain Car.
        if seats == 2:
            g.add((car_uri, RDF.type, EX.Coupe))
            g.add((EX.Coupe, OWL.sameAs, URIRef(dbpedia_body["Coupe"])))  # Class-level link
        elif seats >= 4:
            g.add((car_uri, RDF.type, EX.Sedan))
            g.add((EX.Sedan, OWL.sameAs, URIRef(dbpedia_body["Sedan"])))

        if top_speed > 300:
            g.add((car_uri, RDF.type, EX.SuperCar))
            g.add((EX.SuperCar, OWL.sameAs, URIRef(dbpedia_body["SuperCar"])))

        # Add Properties
        g.add((car_uri, EX.hasManufacturer, comp_uri))
        g.add((car_uri, RDFS.label, Literal(car_name, datatype=XSD.string)))
        g.add((comp_uri, RDFS.label, Literal(comp_name_raw, datatype=XSD.string)))
        g.add((fuel_uri, RDFS.label, Literal(fuel_clean, datatype=XSD.string)))

        g.add((car_uri, EX.hasPriceValue, Literal(price, datatype=XSD.float)))
        g.add((car_uri, EX.hasSeatCount, Literal(seats, datatype=XSD.integer)))
        g.add((car_uri, EX.hasTopSpeedKMH, Literal(int(top_speed), datatype=XSD.integer)))

        hp = clean_number(row['HorsePower'])
        g.add((car_uri, EX.hasHorsePowerValue, Literal(int(hp), datatype=XSD.integer)))

    # Save Graph
    g.serialize(destination="cars_knowledge_graph.ttl", format="turtle")
    print(f"Knowledge Graph saved to cars_knowledge_graph.ttl with {len(g)} triples.")
206
+
207
+ if __name__ == "__main__":
208
+ convert_data()
src/ontology.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from rdflib import Graph, Namespace, RDF, RDFS, OWL, Literal, XSD
2
+
3
def create_ontology():
    """Build the cars ontology (T-Box) and write it to ``cars_ontology.ttl``.

    Declares the OWL classes, the ``rdfs:subClassOf`` hierarchy, the object
    properties and the datatype properties (each with domain ``ex:Car``),
    then serialises the graph as Turtle and prints a confirmation.
    """
    g = Graph()

    # Define Namespace
    EX = Namespace("http://example.org/cars/")
    g.bind("ex", EX)

    # Enable OWL/RDFS
    g.bind("owl", OWL)

    # ==========================
    # CLASSES (Goal: 20+)
    # ==========================
    classes = [
        "Car", "Component", "Engine", "FuelType", "Manufacturer",
        "Color", "PassengerCar", "TwoSeaterCar", "FourSeaterCar",
        "FiveSeaterCar", "Coupe", "Sedan", "SUV", "SportsCar",
        "SuperCar", "HyperCar", "EconomyCar", "ElectricCar",
        "ICECar", "HybridCar", "PetrolCar", "DieselCar",
        "VEngine", "V8Engine", "V10Engine", "V12Engine",
        "ElectricMotor", "EcoFriendlyCar", "LuxuryCar", "LuxurySportCar",
    ]

    for cls in classes:
        g.add((EX[cls], RDF.type, OWL.Class))

    # Class Hierarchy (child -> parent).
    subclass_of = {
        "PassengerCar": "Car",
        "SportsCar": "Car",
        "Engine": "Component",
        "ElectricCar": "Car",
        "PetrolCar": "Car",
        # The project README lists these as Car subclasses / Engine kinds,
        # and the data converter types cars as Coupe, Sedan and SuperCar,
        # but no subClassOf axiom was emitted for them before.
        "HyperCar": "Car",
        "SuperCar": "Car",
        "Sedan": "Car",
        "Coupe": "Car",
        "LuxuryCar": "Car",
        "V8Engine": "Engine",
        "V12Engine": "Engine",
    }

    for child, parent in subclass_of.items():
        g.add((EX[child], RDFS.subClassOf, EX[parent]))

    # ==========================
    # PROPERTIES (Goal: 7+ Object, 7+ Data)
    # ==========================

    # Object Properties
    obj_props = [
        "hasManufacturer", "manufactures", "hasEngine", "usesFuel",
        "hasCompetitor", "isModelVariantOf", "hasTrimLevel",
    ]

    for prop in obj_props:
        g.add((EX[prop], RDF.type, OWL.ObjectProperty))

    # Domain/Range examples
    g.add((EX.hasManufacturer, RDFS.domain, EX.Car))
    g.add((EX.hasManufacturer, RDFS.range, EX.Manufacturer))

    # Datatype Properties
    data_props = [
        "hasPriceValue", "hasHorsePowerValue", "hasTopSpeedKMH",
        "hasSeatCount", "hasTorqueNm", "has0to100Sec", "hasEngineDisplacement",
    ]

    for prop in data_props:
        g.add((EX[prop], RDF.type, OWL.DatatypeProperty))
        g.add((EX[prop], RDFS.domain, EX.Car))

    # Save Ontology T-Box
    g.serialize(destination="cars_ontology.ttl", format="turtle")
    print("Ontology T-Box saved to cars_ontology.ttl")
66
+
67
+ if __name__ == "__main__":
68
+ create_ontology()
src/publish.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Query, Request, HTTPException
2
+ from fastapi.responses import JSONResponse, HTMLResponse
3
+ from rdflib import Graph
4
+ import os
5
+ import uvicorn
6
+
7
+ app = FastAPI(title="Cars Knowledge Graph SPARQL Endpoint")
8
+
9
# Load the Knowledge Graph once at import time; every request queries this
# in-memory graph.  A missing file leaves the graph empty and only warns.
GRAPH_FILE = "cars_knowledge_graph.ttl"
g = Graph()

if not os.path.exists(GRAPH_FILE):
    print(f"WARNING: {GRAPH_FILE} not found. Please run convert_data.py first.")
else:
    print(f"Loading Knowledge Graph from {GRAPH_FILE}...")
    g.parse(GRAPH_FILE, format="turtle")
    print(f"Graph loaded with {len(g)} triples.")
19
+
20
@app.get("/", response_class=HTMLResponse)
async def home():
    """Serve a small HTML landing page that points at ``/sparql`` and ``/docs``."""
    landing_page = """
    <h1>Cars Knowledge Graph SPARQL Endpoint (FastAPI)</h1>
    <p>The Knowledge Graph is published and accessible.</p>
    <p>Send SPARQL queries to: <code>/sparql</code></p>
    <h3>Example Query:</h3>
    <pre>
    SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10
    </pre>
    <p>View API Docs at: <a href="/docs">/docs</a></p>
    """
    return landing_page
32
+
33
async def run_query(query: str):
    """Execute *query* against the in-memory graph and return JSON-ready data.

    Response shapes:
      * SELECT: ``{"results": [{var: value, ...}, ...]}`` with every value
        rendered as a string and unbound variables omitted from their row.
      * ASK: ``{"results": [], "boolean": <answer>}``.
      * CONSTRUCT / DESCRIBE: ``{"results": [{"s": .., "p": .., "o": ..}, ...]}``.

    Raises:
        HTTPException: status 400 when *query* is empty or missing, status
            500 when parsing or evaluating the query fails.
    """
    if not query:
        raise HTTPException(status_code=400, detail="No query provided")

    try:
        results = g.query(query)

        # ASK and CONSTRUCT/DESCRIBE results have no variables, so the
        # variable loop below used to emit meaningless empty dicts for them.
        if results.type == "ASK":
            return {"results": [], "boolean": bool(results.askAnswer)}

        if results.type in ("CONSTRUCT", "DESCRIBE"):
            return {
                "results": [
                    {"s": str(s), "p": str(p), "o": str(o)}
                    for s, p, o in results
                ]
            }

        # Format SELECT results as JSON
        variables = results.vars or []
        res_list = [
            {
                str(var): str(row[i])
                for i, var in enumerate(variables)
                if row[i] is not None
            }
            for row in results
        ]
        return {"results": res_list}

    except Exception as e:
        # Endpoint boundary: report any rdflib failure to the client
        # instead of letting it surface as an unhandled server error.
        raise HTTPException(status_code=500, detail=str(e)) from e
54
+
55
@app.get("/sparql")
async def sparql_get(query: str = Query(..., description="SPARQL Query")):
    """Answer a SPARQL query supplied in the required ``query`` URL parameter."""
    response = await run_query(query)
    return response
58
+
59
@app.post("/sparql")
async def sparql_post(request: Request):
    """Answer a SPARQL query sent via POST.

    A URL-encoded form body is read through its ``query`` field; any other
    content type is treated as the raw query text, decoded as UTF-8.
    """
    declared_type = request.headers.get("content-type", "")
    is_form = "application/x-www-form-urlencoded" in declared_type

    if is_form:
        fields = await request.form()
        query = fields.get("query")
    else:
        payload = await request.body()
        query = payload.decode("utf-8")

    return await run_query(query)
71
+
72
+ if __name__ == "__main__":
73
+ print("Starting SPARQL Endpoint on http://localhost:8000")
74
+ uvicorn.run(app, host="0.0.0.0", port=8000)
src/validate.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from rdflib import Graph, Namespace
2
+ import pandas as pd
3
+
4
def run_validation():
    """Run the competency-question SPARQL queries against the generated graph.

    Loads ``cars_knowledge_graph.ttl``, prints the rows of four local queries,
    then attempts a federated ``SERVICE`` query against DBpedia.  If that
    fails, falls back to SPARQLWrapper (when installed) to fetch the abstract
    of one interlinked manufacturer.  Everything is reported via ``print``;
    the function returns ``None``.
    """
    g = Graph()
    try:
        g.parse("cars_knowledge_graph.ttl", format="turtle")
    except FileNotFoundError:
        # Give an actionable hint instead of a bare traceback.
        print("cars_knowledge_graph.ttl not found. Please run src/convert_data.py first.")
        return

    local_queries = {
        "1. List all cars manufactured by Ferrari": """
            PREFIX ex: <http://example.org/cars/>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

            SELECT ?car_name
            WHERE {
                ?car ex:hasManufacturer ?manu .
                ?manu rdfs:label ?manu_name .
                FILTER (REGEX(?manu_name, "Ferrari", "i"))
                ?car rdfs:label ?car_name .
            }
        """,
        "2. Cars with HorsePower > 800": """
            PREFIX ex: <http://example.org/cars/>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

            SELECT ?car_name ?hp
            WHERE {
                ?car ex:hasHorsePowerValue ?hp .
                ?car rdfs:label ?car_name .
                FILTER (?hp > 800)
            }
        """,
        "3. Count of 2-Seater cars": """
            PREFIX ex: <http://example.org/cars/>

            SELECT (COUNT(?car) as ?count)
            WHERE {
                ?car a ex:Coupe .
            }
        """,
        "4. Average Price of cars": """
            PREFIX ex: <http://example.org/cars/>

            SELECT (AVG(?price) as ?avg_price)
            WHERE {
                ?car ex:hasPriceValue ?price .
            }
        """,
    }

    # Kept separate from the local queries so it is selected by name rather
    # than by its position in a dict.
    fed_title = "5. (Federated) Get Manufacturer Description from DBpedia"
    fed_query = """
        PREFIX ex: <http://example.org/cars/>
        PREFIX owl: <http://www.w3.org/2002/07/owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX dbo: <http://dbpedia.org/ontology/>

        SELECT ?manu_name ?desc
        WHERE {
            ?manu a ex:Manufacturer ;
                  rdfs:label ?manu_name ;
                  owl:sameAs ?dbpedia_link .

            SERVICE <http://dbpedia.org/sparql> {
                ?dbpedia_link dbo:abstract ?desc .
                FILTER (LANG(?desc) = 'en')
            }
        }
        LIMIT 3
    """

    print(f"Loaded Graph with {len(g)} triples.\n")

    # Run Local Queries
    for title, query in local_queries.items():
        print(f"--- {title} ---")
        try:
            for row in g.query(query):
                print(row)
        except Exception as e:
            # Keep going: one broken query should not hide the others.
            print(f"Query Error: {e}")
        print("\n")

    # Run Federated Query (Special Handling)
    print(f"--- {fed_title} ---")
    print("Attempting to run SERVICE query via rdflib (may fail due to DBpedia restrictions)...")

    try:
        for row in g.query(fed_query):
            print(row)
    except Exception as e:
        print(f"Standard SERVICE query failed ({e}).\nData is likely interlinked, but local engine cannot negotiate with DBpedia.")
        print("Fallback: Verifying Interlinking manually via SPARQLWrapper...")

        try:
            from SPARQLWrapper import SPARQLWrapper, JSON
            sparql = SPARQLWrapper("http://dbpedia.org/sparql")

            # Get a manufacturer link from our graph
            manu_link_query = """
                PREFIX ex: <http://example.org/cars/>
                PREFIX owl: <http://www.w3.org/2002/07/owl#>
                SELECT ?link WHERE { ?m a ex:Manufacturer ; owl:sameAs ?link . } LIMIT 1
            """
            links = list(g.query(manu_link_query))
            if not links:
                # Previously this surfaced as "list index out of range".
                print("Fallback failed: no owl:sameAs link found for any manufacturer.")
                return
            link = links[0][0]
            print(f"Found Interlink: {link}")

            # Query DBpedia for that link
            dbpedia_q = f"""
                PREFIX dbo: <http://dbpedia.org/ontology/>
                SELECT ?desc WHERE {{ <{link}> dbo:abstract ?desc . FILTER (LANG(?desc) = 'en') }} LIMIT 1
            """
            sparql.setQuery(dbpedia_q)
            sparql.setReturnFormat(JSON)
            results = sparql.query().convert()

            for result in results["results"]["bindings"]:
                print(f"Description from DBpedia: {result['desc']['value'][:200]}...")

        except ImportError:
            print("SPARQLWrapper not installed. Cannot run fallback.")
        except Exception as fallback_error:
            print(f"Fallback failed: {fallback_error}")
127
+
128
+ if __name__ == "__main__":
129
+ run_validation()