umer6016
commited on
Commit
·
1d12e97
0
Parent(s):
Fresh Deploy
Browse files- .gitignore +10 -0
- Dockerfile +19 -0
- README.md +67 -0
- app.py +133 -0
- cars_knowledge_graph.ttl +0 -0
- cars_ontology.ttl +106 -0
- requirements.txt +3 -0
- src/app.py +133 -0
- src/convert_data.py +208 -0
- src/ontology.py +68 -0
- src/publish.py +74 -0
- src/validate.py +129 -0
.gitignore
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Ignore PDF files
|
| 3 |
+
*.pdf
|
| 4 |
+
|
| 5 |
+
# Ignore Python cache
|
| 6 |
+
__pycache__/
|
| 7 |
+
*.pyc
|
| 8 |
+
|
| 9 |
+
# Ignore simple artifacts
|
| 10 |
+
*.csv
|
Dockerfile
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
# Install dependencies
|
| 6 |
+
COPY requirements.txt .
|
| 7 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 8 |
+
|
| 9 |
+
# Copy application code and data
|
| 10 |
+
COPY app.py .
|
| 11 |
+
COPY cars_knowledge_graph.ttl .
|
| 12 |
+
COPY cars_ontology.ttl .
|
| 13 |
+
COPY src/ src/
|
| 14 |
+
|
| 15 |
+
# Expose Streamlit port
|
| 16 |
+
EXPOSE 7860
|
| 17 |
+
|
| 18 |
+
# Run the application
|
| 19 |
+
CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
|
README.md
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Cars Knowledge Graph
|
| 3 |
+
emoji: 🚗
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: red
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
# Knowledge Representation Project: Cars Knowledge Graph
|
| 11 |
+
|
| 12 |
+
## 1. Introduction & Motivation
|
| 13 |
+
This project transforms a flat CSV dataset of **Car Specifications (2025)** into a semantic **Knowledge Graph (RDF/OWL)**.
|
| 14 |
+
**Domain**: Automotive Industry (Cars, Manufacturers, Engines, Performance).
|
| 15 |
+
**Motivation**: To enable complex querying of car data that is not possible with simple tabular lookups, such as inferring "High Performance" vehicles or categorizing cars by complex criteria (e.g., specific engine types + price ranges).
|
| 16 |
+
|
| 17 |
+
## 2. Ontology Design (Conceptual Model)
|
| 18 |
+
The Ontology is designed using **RDFLib** and conforms to the project requirements (20+ Classes, 7+ Properties).
|
| 19 |
+
|
| 20 |
+
### Core Classes
|
| 21 |
+
- **Car**: The central entity.
|
| 22 |
+
- **Subclasses**: `SportsCar`, `HyperCar`, `Sedan`, `Coupe`, `ElectricCar`, `PetrolCar`, `LuxuryCar`.
|
| 23 |
+
- **Manufacturer**: Companies like Ferrari, Tesla, Bugatti.
|
| 24 |
+
- **Component**: `Engine`, `V8Engine`, `V12Engine`.
|
| 25 |
+
- **Defined Classes**:
|
| 26 |
+
- `SuperCar`: Cars with Top Speed > 300 km/h.
|
| 27 |
+
|
| 28 |
+
### Properties
|
| 29 |
+
- **Object Properties**: `hasManufacturer`, `hasEngine`, `usesFuel`, `manufactures`.
|
| 30 |
+
- **Datatype Properties**: `hasPriceValue`, `hasHorsePowerValue`, `hasSeatCount`, `hasTopSpeedKMH`.
|
| 31 |
+
|
| 32 |
+
## 3. Knowledge Graph Construction
|
| 33 |
+
- **Source**: `Cars Datasets 2025.csv`
|
| 34 |
+
- **Output**: `cars_knowledge_graph.ttl` (Turtle Syntax)
|
| 35 |
+
- **Triples Generated**: ~9,842
|
| 36 |
+
|
| 37 |
+
## 4. Competency Questions & Validation
|
| 38 |
+
The following questions guided the design and were validated via SPARQL:
|
| 39 |
+
|
| 40 |
+
1. **"List all cars manufactured by Ferrari"**
|
| 41 |
+
- Verified: Returns models like `SF90 STRADALE`, `ROMA`, `812 GTS`.
|
| 42 |
+
2. **"Which cars have > 800 HorsePower?"**
|
| 43 |
+
- Verified: Returns `Bugatti Chiron`, `Tesla Roadster 2`, etc.
|
| 44 |
+
3. **"Count of 2-Seater Coupes"**
|
| 45 |
+
- Result: 147 vehicles.
|
| 46 |
+
4. **"What is the average price of all cars?"**
|
| 47 |
+
- Result: ~$137,193 USD.
|
| 48 |
+
|
| 49 |
+
## 5. How to Run
|
| 50 |
+
### Prerequisites
|
| 51 |
+
- Python 3.x
|
| 52 |
+
- Libraries: `rdflib`, `pandas`, `streamlit` (all listed in `requirements.txt`)
|
| 53 |
+
|
| 54 |
+
### Steps
|
| 55 |
+
1. **Install Dependencies**:
|
| 56 |
+
```bash
|
| 57 |
+
pip install -r requirements.txt
|
| 58 |
+
```
|
| 59 |
+
2. **Generate Knowledge Graph**:
|
| 60 |
+
```bash
|
| 61 |
+
python src/ontology.py # Generates Ontology Schema
|
| 62 |
+
python src/convert_data.py # Generates Graph from CSV
|
| 63 |
+
```
|
| 64 |
+
3. **Run Validation Queries**:
|
| 65 |
+
```bash
|
| 66 |
+
python src/validate.py
|
| 67 |
+
```
|
app.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from rdflib import Graph, Namespace
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
# Page Config
|
| 7 |
+
st.set_page_config(page_title="Cars Knowledge Graph Demo", layout="wide")
|
| 8 |
+
|
| 9 |
+
# Load Graph
|
| 10 |
+
@st.cache_resource
def load_graph():
    """Return the knowledge graph parsed from ``cars_knowledge_graph.ttl``.

    The path is relative to the current working directory. When the file is
    absent an empty ``Graph`` is returned rather than raising, so the caller
    is expected to check the graph's length. Decorated with
    ``st.cache_resource`` so Streamlit can reuse the parsed graph.
    """
    kg_file = "cars_knowledge_graph.ttl"
    graph = Graph()
    if not os.path.exists(kg_file):
        # Nothing to parse: hand back the empty graph and let the caller decide.
        return graph
    graph.parse(kg_file, format="turtle")
    return graph
|
| 17 |
+
|
| 18 |
+
# Load the graph; any load/parse failure is reported in the UI and the script stops.
try:
    g = load_graph()
except Exception as e:
    st.error(f"Failed to load graph: {e}")
    st.stop()

# load_graph() returns an empty graph when the Turtle file is missing.
if len(g) == 0:
    st.warning("Graph is empty or not found. Please run 'src/convert_data.py' first.")
    st.stop()

# Namespaces
EX = Namespace("http://example.org/cars/")

# Sidebar Filters
st.sidebar.header("Filter Cars")

# 1. Manufacturer Filter
manu_query = """
    PREFIX ex: <http://example.org/cars/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT DISTINCT ?name WHERE {
        ?m a ex:Manufacturer ; rdfs:label ?name .
    } ORDER BY ?name
"""
manufacturers = ["All"] + [str(row.name) for row in g.query(manu_query)]
selected_manu = st.sidebar.selectbox("Manufacturer", manufacturers)

# 2. Price Range
price_query = """
    PREFIX ex: <http://example.org/cars/>
    SELECT (MIN(?p) as ?min) (MAX(?p) as ?max) WHERE { ?s ex:hasPriceValue ?p }
"""
price_rows = list(g.query(price_query))
# The aggregates are unbound (None) when the graph holds no price triples;
# stop with a message instead of crashing in float().
if not price_rows or price_rows[0][0] is None or price_rows[0][1] is None:
    st.warning("No price data found in the graph.")
    st.stop()
min_price, max_price = float(price_rows[0][0]), float(price_rows[0][1])
selected_price = st.sidebar.slider("Max Price (USD)", min_price, max_price, max_price)

# 3. Min Horsepower
hp_query = """
    PREFIX ex: <http://example.org/cars/>
    SELECT (MIN(?hp) as ?min) (MAX(?hp) as ?max) WHERE { ?s ex:hasHorsePowerValue ?hp }
"""
hp_rows = list(g.query(hp_query))
# Same guard as for prices: unbound aggregates would break int().
if not hp_rows or hp_rows[0][0] is None or hp_rows[0][1] is None:
    st.warning("No horsepower data found in the graph.")
    st.stop()
min_hp, max_hp = int(hp_rows[0][0]), int(hp_rows[0][1])
selected_hp = st.sidebar.slider("Min Horsepower", min_hp, max_hp, min_hp)

# Main Area
st.title("🚗 Cars Knowledge Graph Explorer")
st.markdown("This application queries the RDF Knowledge Graph directly using **SPARQL**.")

# Construct Query based on filters
manu_filter = ""
if selected_manu != "All":
    # The label is embedded in a double-quoted SPARQL string literal, so
    # backslashes, quotes and line breaks must be escaped; otherwise a label
    # containing them would terminate the literal and corrupt the query.
    escaped_manu = (
        selected_manu.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    manu_filter = f'FILTER (?manuName = "{escaped_manu}")'

sparql_query = f"""
    PREFIX ex: <http://example.org/cars/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

    SELECT ?carName ?manuName ?price ?hp ?topSpeed ?seats
    WHERE {{
        ?car a ex:Car ;
             rdfs:label ?carName ;
             ex:hasManufacturer ?manu ;
             ex:hasPriceValue ?price ;
             ex:hasHorsePowerValue ?hp ;
             ex:hasTopSpeedKMH ?topSpeed ;
             ex:hasSeatCount ?seats .

        ?manu rdfs:label ?manuName .

        FILTER (?price <= {selected_price})
        FILTER (?hp >= {selected_hp})
        {manu_filter}
    }}
    ORDER BY DESC(?price)
    LIMIT 100
"""

# Run Query
results = g.query(sparql_query)

# Display Results
data = [
    {
        "Car Model": str(row.carName),
        "Manufacturer": str(row.manuName),
        "Price ($)": f"${float(row.price):,.2f}",
        "Horsepower": int(row.hp),
        "Top Speed (km/h)": int(row.topSpeed),
        "Seats": int(row.seats),
    }
    for row in results
]

df = pd.DataFrame(data)

col1, col2, col3 = st.columns(3)
col1.metric("Total Cars Found", len(df))
col2.metric("Graph Triples", len(g))
col3.metric("Selected Manufacturer", selected_manu)

if not df.empty:
    st.dataframe(df, use_container_width=True)
else:
    st.info("No cars match your filters.")

# Advanced: Raw SPARQL
with st.expander("Run Custom SPARQL Query"):
    # The sample declares every prefix it uses (including rdfs:) so it does
    # not depend on namespace bindings supplied implicitly by the graph.
    custom_query = st.text_area("SPARQL Query", """
    PREFIX ex: <http://example.org/cars/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?name ?price WHERE {
        ?c ex:hasPriceValue ?price ;
           rdfs:label ?name .
    } LIMIT 5
    """)
    if st.button("Run Query"):
        # User-supplied query text: surface any parse/evaluation error in the UI.
        try:
            raw_res = g.query(custom_query)
            st.write(list(raw_res))
        except Exception as e:
            st.error(f"Error: {e}")
|
cars_knowledge_graph.ttl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
cars_ontology.ttl
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
@prefix ex: <http://example.org/cars/> .
|
| 2 |
+
@prefix owl: <http://www.w3.org/2002/07/owl#> .
|
| 3 |
+
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
|
| 4 |
+
|
| 5 |
+
ex:Color a owl:Class .
|
| 6 |
+
|
| 7 |
+
ex:Coupe a owl:Class .
|
| 8 |
+
|
| 9 |
+
ex:DieselCar a owl:Class .
|
| 10 |
+
|
| 11 |
+
ex:EcoFriendlyCar a owl:Class .
|
| 12 |
+
|
| 13 |
+
ex:EconomyCar a owl:Class .
|
| 14 |
+
|
| 15 |
+
ex:ElectricCar a owl:Class ;
|
| 16 |
+
rdfs:subClassOf ex:Car .
|
| 17 |
+
|
| 18 |
+
ex:ElectricMotor a owl:Class .
|
| 19 |
+
|
| 20 |
+
ex:Engine a owl:Class ;
|
| 21 |
+
rdfs:subClassOf ex:Component .
|
| 22 |
+
|
| 23 |
+
ex:FiveSeaterCar a owl:Class .
|
| 24 |
+
|
| 25 |
+
ex:FourSeaterCar a owl:Class .
|
| 26 |
+
|
| 27 |
+
ex:FuelType a owl:Class .
|
| 28 |
+
|
| 29 |
+
ex:HybridCar a owl:Class .
|
| 30 |
+
|
| 31 |
+
ex:HyperCar a owl:Class .
|
| 32 |
+
|
| 33 |
+
ex:ICECar a owl:Class .
|
| 34 |
+
|
| 35 |
+
ex:LuxuryCar a owl:Class .
|
| 36 |
+
|
| 37 |
+
ex:LuxurySportCar a owl:Class .
|
| 38 |
+
|
| 39 |
+
ex:PassengerCar a owl:Class ;
|
| 40 |
+
rdfs:subClassOf ex:Car .
|
| 41 |
+
|
| 42 |
+
ex:PetrolCar a owl:Class ;
|
| 43 |
+
rdfs:subClassOf ex:Car .
|
| 44 |
+
|
| 45 |
+
ex:SUV a owl:Class .
|
| 46 |
+
|
| 47 |
+
ex:Sedan a owl:Class .
|
| 48 |
+
|
| 49 |
+
ex:SportsCar a owl:Class ;
|
| 50 |
+
rdfs:subClassOf ex:Car .
|
| 51 |
+
|
| 52 |
+
ex:SuperCar a owl:Class .
|
| 53 |
+
|
| 54 |
+
ex:TwoSeaterCar a owl:Class .
|
| 55 |
+
|
| 56 |
+
ex:V10Engine a owl:Class .
|
| 57 |
+
|
| 58 |
+
ex:V12Engine a owl:Class .
|
| 59 |
+
|
| 60 |
+
ex:V8Engine a owl:Class .
|
| 61 |
+
|
| 62 |
+
ex:VEngine a owl:Class .
|
| 63 |
+
|
| 64 |
+
ex:has0to100Sec a owl:DatatypeProperty ;
|
| 65 |
+
rdfs:domain ex:Car .
|
| 66 |
+
|
| 67 |
+
ex:hasCompetitor a owl:ObjectProperty .
|
| 68 |
+
|
| 69 |
+
ex:hasEngine a owl:ObjectProperty .
|
| 70 |
+
|
| 71 |
+
ex:hasEngineDisplacement a owl:DatatypeProperty ;
|
| 72 |
+
rdfs:domain ex:Car .
|
| 73 |
+
|
| 74 |
+
ex:hasHorsePowerValue a owl:DatatypeProperty ;
|
| 75 |
+
rdfs:domain ex:Car .
|
| 76 |
+
|
| 77 |
+
ex:hasManufacturer a owl:ObjectProperty ;
|
| 78 |
+
rdfs:domain ex:Car ;
|
| 79 |
+
rdfs:range ex:Manufacturer .
|
| 80 |
+
|
| 81 |
+
ex:hasPriceValue a owl:DatatypeProperty ;
|
| 82 |
+
rdfs:domain ex:Car .
|
| 83 |
+
|
| 84 |
+
ex:hasSeatCount a owl:DatatypeProperty ;
|
| 85 |
+
rdfs:domain ex:Car .
|
| 86 |
+
|
| 87 |
+
ex:hasTopSpeedKMH a owl:DatatypeProperty ;
|
| 88 |
+
rdfs:domain ex:Car .
|
| 89 |
+
|
| 90 |
+
ex:hasTorqueNm a owl:DatatypeProperty ;
|
| 91 |
+
rdfs:domain ex:Car .
|
| 92 |
+
|
| 93 |
+
ex:hasTrimLevel a owl:ObjectProperty .
|
| 94 |
+
|
| 95 |
+
ex:isModelVariantOf a owl:ObjectProperty .
|
| 96 |
+
|
| 97 |
+
ex:manufactures a owl:ObjectProperty .
|
| 98 |
+
|
| 99 |
+
ex:usesFuel a owl:ObjectProperty .
|
| 100 |
+
|
| 101 |
+
ex:Component a owl:Class .
|
| 102 |
+
|
| 103 |
+
ex:Manufacturer a owl:Class .
|
| 104 |
+
|
| 105 |
+
ex:Car a owl:Class .
|
| 106 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
pandas
|
| 3 |
+
rdflib
|
src/app.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from rdflib import Graph, Namespace
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
# Page Config
|
| 7 |
+
st.set_page_config(page_title="Cars Knowledge Graph Demo", layout="wide")
|
| 8 |
+
|
| 9 |
+
# Load Graph
|
| 10 |
+
@st.cache_resource
def load_graph():
    """Return the knowledge graph parsed from ``cars_knowledge_graph.ttl``.

    The path is relative to the current working directory. When the file is
    absent an empty ``Graph`` is returned rather than raising, so the caller
    is expected to check the graph's length. Decorated with
    ``st.cache_resource`` so Streamlit can reuse the parsed graph.
    """
    kg_file = "cars_knowledge_graph.ttl"
    graph = Graph()
    if not os.path.exists(kg_file):
        # Nothing to parse: hand back the empty graph and let the caller decide.
        return graph
    graph.parse(kg_file, format="turtle")
    return graph
|
| 17 |
+
|
| 18 |
+
# Load the graph; any load/parse failure is reported in the UI and the script stops.
try:
    g = load_graph()
except Exception as e:
    st.error(f"Failed to load graph: {e}")
    st.stop()

# load_graph() returns an empty graph when the Turtle file is missing.
if len(g) == 0:
    st.warning("Graph is empty or not found. Please run 'src/convert_data.py' first.")
    st.stop()

# Namespaces
EX = Namespace("http://example.org/cars/")

# Sidebar Filters
st.sidebar.header("Filter Cars")

# 1. Manufacturer Filter
manu_query = """
    PREFIX ex: <http://example.org/cars/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT DISTINCT ?name WHERE {
        ?m a ex:Manufacturer ; rdfs:label ?name .
    } ORDER BY ?name
"""
manufacturers = ["All"] + [str(row.name) for row in g.query(manu_query)]
selected_manu = st.sidebar.selectbox("Manufacturer", manufacturers)

# 2. Price Range
price_query = """
    PREFIX ex: <http://example.org/cars/>
    SELECT (MIN(?p) as ?min) (MAX(?p) as ?max) WHERE { ?s ex:hasPriceValue ?p }
"""
price_rows = list(g.query(price_query))
# The aggregates are unbound (None) when the graph holds no price triples;
# stop with a message instead of crashing in float().
if not price_rows or price_rows[0][0] is None or price_rows[0][1] is None:
    st.warning("No price data found in the graph.")
    st.stop()
min_price, max_price = float(price_rows[0][0]), float(price_rows[0][1])
selected_price = st.sidebar.slider("Max Price (USD)", min_price, max_price, max_price)

# 3. Min Horsepower
hp_query = """
    PREFIX ex: <http://example.org/cars/>
    SELECT (MIN(?hp) as ?min) (MAX(?hp) as ?max) WHERE { ?s ex:hasHorsePowerValue ?hp }
"""
hp_rows = list(g.query(hp_query))
# Same guard as for prices: unbound aggregates would break int().
if not hp_rows or hp_rows[0][0] is None or hp_rows[0][1] is None:
    st.warning("No horsepower data found in the graph.")
    st.stop()
min_hp, max_hp = int(hp_rows[0][0]), int(hp_rows[0][1])
selected_hp = st.sidebar.slider("Min Horsepower", min_hp, max_hp, min_hp)

# Main Area
st.title("🚗 Cars Knowledge Graph Explorer")
st.markdown("This application queries the RDF Knowledge Graph directly using **SPARQL**.")

# Construct Query based on filters
manu_filter = ""
if selected_manu != "All":
    # The label is embedded in a double-quoted SPARQL string literal, so
    # backslashes, quotes and line breaks must be escaped; otherwise a label
    # containing them would terminate the literal and corrupt the query.
    escaped_manu = (
        selected_manu.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    manu_filter = f'FILTER (?manuName = "{escaped_manu}")'

sparql_query = f"""
    PREFIX ex: <http://example.org/cars/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

    SELECT ?carName ?manuName ?price ?hp ?topSpeed ?seats
    WHERE {{
        ?car a ex:Car ;
             rdfs:label ?carName ;
             ex:hasManufacturer ?manu ;
             ex:hasPriceValue ?price ;
             ex:hasHorsePowerValue ?hp ;
             ex:hasTopSpeedKMH ?topSpeed ;
             ex:hasSeatCount ?seats .

        ?manu rdfs:label ?manuName .

        FILTER (?price <= {selected_price})
        FILTER (?hp >= {selected_hp})
        {manu_filter}
    }}
    ORDER BY DESC(?price)
    LIMIT 100
"""

# Run Query
results = g.query(sparql_query)

# Display Results
data = [
    {
        "Car Model": str(row.carName),
        "Manufacturer": str(row.manuName),
        "Price ($)": f"${float(row.price):,.2f}",
        "Horsepower": int(row.hp),
        "Top Speed (km/h)": int(row.topSpeed),
        "Seats": int(row.seats),
    }
    for row in results
]

df = pd.DataFrame(data)

col1, col2, col3 = st.columns(3)
col1.metric("Total Cars Found", len(df))
col2.metric("Graph Triples", len(g))
col3.metric("Selected Manufacturer", selected_manu)

if not df.empty:
    st.dataframe(df, use_container_width=True)
else:
    st.info("No cars match your filters.")

# Advanced: Raw SPARQL
with st.expander("Run Custom SPARQL Query"):
    # The sample declares every prefix it uses (including rdfs:) so it does
    # not depend on namespace bindings supplied implicitly by the graph.
    custom_query = st.text_area("SPARQL Query", """
    PREFIX ex: <http://example.org/cars/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?name ?price WHERE {
        ?c ex:hasPriceValue ?price ;
           rdfs:label ?name .
    } LIMIT 5
    """)
    if st.button("Run Query"):
        # User-supplied query text: surface any parse/evaluation error in the UI.
        try:
            raw_res = g.query(custom_query)
            st.write(list(raw_res))
        except Exception as e:
            st.error(f"Error: {e}")
|
src/convert_data.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from rdflib import Graph, Namespace, RDF, RDFS, OWL, Literal, XSD, URIRef
|
| 3 |
+
import re
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
# Define Namespace
|
| 7 |
+
EX = Namespace("http://example.org/cars/")
|
| 8 |
+
|
| 9 |
+
def clean_price(value):
    """Return the first numeric amount found in a raw price cell as a float.

    Dollar signs and thousands separators are stripped before matching, so
    ``"$1,200.50"`` yields ``1200.5`` and a range such as
    ``"$12,000-$15,000"`` yields its lower bound.

    Args:
        value: Raw cell value (string, number, or a pandas missing value).

    Returns:
        The parsed amount, or ``0.0`` when the value is missing or contains
        no number.
    """
    if pd.isna(value):
        return 0.0
    val_str = str(value).replace('$', '').replace(',', '').strip()
    # Require at least one digit in the match: a bare "." (as in "approx."
    # or a trailing full stop) must not be captured, because float(".")
    # raises ValueError.
    match = re.search(r'\d+(?:\.\d+)?|\.\d+', val_str)
    return float(match.group(0)) if match else 0.0
|
| 14 |
+
|
| 15 |
+
def clean_number(value):
    """Return the first number found in a raw cell, ignoring thousands commas.

    Examples: ``"1,020 hp"`` -> ``1020.0``, ``"70-85 hp"`` -> ``70.0``.

    Args:
        value: Raw cell value (string, number, or a pandas missing value).

    Returns:
        The parsed number as a float, or ``0`` when the value is missing or
        contains no number.
    """
    if pd.isna(value):
        return 0
    # The match must start with a digit (or be a ".5"-style fraction) so that
    # bare punctuation before the number, e.g. the "." in "approx. 500 hp",
    # is not captured and fed to float(), which would raise ValueError.
    match = re.search(r'\d[\d,]*(?:\.\d+)?|\.\d+', str(value))
    if match:
        return float(match.group(0).replace(',', ''))
    return 0
|
| 21 |
+
|
| 22 |
+
def clean_seats(value):
    """Return the first integer found in a raw seat-count cell.

    Falls back to 2 when the value is missing or holds no digits; for a
    value such as ``"2+2"`` only the leading ``2`` is used.
    """
    fallback = 2
    if pd.isna(value):
        return fallback
    digits = re.search(r'(\d+)', str(value))
    if digits is None:
        return fallback
    return int(digits.group(1))
|
| 26 |
+
|
| 27 |
+
def convert_data():
    """Build the cars knowledge graph from the CSV dataset and save it as Turtle.

    Steps:
      1. Start a graph with the ``ex``/``owl``/``rdfs`` prefixes bound and, if
         ``cars_ontology.ttl`` exists, parse it in so the schema is kept.
      2. Read ``../Cars Datasets 2025.csv`` (falling back to
         ``Cars Datasets 2025.csv``) with latin1 encoding. If reading fails,
         the error is printed and the function returns without writing.
      3. For every row, add the car, its manufacturer, fuel type and engine,
         ``owl:sameAs`` links to DBpedia where a mapping exists, body/speed
         classifications, labels and numeric properties.
      4. Serialize the result to ``cars_knowledge_graph.ttl``.
    """

    def fuel_category(fuel_raw):
        # First matching rule wins; anything unrecognized is treated as petrol.
        text = fuel_raw.lower()
        if "diesel" in text:
            return "DIESEL"
        mentions_hybrid = "hybrid" in text
        if "electric" in text and not mentions_hybrid:
            return "ELECTRIC"
        if "plug" in text:
            return "PLUG-IN HYBRID"
        if mentions_hybrid:
            return "HYBRID"
        for keyword in ("hydrogen", "cng"):
            if keyword in text:
                return keyword.upper()
        return "PETROL"

    def engine_family(engine_raw):
        # Tokens are tested in this fixed order; the generic "Engine" is the default.
        text = engine_raw.lower()
        for token in ("v8", "v12", "v10", "v6", "w12", "w16"):
            if token in text:
                return token.upper()
        return "Engine"

    # Load or Create Graph
    graph = Graph()
    for prefix, namespace in (("ex", EX), ("owl", OWL), ("rdfs", RDFS)):
        graph.bind(prefix, namespace)

    # Load Ontology T-Box (if exists, to keep definitions)
    ontology_file = "cars_ontology.ttl"
    if os.path.exists(ontology_file):
        graph.parse(ontology_file, format="turtle")

    # DBpedia Mappings
    dbpedia_manufacturers = {
        "FERRARI": "http://dbpedia.org/resource/Ferrari",
        "ROLLS ROYCE": "http://dbpedia.org/resource/Rolls-Royce_Motor_Cars",
        "FORD": "http://dbpedia.org/resource/Ford_Motor_Company",
        "MERCEDES": "http://dbpedia.org/resource/Mercedes-Benz",
        "AUDI": "http://dbpedia.org/resource/Audi",
        "BMW": "http://dbpedia.org/resource/BMW",
        "ASTON MARTIN": "http://dbpedia.org/resource/Aston_Martin",
        "BENTLEY": "http://dbpedia.org/resource/Bentley",
        "LAMBORGHINI": "http://dbpedia.org/resource/Lamborghini",
        "TOYOTA": "http://dbpedia.org/resource/Toyota",
        "NISSAN": "http://dbpedia.org/resource/Nissan",
        "VOLVO": "http://dbpedia.org/resource/Volvo_Cars",
        "KIA": "http://dbpedia.org/resource/Kia",
        "HONDA": "http://dbpedia.org/resource/Honda",
        "HYUNDAI": "http://dbpedia.org/resource/Hyundai_Motor_Company",
        "MAHINDRA": "http://dbpedia.org/resource/Mahindra_&_Mahindra",
        "MARUTI SUZUKI": "http://dbpedia.org/resource/Maruti_Suzuki",
        "VOLKSWAGEN": "http://dbpedia.org/resource/Volkswagen",
        "PORSCHE": "http://dbpedia.org/resource/Porsche",
        "CADILLAC": "http://dbpedia.org/resource/Cadillac",
        "TATA MOTORS": "http://dbpedia.org/resource/Tata_Motors",
        "TESLA": "http://dbpedia.org/resource/Tesla,_Inc.",
        "JEEP": "http://dbpedia.org/resource/Jeep",
        "MAZDA": "http://dbpedia.org/resource/Mazda",
        "CHEVROLET": "http://dbpedia.org/resource/Chevrolet",
        "GMC": "http://dbpedia.org/resource/GMC_(automobile)",
        "PEUGEOT": "http://dbpedia.org/resource/Peugeot",
        "BUGATTI": "http://dbpedia.org/resource/Bugatti_Automobiles",
        "JAGUAR LAND ROVER": "http://dbpedia.org/resource/Jaguar_Land_Rover",
        "ACURA": "http://dbpedia.org/resource/Acura",
        "MITSUBISHI": "http://dbpedia.org/resource/Mitsubishi_Motors"
    }

    dbpedia_body = {
        "Coupe": "http://dbpedia.org/resource/Coupe",
        "Sedan": "http://dbpedia.org/resource/Sedan_(automobile)",
        "SUV": "http://dbpedia.org/resource/Sport_utility_vehicle",
        "SuperCar": "http://dbpedia.org/resource/Supercar",
        "Car": "http://dbpedia.org/resource/Car"
    }

    # Fuel Mappings
    dbpedia_fuels = {
        "PETROL": "http://dbpedia.org/resource/Gasoline",
        "DIESEL": "http://dbpedia.org/resource/Diesel_fuel",
        "ELECTRIC": "http://dbpedia.org/resource/Electric_vehicle",  # Linking to EV concept for fuel type context
        "HYBRID": "http://dbpedia.org/resource/Hybrid_vehicle",
        "PLUG-IN HYBRID": "http://dbpedia.org/resource/Plug-in_hybrid",
        "HYDROGEN": "http://dbpedia.org/resource/Hydrogen_fuel",
        "CNG": "http://dbpedia.org/resource/Compressed_natural_gas"
    }

    # Engine Mappings (Common types)
    dbpedia_engines = {
        "V8": "http://dbpedia.org/resource/V8_engine",
        "V10": "http://dbpedia.org/resource/V10_engine",
        "V12": "http://dbpedia.org/resource/V12_engine",
        "V6": "http://dbpedia.org/resource/V6_engine",
        "W12": "http://dbpedia.org/resource/W12_engine",
        "W16": "http://dbpedia.org/resource/W16_engine",
        "I4": "http://dbpedia.org/resource/Inline-four_engine",
        "ELECTRIC": "http://dbpedia.org/resource/Electric_motor"
    }

    # Load CSV: prefer the copy one directory up, else the working directory.
    csv_path = "../Cars Datasets 2025.csv"
    if not os.path.exists(csv_path):
        csv_path = "Cars Datasets 2025.csv"

    try:
        df = pd.read_csv(csv_path, encoding='latin1')
    except Exception as e:
        print(f"Error reading CSV: {e}")
        return

    print(f"Processing {len(df)} rows...")

    # Car-name clean-up for URIs: space -> "_", "/" -> "-", parentheses dropped.
    car_uri_table = str.maketrans({" ": "_", "/": "-", "(": None, ")": None})

    for _, row in df.iterrows():
        # Clean Data
        car_name = str(row['Cars Names']).strip()
        comp_name_raw = str(row['Company Names']).strip()
        comp_key = comp_name_raw.upper()

        # Normalize names for URIs
        comp_uri = EX[comp_key.replace(" ", "_")]
        car_uri = EX[car_name.translate(car_uri_table)]

        # Add Type
        graph.add((car_uri, RDF.type, EX.Car))
        graph.add((comp_uri, RDF.type, EX.Manufacturer))

        # Interlinking: Manufacturer
        manufacturer_link = dbpedia_manufacturers.get(comp_key)
        if manufacturer_link is not None:
            graph.add((comp_uri, OWL.sameAs, URIRef(manufacturer_link)))

        # Fuel Type
        fuel_clean = fuel_category(str(row['Fuel Types']).strip())
        fuel_uri = EX[fuel_clean.replace(" ", "_").replace("-", "_")]
        graph.add((fuel_uri, RDF.type, EX.FuelType))
        graph.add((car_uri, EX.usesFuel, fuel_uri))

        fuel_link = dbpedia_fuels.get(fuel_clean)
        if fuel_link is not None:
            graph.add((fuel_uri, OWL.sameAs, URIRef(fuel_link)))

        # Engine
        engine_clean = engine_family(str(row['Engines']).strip())
        engine_uri = EX[engine_clean.replace(" ", "_")]
        graph.add((engine_uri, RDF.type, EX.Engine))
        graph.add((car_uri, EX.hasEngine, engine_uri))

        engine_link = dbpedia_engines.get(engine_clean)
        if engine_link is not None:
            graph.add((engine_uri, OWL.sameAs, URIRef(engine_link)))

        # Determine Car Subclass & Interlinking
        seats = clean_seats(row['Seats'])
        price = clean_price(row['Cars Prices'])
        top_speed = clean_number(row['Total Speed'])

        # Exactly two seats -> Coupe; four or more -> Sedan; other counts stay plain Car.
        if seats == 2:
            graph.add((car_uri, RDF.type, EX.Coupe))
            graph.add((EX.Coupe, OWL.sameAs, URIRef(dbpedia_body["Coupe"])))  # Class-level link
        elif seats >= 4:
            graph.add((car_uri, RDF.type, EX.Sedan))
            graph.add((EX.Sedan, OWL.sameAs, URIRef(dbpedia_body["Sedan"])))

        if top_speed > 300:
            graph.add((car_uri, RDF.type, EX.SuperCar))
            graph.add((EX.SuperCar, OWL.sameAs, URIRef(dbpedia_body["SuperCar"])))

        # Add Properties
        graph.add((car_uri, EX.hasManufacturer, comp_uri))
        graph.add((car_uri, RDFS.label, Literal(car_name, datatype=XSD.string)))
        graph.add((comp_uri, RDFS.label, Literal(comp_name_raw, datatype=XSD.string)))
        graph.add((fuel_uri, RDFS.label, Literal(fuel_clean, datatype=XSD.string)))

        graph.add((car_uri, EX.hasPriceValue, Literal(price, datatype=XSD.float)))
        graph.add((car_uri, EX.hasSeatCount, Literal(seats, datatype=XSD.integer)))
        graph.add((car_uri, EX.hasTopSpeedKMH, Literal(int(top_speed), datatype=XSD.integer)))

        hp = clean_number(row['HorsePower'])
        graph.add((car_uri, EX.hasHorsePowerValue, Literal(int(hp), datatype=XSD.integer)))

    # Save Graph
    graph.serialize(destination="cars_knowledge_graph.ttl", format="turtle")
    print(f"Knowledge Graph saved to cars_knowledge_graph.ttl with {len(graph)} triples.")
|
| 206 |
+
|
| 207 |
+
if __name__ == "__main__":
|
| 208 |
+
convert_data()
|
src/ontology.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from rdflib import Graph, Namespace, RDF, RDFS, OWL, Literal, XSD
|
| 2 |
+
|
| 3 |
+
def create_ontology():
    """Build the cars ontology (T-Box) and save it as ``cars_ontology.ttl``.

    Everything lives in the ``http://example.org/cars/`` namespace. The
    graph declares the OWL classes, a handful of subclass links, the object
    properties (with a domain/range on ``hasManufacturer``) and the datatype
    properties (each with domain ``Car``). The result is serialized as Turtle
    into the current working directory and a confirmation line is printed.
    """
    EX = Namespace("http://example.org/cars/")

    g = Graph()
    g.bind("ex", EX)
    g.bind("owl", OWL)

    # --- vocabulary tables -------------------------------------------------
    class_names = (
        "Car", "Component", "Engine", "FuelType", "Manufacturer",
        "Color", "PassengerCar", "TwoSeaterCar", "FourSeaterCar",
        "FiveSeaterCar", "Coupe", "Sedan", "SUV", "SportsCar",
        "SuperCar", "HyperCar", "EconomyCar", "ElectricCar",
        "ICECar", "HybridCar", "PetrolCar", "DieselCar",
        "VEngine", "V8Engine", "V10Engine", "V12Engine",
        "ElectricMotor", "EcoFriendlyCar", "LuxuryCar", "LuxurySportCar",
    )
    # (subclass, superclass) pairs
    subclass_links = (
        ("PassengerCar", "Car"),
        ("SportsCar", "Car"),
        ("Engine", "Component"),
        ("ElectricCar", "Car"),
        ("PetrolCar", "Car"),
    )
    object_property_names = (
        "hasManufacturer", "manufactures", "hasEngine", "usesFuel",
        "hasCompetitor", "isModelVariantOf", "hasTrimLevel",
    )
    datatype_property_names = (
        "hasPriceValue", "hasHorsePowerValue", "hasTopSpeedKMH",
        "hasSeatCount", "hasTorqueNm", "has0to100Sec", "hasEngineDisplacement",
    )

    # --- classes and hierarchy ---------------------------------------------
    for name in class_names:
        g.add((EX[name], RDF.type, OWL.Class))
    for child, parent in subclass_links:
        g.add((EX[child], RDFS.subClassOf, EX[parent]))

    # --- object properties -------------------------------------------------
    for name in object_property_names:
        g.add((EX[name], RDF.type, OWL.ObjectProperty))
    # Only hasManufacturer carries an explicit domain and range.
    g.add((EX["hasManufacturer"], RDFS.domain, EX["Car"]))
    g.add((EX["hasManufacturer"], RDFS.range, EX["Manufacturer"]))

    # --- datatype properties (all describe a Car) --------------------------
    for name in datatype_property_names:
        prop = EX[name]
        g.add((prop, RDF.type, OWL.DatatypeProperty))
        g.add((prop, RDFS.domain, EX["Car"]))

    # --- persist -----------------------------------------------------------
    g.serialize(destination="cars_ontology.ttl", format="turtle")
    print("Ontology T-Box saved to cars_ontology.ttl")


if __name__ == "__main__":
    create_ontology()
|
src/publish.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, Query, Request, HTTPException
|
| 2 |
+
from fastapi.responses import JSONResponse, HTMLResponse
|
| 3 |
+
from rdflib import Graph
|
| 4 |
+
import os
|
| 5 |
+
import uvicorn
|
| 6 |
+
|
| 7 |
+
# FastAPI application that exposes the knowledge graph over HTTP.
app = FastAPI(title="Cars Knowledge Graph SPARQL Endpoint")

# Turtle file to load; the relative path is resolved against the working directory.
GRAPH_FILE = "cars_knowledge_graph.ttl"

# In-memory graph shared by all request handlers. It stays empty when
# GRAPH_FILE is missing, so the server still starts (queries just match nothing).
g = Graph()

if not os.path.exists(GRAPH_FILE):
    print(f"WARNING: {GRAPH_FILE} not found. Please run convert_data.py first.")
else:
    print(f"Loading Knowledge Graph from {GRAPH_FILE}...")
    g.parse(GRAPH_FILE, format="turtle")
    print(f"Graph loaded with {len(g)} triples.")
|
| 19 |
+
|
| 20 |
+
@app.get("/", response_class=HTMLResponse)
async def home():
    """Serve a static HTML landing page that points clients at ``/sparql`` and ``/docs``."""
    landing_page = """
    <h1>Cars Knowledge Graph SPARQL Endpoint (FastAPI)</h1>
    <p>The Knowledge Graph is published and accessible.</p>
    <p>Send SPARQL queries to: <code>/sparql</code></p>
    <h3>Example Query:</h3>
    <pre>
    SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10
    </pre>
    <p>View API Docs at: <a href="/docs">/docs</a></p>
    """
    return landing_page
|
| 32 |
+
|
| 33 |
+
async def run_query(query: str):
    """Run a SPARQL query against the module-level graph and return JSON-ready data.

    Args:
        query: SPARQL query text.

    Returns:
        * SELECT: ``{"results": [...]}`` with one dict per solution, mapping
          each bound variable name to the string form of its value (unbound
          variables are omitted).
        * ASK: ``{"boolean": <answer>}``.
        * CONSTRUCT / DESCRIBE: ``{"results": [...]}`` with one
          ``{"s": ..., "p": ..., "o": ...}`` dict per resulting triple.

    Raises:
        HTTPException: 400 if ``query`` is empty or ``None``; 500 if parsing
            or evaluating the query raises.
    """
    if not query:
        raise HTTPException(status_code=400, detail="No query provided")

    # NOTE(review): g.query() is a synchronous call inside an async handler,
    # so a slow query occupies the event loop while it runs.
    try:
        results = g.query(query)

        # Non-SELECT results have no variable list; iterating an ASK result
        # yields a single boolean and a CONSTRUCT/DESCRIBE result yields
        # triples, so they need their own serialization.
        if results.type == "ASK":
            return {"boolean": bool(results.askAnswer)}

        if results.type in ("CONSTRUCT", "DESCRIBE"):
            return {
                "results": [
                    {"s": str(s), "p": str(p), "o": str(o)}
                    for s, p, o in results
                ]
            }

        # SELECT: the variable list is the same for every row, so read it once.
        variables = results.vars or []
        res_list = [
            {
                str(var): str(row[i])
                for i, var in enumerate(variables)
                if row[i] is not None
            }
            for row in results
        ]
        return {"results": res_list}

    except Exception as e:
        # Evaluation may be lazy, so serialization stays inside the try block.
        raise HTTPException(status_code=500, detail=str(e)) from e
|
| 54 |
+
|
| 55 |
+
@app.get("/sparql")
async def sparql_get(query: str = Query(..., description="SPARQL Query")):
    """Handle ``GET /sparql``: run the required ``query`` URL parameter and return the result."""
    response = await run_query(query)
    return response
|
| 58 |
+
|
| 59 |
+
@app.post("/sparql")
async def sparql_post(request: Request):
    """Handle ``POST /sparql``.

    The query is read from the ``query`` form field when the request is
    URL-encoded form data; for any other content type the raw request body
    is decoded as UTF-8 and used as the query text.

    Args:
        request: Incoming HTTP request.

    Returns:
        Whatever ``run_query`` returns for the extracted query.

    Raises:
        HTTPException: 400 if the raw body is not valid UTF-8, plus anything
            ``run_query`` raises (e.g. 400 when no query was supplied).
    """
    content_type = request.headers.get("content-type", "")

    if "application/x-www-form-urlencoded" in content_type:
        form = await request.form()
        query = form.get("query")
    else:
        raw_body = await request.body()
        try:
            query = raw_body.decode("utf-8")
        except UnicodeDecodeError as exc:
            # Undecodable bytes are a client error, not a server failure.
            raise HTTPException(
                status_code=400, detail="Request body is not valid UTF-8"
            ) from exc

    return await run_query(query)
|
| 71 |
+
|
| 72 |
+
# Script entry point: when run directly, serve the app with uvicorn,
# listening on all interfaces (0.0.0.0) at port 8000.
if __name__ == "__main__":
    print("Starting SPARQL Endpoint on http://localhost:8000")
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
src/validate.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from rdflib import Graph, Namespace
|
| 2 |
+
import pandas as pd
|
| 3 |
+
|
| 4 |
+
def _run_dbpedia_fallback(g):
    """Check one manufacturer interlink by querying DBpedia directly.

    Picks a single ``owl:sameAs`` link of an ``ex:Manufacturer`` from ``g``
    and asks the DBpedia endpoint for its English abstract via SPARQLWrapper.
    All failures are reported with ``print`` instead of being raised.
    """
    try:
        # Imported lazily so the module still runs without SPARQLWrapper.
        from SPARQLWrapper import SPARQLWrapper, JSON
        sparql = SPARQLWrapper("http://dbpedia.org/sparql")

        # Get a manufacturer link from our graph
        manu_link_query = """
        PREFIX ex: <http://example.org/cars/>
        PREFIX owl: <http://www.w3.org/2002/07/owl#>
        SELECT ?link WHERE { ?m a ex:Manufacturer ; owl:sameAs ?link . } LIMIT 1
        """
        link_rows = list(g.query(manu_link_query))
        if not link_rows:
            # Without this guard the indexing below fails with an opaque
            # "list index out of range".
            print("Fallback failed: no ex:Manufacturer with an owl:sameAs link found in the graph.")
            return
        link = link_rows[0][0]
        print(f"Found Interlink: {link}")

        # Query DBpedia for that link
        dbpedia_q = f"""
        PREFIX dbo: <http://dbpedia.org/ontology/>
        SELECT ?desc WHERE {{ <{link}> dbo:abstract ?desc . FILTER (LANG(?desc) = 'en') }} LIMIT 1
        """
        sparql.setQuery(dbpedia_q)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        for result in results["results"]["bindings"]:
            print(f"Description from DBpedia: {result['desc']['value'][:200]}...")

    except ImportError:
        print("SPARQLWrapper not installed. Cannot run fallback.")
    except Exception as exc:
        print(f"Fallback failed: {exc}")


def run_validation():
    """Run the validation SPARQL queries against ``cars_knowledge_graph.ttl``.

    Loads the Turtle file from the working directory, runs four local
    queries and prints every result row, then attempts a federated
    ``SERVICE`` query against DBpedia. If the federated query raises, a
    fallback queries DBpedia directly through SPARQLWrapper. Query errors
    are printed rather than raised.
    """
    g = Graph()
    g.parse("cars_knowledge_graph.ttl", format="turtle")

    # Key of the federated query; it is run separately from the local ones.
    federated_title = "5. (Federated) Get Manufacturer Description from DBpedia"

    queries = {
        "1. List all cars manufactured by Ferrari": """
            PREFIX ex: <http://example.org/cars/>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

            SELECT ?car_name
            WHERE {
            ?car ex:hasManufacturer ?manu .
            ?manu rdfs:label ?manu_name .
            FILTER (REGEX(?manu_name, "Ferrari", "i"))
            ?car rdfs:label ?car_name .
            }
            """,
        "2. Cars with HorsePower > 800": """
            PREFIX ex: <http://example.org/cars/>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

            SELECT ?car_name ?hp
            WHERE {
            ?car ex:hasHorsePowerValue ?hp .
            ?car rdfs:label ?car_name .
            FILTER (?hp > 800)
            }
            """,
        "3. Count of 2-Seater cars": """
            PREFIX ex: <http://example.org/cars/>

            SELECT (COUNT(?car) as ?count)
            WHERE {
            ?car a ex:Coupe .
            }
            """,
        "4. Average Price of cars": """
            PREFIX ex: <http://example.org/cars/>

            SELECT (AVG(?price) as ?avg_price)
            WHERE {
            ?car ex:hasPriceValue ?price .
            }
            """,
        federated_title: """
            PREFIX ex: <http://example.org/cars/>
            PREFIX owl: <http://www.w3.org/2002/07/owl#>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX dbo: <http://dbpedia.org/ontology/>

            SELECT ?manu_name ?desc
            WHERE {
            ?manu a ex:Manufacturer ;
            rdfs:label ?manu_name ;
            owl:sameAs ?dbpedia_link .

            SERVICE <http://dbpedia.org/sparql> {
            ?dbpedia_link dbo:abstract ?desc .
            FILTER (LANG(?desc) = 'en')
            }
            }
            LIMIT 3
            """,
    }

    print(f"Loaded Graph with {len(g)} triples.\n")

    # Run Local Queries (the federated one gets special handling below)
    for title, query in queries.items():
        if title == federated_title:
            continue
        print(f"--- {title} ---")
        try:
            for row in g.query(query):
                print(row)
        except Exception as e:
            print(f"Query Error: {e}")
        print("\n")

    # Run Federated Query (Special Handling)
    print(f"--- {federated_title} ---")
    print("Attempting to run SERVICE query via rdflib (may fail due to DBpedia restrictions)...")

    try:
        for row in g.query(queries[federated_title]):
            print(row)
    except Exception as e:
        print(f"Standard SERVICE query failed ({e}).\nData is likely interlinked, but local engine cannot negotiate with DBpedia.")
        print("Fallback: Verifying Interlinking manually via SPARQLWrapper...")
        _run_dbpedia_fallback(g)


if __name__ == "__main__":
    run_validation()
|