|
|
import pandas as pd |
|
|
from rdflib import Graph, Namespace, RDF, RDFS, OWL, Literal, XSD, URIRef |
|
|
import re |
|
|
import os |
|
|
|
|
|
|
|
|
# Base namespace under which this script mints its car, manufacturer,
# fuel-type and engine URIs (and the class/property terms it refers to).
EX = Namespace("http://example.org/cars/")
|
|
|
|
|
def clean_price(value):
    """Parse a price cell into a float.

    "$" signs and "," thousands separators are stripped, then the first run
    of digits/dots that actually parses as a number is returned.  For a
    range such as "$12,000-$15,000" that is the lower bound.

    Args:
        value: Raw cell value (string, number, or a missing value).

    Returns:
        The parsed price, or 0.0 when the value is missing or contains no
        parsable number.
    """
    if pd.isna(value):
        return 0.0

    val_str = str(value).replace('$', '').replace(',', '').strip()

    # The pattern also matches stray punctuation such as the "." in "N.A."
    # or "approx."; float() rejects those, so move on to the next candidate
    # instead of letting ValueError abort the whole conversion.
    for token in re.findall(r'[\d\.]+', val_str):
        try:
            return float(token)
        except ValueError:
            continue
    return 0.0
|
|
|
|
|
def clean_number(value):
    """Extract the first number from a free-text cell such as "320 km/h".

    Commas inside the number are treated as thousands separators and
    removed before conversion ("1,200 cc" -> 1200.0).

    Args:
        value: Raw cell value (string, number, or a missing value).

    Returns:
        The first parsable number as a float, or 0 when the value is
        missing or contains no parsable number.
    """
    if pd.isna(value):
        return 0

    # The pattern can also pick up bare punctuation (a lone "," or "." from
    # surrounding text).  Those do not convert, so skip them and try the
    # next candidate rather than raising ValueError.
    for token in re.findall(r'[\d\.,]+', str(value)):
        try:
            return float(token.replace(',', ''))
        except ValueError:
            continue
    return 0
|
|
|
|
|
def clean_seats(value):
    """Return the first integer found in a seat-count cell.

    Falls back to 2 when the cell is missing or holds no digits.  Only the
    leading integer is used, so "2+2" yields 2.
    """
    if not pd.isna(value):
        found = re.search(r'(\d+)', str(value))
        if found is not None:
            return int(found.group(1))
    return 2
|
|
|
|
|
# Characters the Turtle grammar does not allow inside an <...> IRI:
# control characters and space (U+0000-U+0020) plus < > " { } | ^ ` \
_IRI_FORBIDDEN = re.compile(r'[\x00-\x20<>"{}|^`\\]')


def _uri_local_name(text):
    """Percent-encode every IRI-forbidden character in *text*."""
    return _IRI_FORBIDDEN.sub(lambda m: f"%{ord(m.group()):02X}", text)


def _classify_fuel(raw):
    """Map a free-text fuel description to one of the fixed fuel labels."""
    text = raw.lower()
    if "diesel" in text:
        return "DIESEL"
    if "electric" in text and "hybrid" not in text:
        return "ELECTRIC"
    if "plug" in text:
        return "PLUG-IN HYBRID"
    if "hybrid" in text:
        return "HYBRID"
    if "hydrogen" in text:
        return "HYDROGEN"
    if "cng" in text:
        return "CNG"
    # Anything unrecognised (including a missing cell) defaults to petrol.
    return "PETROL"


def _classify_engine(raw):
    """Map a free-text engine description to a cylinder-layout label."""
    text = raw.lower()
    # Order matters: the first keyword found wins.
    for needle, label in (
        ("v8", "V8"),
        ("v12", "V12"),
        ("v10", "V10"),
        ("v6", "V6"),
        ("w12", "W12"),
        ("w16", "W16"),
    ):
        if needle in text:
            return label
    return "Engine"


def convert_data():
    """Convert the cars CSV into an RDF knowledge graph saved as Turtle.

    Steps:
      1. Create a graph, bind the ``ex``/``owl``/``rdfs`` prefixes and, if
         ``cars_ontology.ttl`` exists in the working directory, load it.
      2. Read ``Cars Datasets 2025.csv`` (parent directory first, then the
         working directory) using latin1 encoding.
      3. For every row add the car, its manufacturer, fuel type, engine,
         body-type classes and numeric data properties, linking known
         entities to DBpedia with ``owl:sameAs``.
      4. Serialize the graph to ``cars_knowledge_graph.ttl``.

    Rows without a car name are skipped; rows without a company name get no
    manufacturer triples.

    Returns:
        None.  If the CSV cannot be read, an error is printed and the
        function returns without writing any output.
    """
    g = Graph()
    g.bind("ex", EX)
    g.bind("owl", OWL)
    g.bind("rdfs", RDFS)

    # Start from the hand-written ontology when it is available.
    if os.path.exists("cars_ontology.ttl"):
        g.parse("cars_ontology.ttl", format="turtle")

    # Upper-cased company name -> DBpedia resource.
    dbpedia_manufacturers = {
        "FERRARI": "http://dbpedia.org/resource/Ferrari",
        "ROLLS ROYCE": "http://dbpedia.org/resource/Rolls-Royce_Motor_Cars",
        "FORD": "http://dbpedia.org/resource/Ford_Motor_Company",
        "MERCEDES": "http://dbpedia.org/resource/Mercedes-Benz",
        "AUDI": "http://dbpedia.org/resource/Audi",
        "BMW": "http://dbpedia.org/resource/BMW",
        "ASTON MARTIN": "http://dbpedia.org/resource/Aston_Martin",
        "BENTLEY": "http://dbpedia.org/resource/Bentley",
        "LAMBORGHINI": "http://dbpedia.org/resource/Lamborghini",
        "TOYOTA": "http://dbpedia.org/resource/Toyota",
        "NISSAN": "http://dbpedia.org/resource/Nissan",
        "VOLVO": "http://dbpedia.org/resource/Volvo_Cars",
        "KIA": "http://dbpedia.org/resource/Kia",
        "HONDA": "http://dbpedia.org/resource/Honda",
        "HYUNDAI": "http://dbpedia.org/resource/Hyundai_Motor_Company",
        "MAHINDRA": "http://dbpedia.org/resource/Mahindra_&_Mahindra",
        "MARUTI SUZUKI": "http://dbpedia.org/resource/Maruti_Suzuki",
        "VOLKSWAGEN": "http://dbpedia.org/resource/Volkswagen",
        "PORSCHE": "http://dbpedia.org/resource/Porsche",
        "CADILLAC": "http://dbpedia.org/resource/Cadillac",
        "TATA MOTORS": "http://dbpedia.org/resource/Tata_Motors",
        "TESLA": "http://dbpedia.org/resource/Tesla,_Inc.",
        "JEEP": "http://dbpedia.org/resource/Jeep",
        "MAZDA": "http://dbpedia.org/resource/Mazda",
        "CHEVROLET": "http://dbpedia.org/resource/Chevrolet",
        "GMC": "http://dbpedia.org/resource/GMC_(automobile)",
        "PEUGEOT": "http://dbpedia.org/resource/Peugeot",
        "BUGATTI": "http://dbpedia.org/resource/Bugatti_Automobiles",
        "JAGUAR LAND ROVER": "http://dbpedia.org/resource/Jaguar_Land_Rover",
        "ACURA": "http://dbpedia.org/resource/Acura",
        "MITSUBISHI": "http://dbpedia.org/resource/Mitsubishi_Motors",
    }

    # Body-type class -> DBpedia resource.  Only "Coupe", "Sedan" and
    # "SuperCar" are looked up below.
    dbpedia_body = {
        "Coupe": "http://dbpedia.org/resource/Coupe",
        "Sedan": "http://dbpedia.org/resource/Sedan_(automobile)",
        "SUV": "http://dbpedia.org/resource/Sport_utility_vehicle",
        "SuperCar": "http://dbpedia.org/resource/Supercar",
        "Car": "http://dbpedia.org/resource/Car",
    }

    # Fuel label (as produced by _classify_fuel) -> DBpedia resource.
    dbpedia_fuels = {
        "PETROL": "http://dbpedia.org/resource/Gasoline",
        "DIESEL": "http://dbpedia.org/resource/Diesel_fuel",
        "ELECTRIC": "http://dbpedia.org/resource/Electric_vehicle",
        "HYBRID": "http://dbpedia.org/resource/Hybrid_vehicle",
        "PLUG-IN HYBRID": "http://dbpedia.org/resource/Plug-in_hybrid",
        "HYDROGEN": "http://dbpedia.org/resource/Hydrogen_fuel",
        "CNG": "http://dbpedia.org/resource/Compressed_natural_gas",
    }

    # Engine label (as produced by _classify_engine) -> DBpedia resource.
    # NOTE(review): "I4" and "ELECTRIC" are never produced by the engine
    # classification, so those two links are currently never emitted.
    # Adding an "ELECTRIC" engine label as-is would collide with the
    # ex:ELECTRIC fuel URI - confirm the intended modelling before enabling.
    dbpedia_engines = {
        "V8": "http://dbpedia.org/resource/V8_engine",
        "V10": "http://dbpedia.org/resource/V10_engine",
        "V12": "http://dbpedia.org/resource/V12_engine",
        "V6": "http://dbpedia.org/resource/V6_engine",
        "W12": "http://dbpedia.org/resource/W12_engine",
        "W16": "http://dbpedia.org/resource/W16_engine",
        "I4": "http://dbpedia.org/resource/Inline-four_engine",
        "ELECTRIC": "http://dbpedia.org/resource/Electric_motor",
    }

    # Prefer the copy in the parent directory, fall back to the working dir.
    csv_path = "../Cars Datasets 2025.csv"
    if not os.path.exists(csv_path):
        csv_path = "Cars Datasets 2025.csv"

    try:
        df = pd.read_csv(csv_path, encoding='latin1')
    except Exception as e:
        print(f"Error reading CSV: {e}")
        return

    print(f"Processing {len(df)} rows...")

    for _, row in df.iterrows():
        # --- Car -----------------------------------------------------------
        # A missing cell would stringify to "nan" and merge every unnamed
        # row into one bogus ex:nan resource, so skip such rows.
        if pd.isna(row['Cars Names']):
            continue
        car_name = str(row['Cars Names']).strip()
        if not car_name:
            continue

        car_local = (
            car_name.replace(" ", "_")
            .replace("/", "-")
            .replace("(", "")
            .replace(")", "")
        )
        # Percent-encode whatever is still illegal in an IRI; otherwise the
        # Turtle serialization at the end fails for the whole graph.
        car_uri = EX[_uri_local_name(car_local)]

        g.add((car_uri, RDF.type, EX.Car))
        g.add((car_uri, RDFS.label, Literal(car_name, datatype=XSD.string)))

        # --- Manufacturer --------------------------------------------------
        if not pd.isna(row['Company Names']):
            comp_name_raw = str(row['Company Names']).strip()
            comp_name_upper = comp_name_raw.upper()
            comp_uri = EX[_uri_local_name(comp_name_upper.replace(" ", "_"))]

            g.add((comp_uri, RDF.type, EX.Manufacturer))
            g.add((comp_uri, RDFS.label, Literal(comp_name_raw, datatype=XSD.string)))
            g.add((car_uri, EX.hasManufacturer, comp_uri))

            if comp_name_upper in dbpedia_manufacturers:
                g.add((comp_uri, OWL.sameAs, URIRef(dbpedia_manufacturers[comp_name_upper])))

        # --- Fuel type -----------------------------------------------------
        fuel_clean = _classify_fuel(str(row['Fuel Types']).strip())
        fuel_uri = EX[fuel_clean.replace(" ", "_").replace("-", "_")]

        g.add((fuel_uri, RDF.type, EX.FuelType))
        g.add((fuel_uri, RDFS.label, Literal(fuel_clean, datatype=XSD.string)))
        g.add((car_uri, EX.usesFuel, fuel_uri))

        if fuel_clean in dbpedia_fuels:
            g.add((fuel_uri, OWL.sameAs, URIRef(dbpedia_fuels[fuel_clean])))

        # --- Engine --------------------------------------------------------
        engine_clean = _classify_engine(str(row['Engines']).strip())
        engine_uri = EX[engine_clean.replace(" ", "_")]

        g.add((engine_uri, RDF.type, EX.Engine))
        g.add((car_uri, EX.hasEngine, engine_uri))

        if engine_clean in dbpedia_engines:
            g.add((engine_uri, OWL.sameAs, URIRef(dbpedia_engines[engine_clean])))

        # --- Numeric attributes ---------------------------------------------
        seats = clean_seats(row['Seats'])
        price = clean_price(row['Cars Prices'])
        top_speed = clean_number(row['Total Speed'])
        hp = clean_number(row['HorsePower'])

        # --- Body-type classification ---------------------------------------
        # Two seats -> Coupe, four or more -> Sedan; three seats gets no
        # body-type class beyond ex:Car.
        if seats == 2:
            g.add((car_uri, RDF.type, EX.Coupe))
            g.add((EX.Coupe, OWL.sameAs, URIRef(dbpedia_body["Coupe"])))
        elif seats >= 4:
            g.add((car_uri, RDF.type, EX.Sedan))
            g.add((EX.Sedan, OWL.sameAs, URIRef(dbpedia_body["Sedan"])))

        # SuperCar is independent of seat count.
        if top_speed > 300:
            g.add((car_uri, RDF.type, EX.SuperCar))
            g.add((EX.SuperCar, OWL.sameAs, URIRef(dbpedia_body["SuperCar"])))

        # --- Data properties ------------------------------------------------
        g.add((car_uri, EX.hasPriceValue, Literal(price, datatype=XSD.float)))
        g.add((car_uri, EX.hasSeatCount, Literal(seats, datatype=XSD.integer)))
        g.add((car_uri, EX.hasTopSpeedKMH, Literal(int(top_speed), datatype=XSD.integer)))
        g.add((car_uri, EX.hasHorsePowerValue, Literal(int(hp), datatype=XSD.integer)))

    g.serialize(destination="cars_knowledge_graph.ttl", format="turtle")
    print(f"Knowledge Graph saved to cars_knowledge_graph.ttl with {len(g)} triples.")
|
|
|
|
|
# Build the knowledge graph only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    convert_data()
|
|
|