"""Convert the cars CSV dataset into an RDF knowledge graph in Turtle format."""
import os
import re
from urllib.parse import quote

import pandas as pd
from rdflib import Graph, Namespace, RDF, RDFS, OWL, Literal, XSD, URIRef

# Define Namespace
EX = Namespace("http://example.org/cars/")

# Numeric patterns always contain at least one digit, so float() cannot be
# handed a lone "." or "," picked up from surrounding text.
_PRICE_RE = re.compile(r"\d+(?:\.\d+)?|\.\d+")
_NUMBER_RE = re.compile(r"\d[\d,]*(?:\.\d+)?|\.\d+")
_SEATS_RE = re.compile(r"\d+")

# Characters rdflib rejects inside a URIRef when serialising to Turtle/N3,
# plus any remaining whitespace.
_UNSAFE_IRI_CHARS = re.compile(r'[<>"{}|\\^`\s]')

# Same substitutions the car URI has always used: space -> "_", "/" -> "-",
# parentheses dropped.
_CAR_NAME_TRANSLATION = str.maketrans({" ": "_", "/": "-", "(": None, ")": None})

# Columns read from every CSV row.
REQUIRED_COLUMNS = (
    "Cars Names",
    "Company Names",
    "Fuel Types",
    "Engines",
    "Seats",
    "Cars Prices",
    "Total Speed",
    "HorsePower",
)

# DBpedia Mappings
DBPEDIA_MANUFACTURERS = {
    "FERRARI": "http://dbpedia.org/resource/Ferrari",
    "ROLLS ROYCE": "http://dbpedia.org/resource/Rolls-Royce_Motor_Cars",
    "FORD": "http://dbpedia.org/resource/Ford_Motor_Company",
    "MERCEDES": "http://dbpedia.org/resource/Mercedes-Benz",
    "AUDI": "http://dbpedia.org/resource/Audi",
    "BMW": "http://dbpedia.org/resource/BMW",
    "ASTON MARTIN": "http://dbpedia.org/resource/Aston_Martin",
    "BENTLEY": "http://dbpedia.org/resource/Bentley",
    "LAMBORGHINI": "http://dbpedia.org/resource/Lamborghini",
    "TOYOTA": "http://dbpedia.org/resource/Toyota",
    "NISSAN": "http://dbpedia.org/resource/Nissan",
    "VOLVO": "http://dbpedia.org/resource/Volvo_Cars",
    "KIA": "http://dbpedia.org/resource/Kia",
    "HONDA": "http://dbpedia.org/resource/Honda",
    "HYUNDAI": "http://dbpedia.org/resource/Hyundai_Motor_Company",
    "MAHINDRA": "http://dbpedia.org/resource/Mahindra_&_Mahindra",
    "MARUTI SUZUKI": "http://dbpedia.org/resource/Maruti_Suzuki",
    "VOLKSWAGEN": "http://dbpedia.org/resource/Volkswagen",
    "PORSCHE": "http://dbpedia.org/resource/Porsche",
    "CADILLAC": "http://dbpedia.org/resource/Cadillac",
    "TATA MOTORS": "http://dbpedia.org/resource/Tata_Motors",
    "TESLA": "http://dbpedia.org/resource/Tesla,_Inc.",
    "JEEP": "http://dbpedia.org/resource/Jeep",
    "MAZDA": "http://dbpedia.org/resource/Mazda",
    "CHEVROLET": "http://dbpedia.org/resource/Chevrolet",
    "GMC": "http://dbpedia.org/resource/GMC_(automobile)",
    "PEUGEOT": "http://dbpedia.org/resource/Peugeot",
    "BUGATTI": "http://dbpedia.org/resource/Bugatti_Automobiles",
    "JAGUAR LAND ROVER": "http://dbpedia.org/resource/Jaguar_Land_Rover",
    "ACURA": "http://dbpedia.org/resource/Acura",
    "MITSUBISHI": "http://dbpedia.org/resource/Mitsubishi_Motors",
}

DBPEDIA_BODY = {
    "Coupe": "http://dbpedia.org/resource/Coupe",
    "Sedan": "http://dbpedia.org/resource/Sedan_(automobile)",
    "SUV": "http://dbpedia.org/resource/Sport_utility_vehicle",
    "SuperCar": "http://dbpedia.org/resource/Supercar",
    "Car": "http://dbpedia.org/resource/Car",
}

# Fuel Mappings
DBPEDIA_FUELS = {
    "PETROL": "http://dbpedia.org/resource/Gasoline",
    "DIESEL": "http://dbpedia.org/resource/Diesel_fuel",
    # Linking to EV concept for fuel type context
    "ELECTRIC": "http://dbpedia.org/resource/Electric_vehicle",
    "HYBRID": "http://dbpedia.org/resource/Hybrid_vehicle",
    "PLUG-IN HYBRID": "http://dbpedia.org/resource/Plug-in_hybrid",
    "HYDROGEN": "http://dbpedia.org/resource/Hydrogen_fuel",
    "CNG": "http://dbpedia.org/resource/Compressed_natural_gas",
}

# Engine Mappings (Common types)
# NOTE: "ELECTRIC" is deliberately not produced by _classify_engine: its node
# would be ex:ELECTRIC, which is already the node used for the electric fuel
# type, so matching it would merge an engine and a fuel into one resource.
DBPEDIA_ENGINES = {
    "V8": "http://dbpedia.org/resource/V8_engine",
    "V10": "http://dbpedia.org/resource/V10_engine",
    "V12": "http://dbpedia.org/resource/V12_engine",
    "V6": "http://dbpedia.org/resource/V6_engine",
    "W12": "http://dbpedia.org/resource/W12_engine",
    "W16": "http://dbpedia.org/resource/W16_engine",
    "I4": "http://dbpedia.org/resource/Inline-four_engine",
    "ELECTRIC": "http://dbpedia.org/resource/Electric_motor",
}

# (substring, engine label) pairs, checked in order; the first hit wins.
_ENGINE_KEYWORDS = (
    ("v8", "V8"),
    ("v12", "V12"),
    ("v10", "V10"),
    ("v6", "V6"),
    ("w12", "W12"),
    ("w16", "W16"),
    ("i4", "I4"),
    ("inline-4", "I4"),
    ("inline 4", "I4"),
)


def clean_price(value) -> float:
    """Return the first number found in a price cell, or 0.0 if there is none.

    Dollar signs and thousands separators are removed before searching, so
    "$1,100,000" yields 1100000.0 and a range such as "$12,000-$15,000"
    yields its lower bound.
    """
    if pd.isna(value):
        return 0.0
    val_str = str(value).replace('$', '').replace(',', '').strip()
    match = _PRICE_RE.search(val_str)
    return float(match.group(0)) if match else 0.0


def clean_number(value) -> float:
    """Return the first number found in a free-text cell, or 0.0 if none.

    Commas inside the number are treated as thousands separators and dropped
    ("1,200 hp" -> 1200.0). Punctuation that precedes the number (as in
    "approx. 300") is skipped instead of being fed to float().
    """
    if pd.isna(value):
        return 0.0
    match = _NUMBER_RE.search(str(value))
    if match:
        return float(match.group(0).replace(',', ''))
    return 0.0


def clean_seats(value) -> int:
    """Return the first integer found in a seats cell, defaulting to 2."""
    if pd.isna(value):
        return 2
    match = _SEATS_RE.search(str(value))
    return int(match.group(0)) if match else 2


def _iri_safe(local_name: str) -> str:
    """Percent-encode characters that are not allowed inside a URIRef."""
    return _UNSAFE_IRI_CHARS.sub(
        lambda m: quote(m.group(0), safe=""), local_name
    )


def _classify_fuel(raw) -> str:
    """Map a raw fuel description onto one of the DBPEDIA_FUELS keys."""
    text = str(raw).strip().lower()
    if "diesel" in text:
        return "DIESEL"
    # "Petrol + Electric" style hybrids must not be classified as pure EVs.
    if "electric" in text and "hybrid" not in text:
        return "ELECTRIC"
    if "plug" in text:
        return "PLUG-IN HYBRID"
    if "hybrid" in text:
        return "HYBRID"
    if "hydrogen" in text:
        return "HYDROGEN"
    if "cng" in text:
        return "CNG"
    return "PETROL"  # Default


def _classify_engine(raw) -> str:
    """Map a raw engine description onto an engine label, or "Engine"."""
    text = str(raw).strip().lower()
    for keyword, label in _ENGINE_KEYWORDS:
        if keyword in text:
            return label
    return "Engine"


def _add_body_class(g, car_uri, class_name: str) -> None:
    """Type the car as ex:<class_name> and link that class to DBpedia."""
    class_uri = EX[class_name]
    g.add((car_uri, RDF.type, class_uri))
    # Class Level link (optional but good)
    g.add((class_uri, OWL.sameAs, URIRef(DBPEDIA_BODY[class_name])))


def _add_car(g, row) -> bool:
    """Add the triples for one CSV row to the graph.

    Returns False, adding nothing, when the row has no usable car or company
    name; otherwise returns True.
    """
    raw_car = row['Cars Names']
    raw_company = row['Company Names']
    # str(NaN) is "nan", which would otherwise become a bogus ex:nan resource.
    if pd.isna(raw_car) or pd.isna(raw_company):
        return False

    car_name = str(raw_car).strip()
    comp_name_raw = str(raw_company).strip()
    if not car_name or not comp_name_raw:
        return False
    comp_name_upper = comp_name_raw.upper()

    # Normalize names for URIs
    comp_uri = EX[_iri_safe(comp_name_upper.replace(" ", "_"))]
    car_uri = EX[_iri_safe(car_name.translate(_CAR_NAME_TRANSLATION))]

    # Add Type
    g.add((car_uri, RDF.type, EX.Car))
    g.add((comp_uri, RDF.type, EX.Manufacturer))

    # Interlinking: Manufacturer
    manufacturer_link = DBPEDIA_MANUFACTURERS.get(comp_name_upper)
    if manufacturer_link is not None:
        g.add((comp_uri, OWL.sameAs, URIRef(manufacturer_link)))

    # Fuel Type Logic
    fuel_clean = _classify_fuel(row['Fuel Types'])
    fuel_uri = EX[fuel_clean.replace(" ", "_").replace("-", "_")]
    g.add((fuel_uri, RDF.type, EX.FuelType))
    g.add((car_uri, EX.usesFuel, fuel_uri))
    fuel_link = DBPEDIA_FUELS.get(fuel_clean)
    if fuel_link is not None:
        g.add((fuel_uri, OWL.sameAs, URIRef(fuel_link)))

    # Engine Logic
    engine_clean = _classify_engine(row['Engines'])
    engine_uri = EX[engine_clean.replace(" ", "_")]
    g.add((engine_uri, RDF.type, EX.Engine))
    g.add((car_uri, EX.hasEngine, engine_uri))
    engine_link = DBPEDIA_ENGINES.get(engine_clean)
    if engine_link is not None:
        g.add((engine_uri, OWL.sameAs, URIRef(engine_link)))

    # Determine Car Subclass & Interlinking
    seats = clean_seats(row['Seats'])
    price = clean_price(row['Cars Prices'])
    top_speed = clean_number(row['Total Speed'])
    hp = clean_number(row['HorsePower'])

    if seats == 2:
        _add_body_class(g, car_uri, "Coupe")
    elif seats >= 4:
        _add_body_class(g, car_uri, "Sedan")
    if top_speed > 300:
        _add_body_class(g, car_uri, "SuperCar")

    # Add Properties
    g.add((car_uri, EX.hasManufacturer, comp_uri))
    g.add((car_uri, RDFS.label, Literal(car_name, datatype=XSD.string)))
    g.add((comp_uri, RDFS.label, Literal(comp_name_raw, datatype=XSD.string)))
    g.add((fuel_uri, RDFS.label, Literal(fuel_clean, datatype=XSD.string)))
    g.add((car_uri, EX.hasPriceValue, Literal(price, datatype=XSD.float)))
    g.add((car_uri, EX.hasSeatCount, Literal(seats, datatype=XSD.integer)))
    g.add((car_uri, EX.hasTopSpeedKMH,
           Literal(int(top_speed), datatype=XSD.integer)))
    g.add((car_uri, EX.hasHorsePowerValue,
           Literal(int(hp), datatype=XSD.integer)))
    return True


def convert_data():
    """Build the cars knowledge graph from the CSV and save it as Turtle.

    Loads "cars_ontology.ttl" into the graph first when that file exists,
    reads "Cars Datasets 2025.csv" from the parent directory or, failing
    that, the current directory, adds the triples for each row, and writes
    the result to "cars_knowledge_graph.ttl". Problems reading the CSV or
    missing required columns are reported on stdout and end the run without
    writing any output.
    """
    # Load or Create Graph
    g = Graph()
    g.bind("ex", EX)
    g.bind("owl", OWL)
    g.bind("rdfs", RDFS)

    # Load Ontology T-Box (if exists, to keep definitions)
    if os.path.exists("cars_ontology.ttl"):
        g.parse("cars_ontology.ttl", format="turtle")

    # Load CSV
    csv_path = "../Cars Datasets 2025.csv"
    if not os.path.exists(csv_path):
        csv_path = "Cars Datasets 2025.csv"

    try:
        df = pd.read_csv(csv_path, encoding='latin1')
    except Exception as e:
        print(f"Error reading CSV: {e}")
        return

    # Fail with a clear message rather than a KeyError on the first row.
    missing_columns = [c for c in REQUIRED_COLUMNS if c not in df.columns]
    if missing_columns:
        print(f"Error reading CSV: missing columns {missing_columns}")
        return

    print(f"Processing {len(df)} rows...")

    skipped = 0
    for _, row in df.iterrows():
        if not _add_car(g, row):
            skipped += 1
    if skipped:
        print(f"Skipped {skipped} rows without a car or company name.")

    # Save Graph
    g.serialize(destination="cars_knowledge_graph.ttl", format="turtle")
    print(f"Knowledge Graph saved to cars_knowledge_graph.ttl with {len(g)} triples.")


if __name__ == "__main__":
    convert_data()