RideSearch / app.py
Levimichael4's picture
Upload app.py
579e82e verified
import os, glob, json, requests
import numpy as np
import pandas as pd
import gradio as gr
from sklearn.preprocessing import StandardScaler
try:
from sentence_transformers import SentenceTransformer
except Exception:
SentenceTransformer = None
try:
from sklearn.feature_extraction.text import HashingVectorizer
except Exception:
HashingVectorizer = None
# Dataset locations: one merged CSV, or the split parts that load_df() can
# concatenate into it.
CSV_MAIN = "RideSearch_dataset.csv"
CSV_PARTS_GLOB = "RideSearch_part*_small.csv"
# On-disk caches for the text and numeric embedding matrices
# (read and written by ensure_embeddings()).
EMB_TEXT_NPY = "emb_text.npy"
EMB_NUM_NPY = "emb_num.npy"
# JSON file read by load_overrides() and written by save_overrides().
TRIMS_OVERRIDES = "trims_overrides.json"
# Sentence-transformers model name; the EMBED_MODEL environment variable wins
# over the default.
EMBED_MODEL = os.environ.get("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
# Numeric columns that unify_columns() creates when absent and coerces with
# pd.to_numeric.
NUM_COLS_CANON = [
    "horsepower","zero_to_100_kmh_s","seats","cargo_liters","price_usd",
    "popularity_score","comfort_score","reliability_score","tech_score",
    "ownership_cost_score","safety_rating","year"
]
# Hand-curated trim lists keyed by (make, model) exactly as spelled here.
# Lookups do not use this dict directly: REAL_TRIMS_N re-keys it through
# _norm() (strip + lower-case), so keys that differ only by case or outer
# whitespace end up on the same normalised key.
REAL_TRIMS = {
    # --- BMW ---
    ("BMW","1 Series"): ["116i","118i","120i","125i","M135i"],
    ("BMW","2 Series"): ["218i","220i","225i","230i","M235i","M240i"],
    ("BMW","3 Series"): ["318i","320i","325i","330i","330e","335i","340i","M340i","M3"],
    ("BMW","4 Series"): ["420i","430i","435i","440i","M440i","M4"],
    ("BMW","5 Series"): ["520i","525i","530i","530e","535i","540i","550i","M550i","M5"],
    ("BMW","6 Series"): ["630i","640i","650i","M6"],
    ("BMW","7 Series"): ["730i","740i","750i","760i","M760i"],
    ("BMW","8 Series"): ["840i","850i","M850i","M8"],
    ("BMW","X1"): ["sDrive18i","sDrive20i","xDrive20i","xDrive25i"],
    ("BMW","X2"): ["sDrive18i","sDrive20i","xDrive20i","xDrive25i","M35i"],
    ("BMW","X3"): ["sDrive20i","xDrive20i","xDrive30i","M40i","X3 M"],
    ("BMW","X4"): ["xDrive20i","xDrive30i","M40i","X4 M"],
    ("BMW","X5"): ["sDrive40i","xDrive40i","xDrive45e","M50i","X5 M"],
    ("BMW","X6"): ["sDrive40i","xDrive40i","M50i","X6 M"],
    ("BMW","X7"): ["xDrive40i","xDrive50i","M50i"],
    ("BMW","Z4"): ["sDrive20i","sDrive30i","M40i"],
    # --- Audi ---
    ("Audi","A1"): ["25 TFSI","30 TFSI","35 TFSI","S1"],
    ("Audi","A3"): ["30 TFSI","35 TFSI","40 TFSI","45 TFSI e","S3","RS3"],
    ("Audi","A4"): ["35 TFSI","40 TFSI","45 TFSI","45 TFSI e","S4","RS4"],
    ("Audi","A5"): ["35 TFSI","40 TFSI","45 TFSI","S5","RS5"],
    ("Audi","A6"): ["40 TFSI","45 TFSI","50 TFSI","55 TFSI e","S6","RS6"],
    ("Audi","A7"): ["45 TFSI","50 TFSI","55 TFSI e","S7","RS7"],
    ("Audi","A8"): ["50 TFSI","55 TFSI","60 TFSI e","S8"],
    ("Audi","Q2"): ["30 TFSI","35 TFSI","40 TFSI"],
    ("Audi","Q3"): ["35 TFSI","40 TFSI","45 TFSI e","RS Q3"],
    ("Audi","Q5"): ["40 TFSI","45 TFSI e","SQ5"],
    ("Audi","Q7"): ["45 TFSI","50 TDI","55 TFSI e","SQ7"],
    ("Audi","Q8"): ["50 TDI","55 TFSI","SQ8","RS Q8"],
    ("Audi","TT"): ["40 TFSI","45 TFSI","TTS","TT RS"],
    ("Audi","R8"): ["V10","V10 Performance","V10 Plus"],
    # --- Mercedes-Benz ---
    ("Mercedes-Benz","A-Class"): ["A180","A200","A220","A250","A250e","AMG A35","AMG A45"],
    ("Mercedes-Benz","B-Class"): ["B180","B200","B220","B250e"],
    ("Mercedes-Benz","C-Class"): ["C180","C200","C220d","C250","C300","C350e","AMG C43","AMG C63"],
    ("Mercedes-Benz","E-Class"): ["E200","E220d","E250","E300","E350","E400","E450","AMG E53","AMG E63"],
    ("Mercedes-Benz","S-Class"): ["S350","S400","S450","S500","S580","S600","AMG S63","AMG S65"],
    ("Mercedes-Benz","CLA"): ["CLA180","CLA200","CLA220","CLA250","AMG CLA35","AMG CLA45"],
    ("Mercedes-Benz","CLS"): ["CLS350","CLS400","CLS450","AMG CLS53","AMG CLS63"],
    ("Mercedes-Benz","GLA"): ["GLA180","GLA200","GLA220","GLA250","AMG GLA35","AMG GLA45"],
    ("Mercedes-Benz","GLB"): ["GLB200","GLB220","GLB250","AMG GLB35"],
    ("Mercedes-Benz","GLC"): ["GLC200","GLC220d","GLC300","GLC350e","AMG GLC43","AMG GLC63"],
    ("Mercedes-Benz","GLE"): ["GLE300","GLE350","GLE400","GLE450","GLE580","AMG GLE53","AMG GLE63"],
    ("Mercedes-Benz","GLS"): ["GLS400","GLS450","GLS580","AMG GLS63"],
    ("Mercedes-Benz","G-Class"): ["G350","G400","G500","G550","AMG G63","AMG G65"],
    # --- Toyota ---
    ("Toyota","Corolla"): ["L","LE","XLE","SE","XSE","GR Corolla"],
    ("Toyota","Camry"): ["L","LE","SE","XLE","XSE","TRD"],
    ("Toyota","Avalon"): ["XLE","XSE","Limited","TRD"],
    ("Toyota","Prius"): ["L","LE","XLE","Limited","Prime"],
    ("Toyota","RAV4"): ["LE","XLE","XLE Premium","Adventure","TRD Off-Road","Limited","Prime"],
    ("Toyota","Highlander"): ["L","LE","XLE","Limited","Platinum","Hybrid"],
    ("Toyota","4Runner"): ["SR5","TRD Off-Road","TRD Pro","Limited"],
    ("Toyota","Tacoma"): ["SR","SR5","TRD Sport","TRD Off-Road","TRD Pro","Limited"],
    ("Toyota","Tundra"): ["SR","SR5","Limited","Platinum","1794","TRD Pro"],
    ("Toyota","Land Cruiser"): ["Base","Heritage Edition"],
    ("Toyota","Supra"): ["2.0","3.0","3.0 Premium"],
    ("Toyota","Yaris"): ["L","LE","XLE","GRMN"],
    ("Toyota","C-HR"): ["LE","XLE","Nightshade","Limited"],
    ("Toyota","Sequoia"): ["SR5","Limited","Platinum","TRD Pro","Capstone"],
    ("Toyota","Sienna"): ["LE","XLE","XSE","Limited","Platinum"],
    # --- Honda ---
    ("Honda","Civic"): ["LX","Sport","EX","EX-L","Sport Touring","Touring","Si","Type R"],
    ("Honda","Accord"): ["LX","Sport","EX","EX-L","Touring","Sport-L"],
    ("Honda","CR-V"): ["LX","EX","EX-L","Touring","Hybrid"],
    ("Honda","HR-V"): ["LX","Sport","EX","EX-L"],
    ("Honda","Pilot"): ["LX","EX","EX-L","Touring","Elite","TrailSport"],
    ("Honda","Passport"): ["Sport","EX-L","Touring","Elite","TrailSport"],
    ("Honda","Ridgeline"): ["Sport","RTL","RTL-E","Black Edition"],
    ("Honda","Insight"): ["LX","EX","Touring"],
    ("Honda","Fit / Jazz"): ["LX","Sport","EX","EX-L"],
    ("Honda","Odyssey"): ["EX","EX-L","Touring","Elite"],
    # --- Volkswagen ---
    ("Volkswagen","Golf"): ["S","SE","SEL","Autobahn","GTI","R"],
    ("Volkswagen","Jetta"): ["S","SE","SEL","SEL Premium","GLI"],
    ("Volkswagen","Passat"): ["S","SE","SEL Premium","R-Line"],
    ("Volkswagen","Tiguan"): ["S","SE","SEL","SEL Premium","R-Line"],
    ("Volkswagen","Atlas"): ["S","SE","SEL","SEL Premium","Cross Sport"],
    ("Volkswagen","Arteon"): ["SE","SEL","SEL Premium","R-Line"],
    ("Volkswagen","ID.4"): ["Pro","Pro S","1st Edition"],
    # --- Nissan ---
    ("Nissan","Sentra"): ["S","SV","SR"],
    ("Nissan","Altima"): ["S","SV","SL","SR","Platinum"],
    ("Nissan","Maxima"): ["S","SV","SL","SR","Platinum"],
    ("Nissan","Versa"): ["S","SV","SR"],
    ("Nissan","Rogue"): ["S","SV","SL","Platinum"],
    ("Nissan","Murano"): ["S","SV","SL","Platinum"],
    ("Nissan","Pathfinder"): ["S","SV","SL","Platinum"],
    ("Nissan","Armada"): ["SV","SL","Platinum"],
    ("Nissan","Frontier"): ["S","SV","PRO-4X"],
    ("Nissan","Titan"): ["S","SV","PRO-4X","Platinum Reserve"],
    ("Nissan","Z"): ["Sport","Performance"],
    ("Nissan","GT-R"): ["Premium","NISMO","Track Edition"],
    ("Nissan","Qashqai / Rogue Sport"): ["S","SV","SL"],
    ("Nissan","X-Trail"): ["Visia","Acenta","N-Connecta","Tekna"],
    # --- Hyundai ---
    ("Hyundai","Elantra"): ["SE","SEL","Limited","N Line","N"],
    ("Hyundai","Sonata"): ["SE","SEL","Limited","N Line"],
    ("Hyundai","Accent"): ["SE","SEL"],
    ("Hyundai","Tucson"): ["SE","SEL","Limited","N Line"],
    ("Hyundai","Santa Fe"): ["SE","SEL","Limited","Calligraphy"],
    ("Hyundai","Palisade"): ["SE","SEL","Limited","Calligraphy"],
    ("Hyundai","Kona"): ["SE","SEL","Limited","N Line","N"],
    ("Hyundai","Venue"): ["SE","SEL"],
    ("Hyundai","Ioniq 5"): ["SE","SEL","Limited"],
    ("Hyundai","Ioniq 6"): ["SE","SEL","Limited"],
    ("Hyundai","Santa Cruz"): ["SE","SEL","Night","Limited"],
    ("Hyundai","Ioniq 7 / SEVEN"): ["Concept","Preview"],
    ("Hyundai","Genesis G70"): ["2.0T","3.3T Sport"],
    ("Hyundai","Genesis G80"): ["2.5T","3.5T Sport"],
    ("Hyundai","Genesis G90"): ["3.3T Premium","5.0 Ultimate"],
    # --- Kia ---
    ("Kia","Rio"): ["LX","S"],
    ("Kia","Forte"): ["LX","S","EX","GT-Line","GT"],
    ("Kia","K5"): ["LX","S","EX","GT-Line"],
    ("Kia","Stinger"): ["GT-Line","GT1","GT2"],
    ("Kia","Soul"): ["LX","S","EX","GT-Line"],
    ("Kia","Seltos"): ["LX","S","EX","SX Turbo"],
    ("Kia","Sportage"): ["LX","S","EX","SX Turbo"],
    ("Kia","Sorento"): ["LX","S","EX","SX","SX Turbo"],
    ("Kia","Telluride"): ["LX","S","EX","SX"],
    ("Kia","Carnival"): ["LX","S","EX","SX"],
    ("Kia","EV6"): ["Light","Wind","GT-Line"],
    ("Kia","Niro"): ["LX","EX","SX Touring"],
    ("Kia","EV9"): ["Light","Wind","Land","GT-Line"],
    # --- Ford ---
    ("Ford","Fiesta"): ["S","SE","Titanium","ST"],
    ("Ford","Focus"): ["S","SE","SEL","Titanium","ST","RS"],
    ("Ford","Fusion"): ["S","SE","SEL","Titanium","Sport"],
    ("Ford","Mustang"): ["EcoBoost","GT","Mach 1","Shelby GT350","Shelby GT500"],
    ("Ford","Escape"): ["S","SE","SEL","Titanium"],
    ("Ford","Edge"): ["SE","SEL","Titanium","ST"],
    ("Ford","Explorer"): ["Base","XLT","Limited","King Ranch","Platinum","ST"],
    ("Ford","Expedition"): ["XLT","Limited","King Ranch","Platinum"],
    ("Ford","F-150"): ["Regular Cab","SuperCab","SuperCrew","Raptor","Lightning"],
    ("Ford","Ranger"): ["XL","XLT","Lariat"],
    ("Ford","Bronco"): ["Base","Big Bend","Black Diamond","Outer Banks","Badlands","Wildtrak","Raptor"],
    ("Ford","Maverick"): ["XL","XLT","Lariat","Tremor"],
    ("Ford","Bronco Sport"): ["Base","Big Bend","Outer Banks","Badlands"],
    # --- Chevrolet ---
    ("Chevrolet","Spark"): ["LS","LT","Premier"],
    ("Chevrolet","Sonic"): ["LS","LT","Premier"],
    ("Chevrolet","Cruze"): ["L","LS","LT","Premier"],
    ("Chevrolet","Malibu"): ["L","LS","LT","Premier"],
    ("Chevrolet","Impala"): ["LS","LT","Premier"],
    ("Chevrolet","Camaro"): ["1LS","1LT","2LT","1SS","2SS","ZL1"],
    ("Chevrolet","Corvette"): ["1LT","2LT","3LT","Z06","ZR1"],
    ("Chevrolet","Trax"): ["L","LS","LT","Premier"],
    ("Chevrolet","Equinox"): ["L","LS","LT","Premier"],
    ("Chevrolet","Traverse"): ["L","LS","LT","Premier","High Country"],
    ("Chevrolet","Tahoe"): ["LS","LT","RST","Premier","High Country"],
    ("Chevrolet","Suburban"): ["LS","LT","RST","Premier","High Country"],
    ("Chevrolet","Silverado 1500"): ["Work Truck","Custom","LT","RST","LTZ","High Country"],
    ("Chevrolet","Blazer"): ["LT","RS","Premier"],
    ("Chevrolet","Trailblazer"): ["LS","LT","ACTIV","RS"],
    ("Chevrolet","Bolt EV"): ["1LT","2LT"],
    # --- Lexus ---
    ("Lexus","IS"): ["300","350","500 F SPORT Performance"],
    ("Lexus","ES"): ["250","300h","350"],
    ("Lexus","GS"): ["300","350","450h","F"],
    ("Lexus","LS"): ["500","500h"],
    ("Lexus","LC"): ["500","500h"],
    ("Lexus","RC"): ["300","350","F"],
    ("Lexus","UX"): ["200","250h"],
    ("Lexus","NX"): ["250","350","350h","450h+"],
    ("Lexus","RX"): ["350","350h","500h F SPORT Performance"],
    ("Lexus","GX"): ["460"],
    ("Lexus","LX"): ["570","600"],
    # --- Infiniti ---
    ("Infiniti","Q50"): ["Pure","Luxe","Sensory","Red Sport 400"],
    ("Infiniti","Q60"): ["Pure","Luxe","Sensory","Red Sport 400"],
    ("Infiniti","Q70"): ["Base","Sport"],
    ("Infiniti","QX50"): ["Pure","Luxe","Sensory","Autograph"],
    ("Infiniti","QX60"): ["Pure","Luxe","Sensory","Autograph"],
    ("Infiniti","QX80"): ["Pure","Luxe","Sensory","Autograph"],
    ("Infiniti","QX55"): ["Luxe","Essential","Sensory"],
    # --- Acura ---
    ("Acura","ILX"): ["Base","Premium","A-Spec"],
    ("Acura","TLX"): ["Base","Technology","A-Spec","Advance","Type S"],
    ("Acura","RLX"): ["Base","Technology","Advance"],
    ("Acura","NSX"): ["Base","Type S"],
    ("Acura","RDX"): ["Base","Technology","A-Spec","Advance"],
    ("Acura","MDX"): ["Base","Technology","A-Spec","Advance","Type S"],
    # --- Cadillac ---
    ("Cadillac","ATS"): ["Base","Luxury","Premium Luxury","V-Sport","V"],
    ("Cadillac","CTS"): ["Base","Luxury","Premium Luxury","V-Sport","V"],
    ("Cadillac","CT4"): ["Luxury","Premium Luxury","Sport","V-Series","V-Series Blackwing"],
    ("Cadillac","CT5"): ["Luxury","Premium Luxury","Sport","V-Series","V-Series Blackwing"],
    ("Cadillac","XTS"): ["Base","Luxury","Premium Luxury","Platinum"],
    ("Cadillac","XT4"): ["Luxury","Premium Luxury","Sport"],
    ("Cadillac","XT5"): ["Luxury","Premium Luxury","Sport"],
    ("Cadillac","XT6"): ["Luxury","Premium Luxury","Sport"],
    ("Cadillac","Escalade"): ["Luxury","Premium Luxury","Sport","V-Series"],
    # --- Lincoln ---
    ("Lincoln","MKZ"): ["Premiere","Select","Reserve"],
    ("Lincoln","Continental"): ["Premiere","Select","Reserve","Coach Door Edition"],
    ("Lincoln","Corsair"): ["Base","Reserve"],
    ("Lincoln","Nautilus"): ["Base","Reserve"],
    ("Lincoln","Aviator"): ["Premiere","Reserve","Grand Touring","Black Label"],
    ("Lincoln","Navigator"): ["Premiere","Select","Reserve","Black Label"],
    # --- Buick ---
    ("Buick","Verano"): ["Base","Convenience","Leather","Premium"],
    ("Buick","Regal"): ["1SV","Preferred","Essence","GS"],
    ("Buick","LaCrosse"): ["Base","Preferred","Essence","Premium","Avenir"],
    ("Buick","Encore"): ["Base","Convenience","Leather","Premium"],
    ("Buick","Envision"): ["Preferred","Essence","Premium","Avenir"],
    ("Buick","Enclave"): ["Base","Essence","Premium","Avenir"],
    # --- Genesis ---
    ("Genesis","G70"): ["2.0T","2.0T Advanced","3.3T Sport","3.3T Sport Prestige"],
    ("Genesis","G80"): ["2.5T","2.5T Advanced","3.5T Sport","3.5T Sport Prestige"],
    ("Genesis","G90"): ["3.3T Premium","3.3T Prestige","5.0 Ultimate"],
    ("Genesis","GV70"): ["2.5T","2.5T Advanced","3.5T Sport","3.5T Sport Prestige"],
    ("Genesis","GV80"): ["2.5T","2.5T Advanced","3.5T","3.5T Prestige"],
    # --- Tesla ---
    ("Tesla","Model S"): ["Standard Range","Long Range","Plaid"],
    ("Tesla","Model 3"): ["Standard Range Plus","Long Range","Performance"],
    ("Tesla","Model X"): ["Standard Range","Long Range","Plaid"],
    ("Tesla","Model Y"): ["Standard Range","Long Range","Performance"],
    # --- Porsche ---
    ("Porsche","911"): ["Carrera","Carrera S","Carrera 4","Carrera 4S","Turbo","Turbo S","GT3","GT3 RS","GT2 RS"],
    ("Porsche","Boxster"): ["Base","S","GTS","Spyder"],
    ("Porsche","Cayman"): ["Base","S","GTS","GT4"],
    ("Porsche","Panamera"): ["Base","4","S","4S","GTS","Turbo","Turbo S"],
    ("Porsche","Macan"): ["Base","S","GTS","Turbo"],
    ("Porsche","Cayenne"): ["Base","S","GTS","Turbo","Turbo S","E-Hybrid"],
    ("Porsche","Taycan"): ["Base","4S","Turbo","Turbo S"],
    # --- Jaguar ---
    ("Jaguar","XE"): ["Base","Premium","Prestige","R-Sport","S"],
    ("Jaguar","XF"): ["Premium","Prestige","R-Sport","S"],
    ("Jaguar","XJ"): ["Premium Luxury","Portfolio","Autobiography","XJR575"],
    ("Jaguar","F-TYPE"): ["Base","Premium","R-Dynamic","R","SVR"],
    ("Jaguar","E-PACE"): ["Base","S","SE","HSE","R-Dynamic"],
    ("Jaguar","F-PACE"): ["Premium","Prestige","R-Sport","S","SVR"],
    ("Jaguar","I-PACE"): ["S","SE","HSE","First Edition"],
    # --- Land Rover ---
    ("Land Rover","Range Rover Evoque"): ["S","SE","HSE","HSE Dynamic","Autobiography"],
    ("Land Rover","Range Rover Velar"): ["S","SE","HSE","R-Dynamic","P380","P550"],
    ("Land Rover","Range Rover Sport"): ["HSE","HSE Dynamic","Autobiography","SVR"],
    ("Land Rover","Range Rover"): ["Base","HSE","Autobiography","SV","SVAutobiography"],
    ("Land Rover","Discovery Sport"): ["S","SE","HSE","HSE Luxury"],
    ("Land Rover","Discovery"): ["S","SE","HSE","HSE Luxury"],
    ("Land Rover","Defender"): ["90","110","130","X","X-Dynamic","First Edition"],
    # --- Volvo ---
    ("Volvo","S60"): ["T5 Momentum","T5 R-Design","T6 R-Design","T8 Polestar"],
    ("Volvo","S90"): ["T5 Momentum","T6 Momentum","T6 R-Design","T8 Inscription"],
    ("Volvo","V60"): ["T5 Momentum","T5 R-Design","T6 R-Design","T8 Polestar"],
    ("Volvo","V90"): ["T5 Momentum","T6 Momentum","T6 R-Design","T8 Inscription"],
    ("Volvo","XC40"): ["T4 Momentum","T5 Momentum","T5 R-Design","Recharge"],
    ("Volvo","XC60"): ["Core","Plus","Ultimate","Polestar Engineered"],
    ("Volvo","XC90"): ["Momentum","R-Design","Inscription","Recharge"],
    # --- MINI / Mini ---
    # NOTE(review): ("MINI","Cooper") and ("Mini","Cooper") lower-case to the
    # same (make, model) pair under _norm(), so they compete for one slot in
    # REAL_TRIMS_N. Confirm which spelling the dataset uses and whether the two
    # lists should be one entry.
    ("MINI","Cooper"): ["One","Cooper","Cooper S","John Cooper Works"],
    ("Mini","Cooper"): ["Base","Classic","Signature","Iconic","John Cooper Works"],
    ("Mini","Countryman"): ["Classic","Signature","Iconic","John Cooper Works"],
    ("Mini","Clubman"): ["Classic","Signature","Iconic","John Cooper Works"],
    # --- Mazda ---
    ("Mazda","Mazda3"): ["Base","Select","Preferred","Premium","Turbo"],
    ("Mazda","Mazda6"): ["Sport","Touring","Grand Touring","Grand Touring Reserve","Signature"],
    ("Mazda","CX-3"): ["Sport","Touring","Grand Touring"],
    ("Mazda","CX-30"): ["S","Select","Preferred","Premium","Turbo","Turbo Premium Plus"],
    ("Mazda","CX-5"): ["S","Select","Preferred","Premium","Turbo","Turbo Signature"],
    ("Mazda","CX-50"): ["2.5 S","2.5 S Select","2.5 S Preferred","2.5 Turbo","2.5 Turbo Premium"],
    ("Mazda","CX-9"): ["Sport","Touring","Grand Touring","Signature","Carbon Edition"],
    ("Mazda","MX-5 Miata"): ["Sport","Club","Grand Touring"],
    ("Mazda","MX-30"): ["EV","EV Premium Plus"],
    # --- Subaru ---
    ("Subaru","Impreza"): ["Base","Premium","Sport","Limited"],
    ("Subaru","WRX"): ["Base","Premium","Limited","GT","STI"],
    ("Subaru","BRZ"): ["Premium","Limited","tS"],
    ("Subaru","Legacy"): ["Base","Premium","Sport","Limited","Touring XT"],
    ("Subaru","Outback"): ["Base","Premium","Limited","Onyx Edition XT","Wilderness","Touring XT"],
    ("Subaru","Forester"): ["Base","Premium","Sport","Wilderness","Limited","Touring"],
    ("Subaru","Crosstrek"): ["Base","Premium","Sport","Limited","Hybrid"],
    ("Subaru","Ascent"): ["Base","Premium","Onyx Edition","Limited","Touring"],
    # --- Mitsubishi ---
    ("Mitsubishi","Mirage"): ["ES","LE","SE","GT"],
    ("Mitsubishi","Outlander"): ["ES","SE","SEL","Black Edition","GT","PHEV"],
    ("Mitsubishi","Outlander Sport"): ["S","ES","LE","SE","GT"],
    ("Mitsubishi","Eclipse Cross"): ["ES","LE","SE","SEL"],
    ("Mitsubishi","Pajero"): ["GL","GLS","Exceed"],
    ("Mitsubishi","L200 / Triton"): ["GLX","GLS","Exceed"],
    # --- Peugeot ---
    ("Peugeot","208"): ["Active","Allure","GT"],
    ("Peugeot","2008"): ["Active","Allure","GT"],
    ("Peugeot","308"): ["Active","Allure","GT"],
    ("Peugeot","3008"): ["Active","Allure","GT","GT Pack"],
    ("Peugeot","5008"): ["Active","Allure","GT"],
    ("Peugeot","508"): ["Active","Allure","GT","PSE"],
    # --- Renault ---
    ("Renault","Clio"): ["Play","Iconic","S Edition","RS Line"],
    ("Renault","Megane"): ["Play","Iconic","RS Line","RS Trophy"],
    ("Renault","Captur"): ["Play","Iconic","S Edition","RS Line"],
    ("Renault","Kadjar"): ["Play","Iconic","S Edition","GT Line"],
    ("Renault","Arkana"): ["Iconic","S Edition","RS Line"],
    ("Renault","Austral"): ["Equilibre","Techno","Esprit Alpine"],
    # --- Skoda ---
    ("Skoda","Fabia"): ["S","SE","SE L","Monte Carlo"],
    ("Skoda","Octavia"): ["S","SE","SE L","SportLine","vRS"],
    ("Skoda","Superb"): ["SE","SE L","SportLine","L&K"],
    ("Skoda","Karoq"): ["SE Drive","SE L","SportLine"],
    ("Skoda","Kodiaq"): ["SE","SE L","SportLine","vRS"],
    # --- Seat ---
    ("Seat","Ibiza"): ["Reference","Style","Xcellence","FR"],
    ("Seat","Leon"): ["Reference","Style","Xcellence","FR","Cupra"],
    ("Seat","Arona"): ["Reference","Style","Xcellence","FR"],
    ("Seat","Ateca"): ["Reference","Style","Xcellence","FR"],
    ("Seat","Tarraco"): ["SE","SE Technology","Xcellence","FR"],
}
def _norm(x):
    """Return *x* as a stripped, lower-cased string for case-insensitive keys.

    ``None``, any other falsy value, and float NaN all normalise to ``""``.
    Without the NaN guard a missing make/model read from a DataFrame would
    become the literal key ``"nan"``, because NaN is truthy.
    """
    if isinstance(x, float) and x != x:  # NaN is the only float unequal to itself
        return ""
    return str(x or "").strip().lower()
def _normalized_trim_index(table):
    """Re-key a ``{(make, model): [trims]}`` table by normalised make/model.

    Keys that collapse to the same normalised pair (for example
    ``("MINI", "Cooper")`` and ``("Mini", "Cooper")``) are merged: trims are
    kept in first-seen order and duplicates are dropped. A plain dict
    comprehension would silently keep only the last colliding entry.
    """
    index = {}
    for (make, model), trims in table.items():
        bucket = index.setdefault((_norm(make), _norm(model)), [])
        for trim in trims:
            if trim not in bucket:
                bucket.append(trim)
    return index


# Case-insensitive view of REAL_TRIMS; this is the table get_trims_for() consults.
REAL_TRIMS_N = _normalized_trim_index(REAL_TRIMS)
def load_df():
    """Load the car dataset as a DataFrame.

    The merged CSV (``CSV_MAIN``) is preferred. When it is missing, every file
    matching ``CSV_PARTS_GLOB`` is read in sorted name order, concatenated with
    a fresh index, written back to ``CSV_MAIN`` for the next start, and
    returned.

    Raises:
        FileNotFoundError: neither the merged CSV nor any part file exists.
    """
    if os.path.exists(CSV_MAIN):
        return pd.read_csv(CSV_MAIN)

    part_paths = sorted(glob.glob(CSV_PARTS_GLOB))
    if not part_paths:
        raise FileNotFoundError("Upload RideSearch_dataset.csv or the parts RideSearch_part*_small.csv.")

    frames = list(map(pd.read_csv, part_paths))
    merged = pd.concat(frames, ignore_index=True)
    # Cache the merged result so later runs take the fast path above.
    merged.to_csv(CSV_MAIN, index=False)
    return merged
def unify_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with the column set the rest of the app expects.

    * Short score names (``popularity``, ``comfort``, ``reliability``,
      ``safety``) are copied to their canonical names when those are absent.
    * ``trim_display`` falls back to ``trim``, or to an empty string.
    * Every column in ``NUM_COLS_CANON`` exists and is numeric (unparseable
      values become NaN).
    * ``text_record`` is built by space-joining the descriptive columns that
      are present, unless the dataset already provides it.
    * Descriptive string columns that are missing are added as empty strings.
    """
    out = df.copy()

    # (legacy name, canonical name): only fill the canonical column when missing.
    legacy_aliases = (
        ("popularity", "popularity_score"),
        ("comfort", "comfort_score"),
        ("reliability", "reliability_score"),
        ("safety", "safety_rating"),
    )
    for legacy, canonical in legacy_aliases:
        if legacy in out.columns and canonical not in out.columns:
            out[canonical] = out[legacy]

    if "trim_display" not in out.columns:
        out["trim_display"] = out["trim"] if "trim" in out.columns else ""

    for name in NUM_COLS_CANON:
        if name not in out.columns:
            out[name] = np.nan
        out[name] = pd.to_numeric(out[name], errors="coerce")

    if "text_record" not in out.columns:
        candidates = ("make", "model", "trim_display", "body_type", "fuel", "engine_type")
        present = [name for name in candidates if name in out.columns]
        out["text_record"] = out[present].fillna("").astype(str).agg(" ".join, axis=1)

    for name in ("make", "model", "body_type", "fuel", "engine_type", "name"):
        if name not in out.columns:
            out[name] = ""

    return out
def suspicious_zero_to_100(series: pd.Series) -> bool:
    """Tell whether a 0-100 km/h column looks like placeholder data.

    With fewer than 20 non-null values the answer is always ``False``.
    Otherwise the column is flagged when it has very few distinct values
    (at most ``max(2, 3% of the count)``) or a standard deviation below 0.18.
    """
    observed = series.dropna()
    n_obs = len(observed)
    if n_obs < 20:
        return False

    distinct_cap = max(2, int(0.03 * n_obs))
    if observed.nunique() <= distinct_cap:
        return True
    return observed.std() < 0.18
def estimate_0_100(row):
    """Estimate a 0-100 km/h time in seconds from a car record.

    Args:
        row: mapping-like record (dict or pandas row) read through ``.get``.
            Keys used: ``horsepower``, ``body_type``, ``fuel``, ``model``,
            ``trim_display``.

    Returns:
        A float rounded to two decimals and clipped to ``[2.8, 14.5]``.

    The estimate starts from a log curve over horsepower (floored at 60 hp)
    and applies small offsets for body style, fuel type and performance
    badges. Missing, unparseable, zero or NaN horsepower is treated as
    150 hp, so the result is always finite.
    """
    import re  # local import: the file header does not import re

    try:
        hp = float(row.get("horsepower", 150) or 150)
    except (TypeError, ValueError):
        hp = 150.0
    if not np.isfinite(hp):
        # NaN is truthy, so it slips past the ``or 150`` fallback above.
        hp = 150.0

    body = str(row.get("body_type", "")).lower()
    fuel = str(row.get("fuel", "")).lower()
    trim = f"{row.get('model','')} {row.get('trim_display','')}".lower()

    base = 26.0 - 3.2 * np.log(max(hp, 60.0))
    if any(k in body for k in ["suv", "crossover", "pickup", "truck", "van"]):
        base += 0.7
    if any(k in body for k in ["coupe", "roadster"]):
        base -= 0.4
    if "electric" in fuel or "ev" in fuel:
        base -= 0.8
    if "hybrid" in fuel:
        base -= 0.3
    if "diesel" in fuel:
        base += 0.2

    # Performance badges are matched as whole words. Plain substring tests for
    # "rs" / "sti" also hit "Traverse", "Versa", "First Edition" and
    # "Prestige". ``v?rs\d*`` keeps "RS", "RS3", "RS6" and "vRS" matching, and
    # a bare "m" now matches at the end of the string too (e.g. "X3 M").
    performance_badge = (
        r"\b(?:m|amg|v?rs\d*|type r|sti|gts|gt3|hellcat|svr|cupra|john cooper works)\b"
    )
    if re.search(performance_badge, trim):
        base -= 0.6

    return float(np.clip(round(base, 2), 2.8, 14.5))
def ensure_embeddings(df):
    """Return ``(Etext, Enum)`` embedding matrices aligned row-for-row with *df*.

    Cached matrices in ``EMB_TEXT_NPY`` / ``EMB_NUM_NPY`` are reused only when
    both files exist and both have exactly ``len(df)`` rows. Anything else is
    treated as stale and rebuilt, so a changed dataset cannot be paired with
    embeddings of a different length. The check cannot detect edits that keep
    the row count unchanged; delete the ``.npy`` files after such edits.

    Text embeddings use sentence-transformers when it imported successfully,
    otherwise a 512-feature ``HashingVectorizer``. Numeric embeddings are the
    standardised numeric feature columns; missing values are filled with the
    column mean, and with 0 when a whole column is missing.

    Raises:
        RuntimeError: neither text-embedding backend is available.
    """
    n_rows = len(df)

    if os.path.exists(EMB_TEXT_NPY) and os.path.exists(EMB_NUM_NPY):
        Etext, Enum = np.load(EMB_TEXT_NPY), np.load(EMB_NUM_NPY)
        cache_matches = (
            Etext.ndim == 2 and Enum.ndim == 2
            and Etext.shape[0] == n_rows and Enum.shape[0] == n_rows
        )
        if cache_matches:
            return Etext, Enum
        # Row count mismatch: fall through and rebuild both matrices.

    # Same NaN handling for both backends: a missing record becomes "".
    texts = df["text_record"].fillna("").astype(str).tolist()
    if SentenceTransformer is not None:
        model = SentenceTransformer(EMBED_MODEL)
        Etext = model.encode(
            texts, batch_size=256, show_progress_bar=False, normalize_embeddings=True
        ).astype("float32")
    else:
        if HashingVectorizer is None:
            raise RuntimeError("Install sentence-transformers or scikit-learn for text embeddings.")
        hv = HashingVectorizer(n_features=512, alternate_sign=False, norm="l2")
        Etext = hv.transform(texts).toarray().astype("float32")
    np.save(EMB_TEXT_NPY, Etext)

    feature_cols = [
        "horsepower", "zero_to_100_kmh_s", "seats", "cargo_liters", "price_usd",
        "popularity_score", "comfort_score", "reliability_score", "tech_score",
        "ownership_cost_score", "safety_rating",
    ]
    X = df[feature_cols].copy()
    # The mean fill leaves an all-NaN column untouched (its mean is NaN), and
    # NaN would survive scaling and turn every cosine similarity into NaN.
    # The second fill pins such columns to 0.
    X = X.fillna(X.mean(numeric_only=True)).fillna(0.0)
    scaler = StandardScaler()
    Enum = scaler.fit_transform(X.values.astype("float32")).astype("float32")
    np.save(EMB_NUM_NPY, Enum)
    return Etext, Enum
def load_overrides():
    """Load saved trim overrides as ``{make: {model: [trims]}}``.

    Returns an empty dict when the file is missing, unreadable, not valid
    JSON, or not a JSON object. Per-make entries that are not objects are
    dropped, because callers chain ``overrides.get(make, {}).get(model)`` and
    would fail on any other shape.
    """
    if not os.path.exists(TRIMS_OVERRIDES):
        return {}
    try:
        with open(TRIMS_OVERRIDES, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        # Overrides are optional, so a damaged file means "no overrides".
        return {}
    if not isinstance(data, dict):
        return {}
    return {make: models for make, models in data.items() if isinstance(models, dict)}
def save_overrides(data):
    """Write *data* to ``TRIMS_OVERRIDES`` as indented UTF-8 JSON."""
    dump_options = {"indent": 2, "ensure_ascii": False}
    with open(TRIMS_OVERRIDES, "w", encoding="utf-8") as handle:
        json.dump(data, handle, **dump_options)
def learned_trims_from_csv(df):
    """Collect the trims seen in the dataset for each make/model.

    Returns a dict keyed by the normalised ``(make, model)`` pair. Each value
    holds up to 20 distinct non-blank ``trim_display`` strings in
    ``value_counts`` order (most frequent first). Groups with no usable trim
    are omitted.
    """
    learned = {}
    for (make, model), rows in df.groupby(["make", "model"]):
        labels = rows["trim_display"].dropna().astype(str).map(str.strip)
        labels = labels[labels != ""]
        ranked = labels.value_counts().index.tolist()
        if not ranked:
            continue
        learned[(_norm(make), _norm(model))] = ranked[:20]
    return learned
def get_trims_for(make, model, learned, overrides):
    """Pick the trim list to offer for a make/model.

    Sources are tried in order: a non-empty saved override (looked up by the
    exact make/model strings), the built-in ``REAL_TRIMS_N`` table, then trims
    learned from the dataset (both keyed by normalised make/model). When none
    match the result is ``["Base"]``.
    """
    custom = overrides.get(make, {}).get(model)
    if custom:
        return custom

    key = (_norm(make), _norm(model))
    for table in (REAL_TRIMS_N, learned):
        if key in table:
            return table[key]
    return ["Base"]
def wiki_image(make, model, year=None):
    """Look up a thumbnail URL for a car through the Wikipedia search API.

    Args:
        make, model: used verbatim in the search query.
        year: optional. Numeric years, including floats from a DataFrame
            column, are written as whole numbers. ``None``, NaN, 0 and blank
            values leave the year out of the query.

    Returns:
        The thumbnail URL of the first search hit, or ``None`` when there is
        no hit, the hit has no image, or the request fails. The lookup is
        best-effort: network and parse errors never propagate.
    """
    year_txt = ""
    try:
        year_num = float(year)
        if np.isfinite(year_num) and year_num:
            # 2020.0 would otherwise be sent as "2020.0", and NaN as "nan".
            year_txt = str(int(year_num))
    except (TypeError, ValueError):
        year_txt = year.strip() if isinstance(year, str) else ""

    q = f"{year_txt} {make} {model}" if year_txt else f"{make} {model}"
    try:
        resp = requests.get(
            "https://en.wikipedia.org/w/api.php",
            params={"action":"query","format":"json","prop":"pageimages","piprop":"thumbnail","pithumbsize":600,"generator":"search","gsrsearch":q,"gsrlimit":1,"gsrnamespace":0},
            timeout=6,
        )
        resp.raise_for_status()
        pages = resp.json().get("query", {}).get("pages", {})
        if pages:
            page = next(iter(pages.values()))
            return page.get("thumbnail", {}).get("source")
    except (requests.RequestException, ValueError, AttributeError):
        # Photos are decorative; a failed lookup just means "no image".
        pass
    return None
def cosine_sim_row_vs_mat(mat, vec):
    """Cosine similarity between every row of *mat* and the vector *vec*.

    Both operands are scaled to unit length first. A small epsilon is added to
    each norm, so an all-zero row or vector yields 0 instead of dividing by
    zero.
    """
    eps = 1e-9
    row_norms = np.linalg.norm(mat, axis=1, keepdims=True)
    unit_rows = mat / (row_norms + eps)
    unit_vec = vec / (np.linalg.norm(vec) + eps)
    return unit_rows @ unit_vec
def _fmt_whole(value, missing="β€”"):
    """Render *value* as a whole number, or *missing* when it is absent or NaN."""
    try:
        if pd.isna(value):
            return missing
        return str(int(value))
    except (TypeError, ValueError):
        return missing


def recommend(df, Etext, Enum, make, model, trim, year, topk, alpha, body, fuel, y_min, y_max, p_min, p_max, safety, reliab, cross_brand_only=True, exclude_same_model=True, unique_brand=True):
    """Recommend cars similar to an anchor car chosen by make/model/trim/year.

    Args:
        df: dataset as produced by ``unify_columns``.
        Etext, Enum: text and numeric embedding matrices, one row per df row.
        make, model: exact-match selectors for the anchor car.
        trim, year: optional refinements. When they match nothing, the anchor
            falls back to any row of the make/model.
        topk: number of recommendations wanted.
        alpha: weight of text similarity; ``1 - alpha`` goes to numeric.
        body, fuel: exact-match filters, or ``"Any"`` / empty to skip.
        y_min, y_max, p_min, p_max: inclusive year and price bounds.
        safety, reliab: minimum safety rating and reliability score.
        cross_brand_only: drop the anchor's make from the candidates.
        exclude_same_model: drop the anchor's make+model from the candidates.
        unique_brand: keep at most one recommendation per make.

    Returns:
        ``(error_message, table, anchor_markdown, cards)``. On failure
        ``error_message`` is a string and the rest are ``None, "", []``.
        On success ``error_message`` is ``None``, ``table`` is a DataFrame
        with a ``similarity_%`` column, and ``cards`` holds up to three
        ``(image_url_or_None, title)`` pairs.

    NOTE(review): index labels of *df* are used as row positions into
    ``Etext`` / ``Enum``. That holds for the default RangeIndex that
    ``load_df`` produces; confirm for any other caller.
    """
    same_model = df[(df["make"] == make) & (df["model"] == model)]
    if same_model.empty:
        return "No such make/model in dataset.", None, "", []

    sub = same_model
    if trim:
        sub = sub[sub["trim_display"].astype(str).str.lower() == str(trim).lower()]
    if year:
        sub = sub[pd.to_numeric(sub["year"], errors="coerce") == int(year)]
    if sub.empty:
        # The requested trim/year is not in the data: anchor on any variant.
        sub = same_model

    # The most popular matching row is the anchor.
    a = sub.sort_values("popularity_score", ascending=False).iloc[0]
    a_idx = int(a.name)

    pool = df
    if cross_brand_only:
        pool = pool[pool["make"] != a["make"]]
    if exclude_same_model:
        pool = pool[~((pool["make"] == a["make"]) & (pool["model"] == a["model"]))]
    if body and body != "Any":
        pool = pool[pool["body_type"] == body]
    if fuel and fuel != "Any":
        pool = pool[pool["fuel"] == fuel]
    # NaN never satisfies a comparison, so rows missing any of these are dropped.
    pool = pool[(pool["year"] >= y_min) & (pool["year"] <= y_max)]
    pool = pool[(pool["price_usd"] >= p_min) & (pool["price_usd"] <= p_max)]
    pool = pool[(pool["safety_rating"] >= safety) & (pool["reliability_score"] >= reliab)]
    if pool.empty:
        return "No cars after your filters. Loosen price/year/safety.", None, "", []

    cand_idx = pool.index.values
    st = cosine_sim_row_vs_mat(Etext[cand_idx], Etext[a_idx])
    sn = cosine_sim_row_vs_mat(Enum[cand_idx], Enum[a_idx])
    s = float(alpha) * st + (1 - float(alpha)) * sn

    # Walk candidates from most to least similar, optionally one per brand.
    # Brands are lower-cased once up front instead of one df.loc call per row.
    brands = pool["make"].astype(str).str.lower().to_numpy()
    limit = int(topk)
    chosen, chosen_sims, seen = [], [], set()
    for j in np.argsort(-s):
        brand = brands[j]
        if unique_brand and brand in seen:
            continue
        seen.add(brand)
        chosen.append(cand_idx[j])
        chosen_sims.append(float(s[j]))
        if len(chosen) >= limit:
            break
    if not chosen:
        return "No recommendations after constraints.", None, "", []

    out = df.loc[chosen].copy()
    out["similarity_%"] = [round(sim * 100, 1) for sim in chosen_sims]
    cols = ["name","make","model","trim_display","year","body_type","fuel","engine_type","price_usd","horsepower","zero_to_100_kmh_s","popularity_score","comfort_score","reliability_score","tech_score","ownership_cost_score","safety_rating","similarity_%"]
    table = out[[c for c in cols if c in out.columns]].reset_index(drop=True)

    # Anchor summary card. Every numeric field goes through _fmt_whole, so a
    # missing score renders as a placeholder instead of raising on int(NaN).
    disp_trim = str(a.get("trim_display", "")).strip()
    accel = a.get('zero_to_100_kmh_s', 'β€”')
    accel_txt = 'β€”' if pd.isna(accel) else accel
    anchor_md = (f"**{a['make']} {a['model']} {disp_trim} {_fmt_whole(a['year'], '')}** \n"
                 f"Body: {a.get('body_type','')} β€’ Fuel: {a.get('fuel','')} β€’ Engine: {a.get('engine_type','')} \n"
                 f"HP: {_fmt_whole(a['horsepower'])} β€’ 0–100: {accel_txt}s "
                 f"β€’ Price: ${_fmt_whole(a['price_usd'])} \n"
                 f"Popularity {_fmt_whole(a.get('popularity_score',0))}/10 β€’ Comfort {_fmt_whole(a.get('comfort_score',0))}/10 β€’ "
                 f"Reliability {_fmt_whole(a.get('reliability_score',0))}/100 β€’ Safety {_fmt_whole(a.get('safety_rating',0))}β˜…")

    # Photos for the top three picks only; each lookup is a network call.
    cards = []
    for _, r in out.head(3).iterrows():
        img = wiki_image(r.get("make", ""), r.get("model", ""), r.get("year", None))
        title = f"{r.get('make','')} {r.get('model','')} {str(r.get('trim_display','')).strip()}"
        cards.append((img, title))
    return None, table, anchor_md, cards
def build_app():
    """Load the dataset, prepare embeddings and assemble the Gradio UI.

    Returns the ``gr.Blocks`` object. It has two tabs: "Pick & Recommend"
    (anchor selection, filters and results) and an admin tab that saves
    per-model trim overrides.

    NOTE(review): the source as received carries no indentation, so the
    nesting of the ``with`` blocks below is reconstructed. Confirm the layout
    against the running app. Several UI strings also look mis-encoded
    (for example ``β€”``); confirm the file's encoding before changing them.
    """
    df = unify_columns(load_df())
    # Replace the whole acceleration column with estimates when it looks like placeholder data.
    if suspicious_zero_to_100(df["zero_to_100_kmh_s"]): df["zero_to_100_kmh_s"] = df.apply(estimate_0_100, axis=1)
    Etext, Enum = ensure_embeddings(df)
    learned = learned_trims_from_csv(df)
    overrides = load_overrides()
    # Dropdown choices and slider bounds come from the data itself.
    makes = sorted(df["make"].dropna().astype(str).unique().tolist())
    body_choices = ["Any"] + sorted([b for b in df["body_type"].dropna().astype(str).unique().tolist() if b])
    fuel_choices = ["Any"] + sorted([f for f in df["fuel"].dropna().astype(str).unique().tolist() if f])
    # NOTE(review): int() raises if a column is entirely NaN (min/max are NaN then).
    y_lo, y_hi = int(df["year"].min(skipna=True)), int(df["year"].max(skipna=True))
    p_lo, p_hi = int(df["price_usd"].min(skipna=True)), int(df["price_usd"].max(skipna=True))
    # Refill the model dropdown when a make is picked.
    def models_for(make):
        if not make: return gr.update(choices=[], value=None)
        opts = sorted(df.loc[df["make"].eq(make), "model"].dropna().astype(str).unique().tolist())
        return gr.update(choices=opts, value=None)
    # Refill the trim and year dropdowns when a model is picked.
    def trims_years_for(make, model):
        if not make or not model: return gr.update(choices=[], value=None), gr.update(choices=[], value=None)
        trims = get_trims_for(make, model, learned, overrides)
        years = sorted(pd.to_numeric(df[(df["make"]==make)&(df["model"]==model)]["year"], errors="coerce").dropna().astype(int).unique().tolist())
        return gr.update(choices=trims, value=None), gr.update(choices=[None]+years, value=None)
    with gr.Blocks(theme=gr.themes.Soft(), title="RideSearch") as demo:
        gr.Markdown("## RideSearch β€” cross-brand recommendations with realistic trims & photos")
        with gr.Tab("Pick & Recommend"):
            # Anchor car selection.
            with gr.Row():
                mk = gr.Dropdown(makes, label="Make")
                md = gr.Dropdown([], label="Model")
                tr = gr.Dropdown([], label="Trim (optional)")
                yr = gr.Dropdown([], label="Year (optional)")
            mk.change(models_for, mk, md)
            md.change(lambda a,b: trims_years_for(a,b), [mk, md], [tr, yr])
            # Candidate filters.
            with gr.Row():
                body = gr.Dropdown(body_choices, value="Any", label="Body")
                fuel = gr.Dropdown(fuel_choices, value="Any", label="Fuel")
            with gr.Row():
                y_min = gr.Slider(y_lo, y_hi, value=y_lo, step=1, label="Year min")
                y_max = gr.Slider(y_lo, y_hi, value=y_hi, step=1, label="Year max")
            with gr.Row():
                p_min = gr.Slider(p_lo, p_hi, value=p_lo, step=500, label="Price min (USD)")
                # The inner max() always evaluates to p_lo+20000, so the default is min(p_hi, p_lo+20000).
                p_max = gr.Slider(p_lo, p_hi, value=min(p_hi, max(p_lo+5000, p_lo+20000)), step=500, label="Price max (USD)")
            with gr.Row():
                safety = gr.Slider(3, 5, value=4, step=1, label="Min Safety β˜…")
                reliab = gr.Slider(55, 99, value=70, step=1, label="Min Reliability")
            # Ranking controls.
            with gr.Row():
                topk = gr.Slider(1, 10, value=5, step=1, label="Recommendations")
                alpha = gr.Slider(0, 1, value=0.7, step=0.05, label="Ξ± β€” Text vs Numeric")
            with gr.Row():
                cross = gr.Checkbox(True, label="Cross-brand only")
                xmodel = gr.Checkbox(True, label="Exclude same model family")
                uniqb = gr.Checkbox(True, label="Unique brands (no repeats)")
            run = gr.Button("Recommend", variant="primary")
            # Output widgets: error line, anchor summary, results table, photo gallery.
            err = gr.Markdown()
            anchor_md = gr.Markdown()
            table = gr.Dataframe(interactive=False, wrap=True, label="Recommendations")
            gallery = gr.Gallery(label="Photos", height=220, columns=[3])
            # Cast the widget values, call recommend(), and map its 4-tuple onto the four outputs.
            def on_click(mk_, md_, tr_, yr_, topk_, alpha_, body_, fuel_, y_min_, y_max_, p_min_, p_max_, safety_, reliab_, cross_, xmodel_, uniqb_):
                msg, tbl, atext, cards = recommend(
                    df, Etext, Enum, mk_, md_, tr_, yr_, int(topk_), float(alpha_),
                    body_, fuel_, int(y_min_), int(y_max_), int(p_min_), int(p_max_), int(safety_), int(reliab_),
                    cross_brand_only=bool(cross_), exclude_same_model=bool(xmodel_), unique_brand=bool(uniqb_)
                )
                # A non-empty message means failure: show it and clear the other outputs.
                if msg: return gr.update(value=f"**{msg}**"), gr.update(value=""), pd.DataFrame(), []
                return gr.update(value=""), gr.update(value=atext), tbl, cards
            run.click(on_click,
                [mk, md, tr, yr, topk, alpha, body, fuel, y_min, y_max, p_min, p_max, safety, reliab, cross, xmodel, uniqb],
                [err, anchor_md, table, gallery])
        with gr.Tab("Admin β€’ Trim Overrides"):
            gr.Markdown("Paste correct trims (one per line) for a model. Saved to trims_overrides.json.")
            a_mk = gr.Dropdown(makes, label="Make")
            a_md = gr.Dropdown([], label="Model")
            a_txt = gr.Textbox(lines=8, label="Display trims (one per line)")
            save_btn = gr.Button("Save override")
            save_msg = gr.Markdown()
            a_mk.change(models_for, a_mk, a_md)
            # Store the pasted trims in the shared overrides dict and persist it to disk.
            def do_save(make, model, txt):
                # overrides is only mutated below, never rebound.
                nonlocal overrides
                trims = [t.strip() for t in str(txt).splitlines() if t.strip()]
                if not make or not model or not trims: return "⚠️ Provide make, model, and at least one trim."
                if make not in overrides: overrides[make] = {}
                overrides[make][model] = trims
                save_overrides(overrides)
                return f"βœ… Saved {len(trims)} trims for {make} {model}. Refresh the Pick tab."
            save_btn.click(do_save, [a_mk, a_md, a_txt], save_msg)
        gr.Markdown("First run may be slow while embeddings build.")
    return demo
# Script entry point: build the UI and serve it on every interface, port 7860.
if __name__ == "__main__":
    demo = build_app()
    # queue() is enabled before launch().
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)