"""RideSearch: cross-brand car recommendations served through a Gradio UI.

The module loads a car dataset from CSV, builds (or loads cached) text and
numeric embeddings for every row, and recommends the rows most similar to a
user-selected "anchor" car.  Trim choices shown in the UI come from, in order
of precedence: user overrides stored in ``trims_overrides.json``, the curated
``REAL_TRIMS`` table, and trims observed in the dataset itself.

The UI, HTTP and scikit-learn packages are imported defensively so that the
pure recommendation helpers stay importable without them; the functions that
need a missing package raise a clear error (or, for the best-effort image
lookup, simply return ``None``).
"""
import glob
import json
import os
import re

import numpy as np
import pandas as pd

try:
    import requests
except ImportError:
    requests = None
try:
    import gradio as gr
except ImportError:
    gr = None
try:
    from sklearn.preprocessing import StandardScaler
except ImportError:
    StandardScaler = None
try:
    from sentence_transformers import SentenceTransformer
except Exception:
    SentenceTransformer = None
try:
    from sklearn.feature_extraction.text import HashingVectorizer
except Exception:
    HashingVectorizer = None

# Dataset and cache locations (relative to the working directory).
CSV_MAIN = "RideSearch_dataset.csv"
CSV_PARTS_GLOB = "RideSearch_part*_small.csv"
EMB_TEXT_NPY = "emb_text.npy"
EMB_NUM_NPY = "emb_num.npy"
TRIMS_OVERRIDES = "trims_overrides.json"
EMBED_MODEL = os.environ.get("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")

# Numeric columns every dataset is normalised to contain (see unify_columns).
NUM_COLS_CANON = [
    "horsepower", "zero_to_100_kmh_s", "seats", "cargo_liters", "price_usd",
    "popularity_score", "comfort_score", "reliability_score", "tech_score",
    "ownership_cost_score", "safety_rating", "year",
]
# Columns that feed the numeric embedding: everything except the model year.
EMB_NUM_COLS = [c for c in NUM_COLS_CANON if c != "year"]

# Curated trim names keyed by (make, model).
REAL_TRIMS = {
    ("BMW", "1 Series"): ["116i", "118i", "120i", "125i", "M135i"],
    ("BMW", "2 Series"): ["218i", "220i", "225i", "230i", "M235i", "M240i"],
    ("BMW", "3 Series"): ["318i", "320i", "325i", "330i", "330e", "335i", "340i", "M340i", "M3"],
    ("BMW", "4 Series"): ["420i", "430i", "435i", "440i", "M440i", "M4"],
    ("BMW", "5 Series"): ["520i", "525i", "530i", "530e", "535i", "540i", "550i", "M550i", "M5"],
    ("BMW", "6 Series"): ["630i", "640i", "650i", "M6"],
    ("BMW", "7 Series"): ["730i", "740i", "750i", "760i", "M760i"],
    ("BMW", "8 Series"): ["840i", "850i", "M850i", "M8"],
    ("BMW", "X1"): ["sDrive18i", "sDrive20i", "xDrive20i", "xDrive25i"],
    ("BMW", "X2"): ["sDrive18i", "sDrive20i", "xDrive20i", "xDrive25i", "M35i"],
    ("BMW", "X3"): ["sDrive20i", "xDrive20i", "xDrive30i", "M40i", "X3 M"],
    ("BMW", "X4"): ["xDrive20i", "xDrive30i", "M40i", "X4 M"],
    ("BMW", "X5"): ["sDrive40i", "xDrive40i", "xDrive45e", "M50i", "X5 M"],
    ("BMW", "X6"): ["sDrive40i", "xDrive40i", "M50i", "X6 M"],
    ("BMW", "X7"): ["xDrive40i", "xDrive50i", "M50i"],
    ("BMW", "Z4"): ["sDrive20i", "sDrive30i", "M40i"],
    ("Audi", "A1"): ["25 TFSI", "30 TFSI", "35 TFSI", "S1"],
    ("Audi", "A3"): ["30 TFSI", "35 TFSI", "40 TFSI", "45 TFSI e", "S3", "RS3"],
    ("Audi", "A4"): ["35 TFSI", "40 TFSI", "45 TFSI", "45 TFSI e", "S4", "RS4"],
    ("Audi", "A5"): ["35 TFSI", "40 TFSI", "45 TFSI", "S5", "RS5"],
    ("Audi", "A6"): ["40 TFSI", "45 TFSI", "50 TFSI", "55 TFSI e", "S6", "RS6"],
    ("Audi", "A7"): ["45 TFSI", "50 TFSI", "55 TFSI e", "S7", "RS7"],
    ("Audi", "A8"): ["50 TFSI", "55 TFSI", "60 TFSI e", "S8"],
    ("Audi", "Q2"): ["30 TFSI", "35 TFSI", "40 TFSI"],
    ("Audi", "Q3"): ["35 TFSI", "40 TFSI", "45 TFSI e", "RS Q3"],
    ("Audi", "Q5"): ["40 TFSI", "45 TFSI e", "SQ5"],
    ("Audi", "Q7"): ["45 TFSI", "50 TDI", "55 TFSI e", "SQ7"],
    ("Audi", "Q8"): ["50 TDI", "55 TFSI", "SQ8", "RS Q8"],
    ("Audi", "TT"): ["40 TFSI", "45 TFSI", "TTS", "TT RS"],
    ("Audi", "R8"): ["V10", "V10 Performance", "V10 Plus"],
    ("Mercedes-Benz", "A-Class"): ["A180", "A200", "A220", "A250", "A250e", "AMG A35", "AMG A45"],
    ("Mercedes-Benz", "B-Class"): ["B180", "B200", "B220", "B250e"],
    ("Mercedes-Benz", "C-Class"): ["C180", "C200", "C220d", "C250", "C300", "C350e", "AMG C43", "AMG C63"],
    ("Mercedes-Benz", "E-Class"): ["E200", "E220d", "E250", "E300", "E350", "E400", "E450", "AMG E53", "AMG E63"],
    ("Mercedes-Benz", "S-Class"): ["S350", "S400", "S450", "S500", "S580", "S600", "AMG S63", "AMG S65"],
    ("Mercedes-Benz", "CLA"): ["CLA180", "CLA200", "CLA220", "CLA250", "AMG CLA35", "AMG CLA45"],
    ("Mercedes-Benz", "CLS"): ["CLS350", "CLS400", "CLS450", "AMG CLS53", "AMG CLS63"],
    ("Mercedes-Benz", "GLA"): ["GLA180", "GLA200", "GLA220", "GLA250", "AMG GLA35", "AMG GLA45"],
    ("Mercedes-Benz", "GLB"): ["GLB200", "GLB220", "GLB250", "AMG GLB35"],
    ("Mercedes-Benz", "GLC"): ["GLC200", "GLC220d", "GLC300", "GLC350e", "AMG GLC43", "AMG GLC63"],
    ("Mercedes-Benz", "GLE"): ["GLE300", "GLE350", "GLE400", "GLE450", "GLE580", "AMG GLE53", "AMG GLE63"],
    ("Mercedes-Benz", "GLS"): ["GLS400", "GLS450", "GLS580", "AMG GLS63"],
    ("Mercedes-Benz", "G-Class"): ["G350", "G400", "G500", "G550", "AMG G63", "AMG G65"],
    ("Toyota", "Corolla"): ["L", "LE", "XLE", "SE", "XSE", "GR Corolla"],
    ("Toyota", "Camry"): ["L", "LE", "SE", "XLE", "XSE", "TRD"],
    ("Toyota", "Avalon"): ["XLE", "XSE", "Limited", "TRD"],
    ("Toyota", "Prius"): ["L", "LE", "XLE", "Limited", "Prime"],
    ("Toyota", "RAV4"): ["LE", "XLE", "XLE Premium", "Adventure", "TRD Off-Road", "Limited", "Prime"],
    ("Toyota", "Highlander"): ["L", "LE", "XLE", "Limited", "Platinum", "Hybrid"],
    ("Toyota", "4Runner"): ["SR5", "TRD Off-Road", "TRD Pro", "Limited"],
    ("Toyota", "Tacoma"): ["SR", "SR5", "TRD Sport", "TRD Off-Road", "TRD Pro", "Limited"],
    ("Toyota", "Tundra"): ["SR", "SR5", "Limited", "Platinum", "1794", "TRD Pro"],
    ("Toyota", "Land Cruiser"): ["Base", "Heritage Edition"],
    ("Toyota", "Supra"): ["2.0", "3.0", "3.0 Premium"],
    ("Toyota", "Yaris"): ["L", "LE", "XLE", "GRMN"],
    ("Toyota", "C-HR"): ["LE", "XLE", "Nightshade", "Limited"],
    ("Toyota", "Sequoia"): ["SR5", "Limited", "Platinum", "TRD Pro", "Capstone"],
    ("Toyota", "Sienna"): ["LE", "XLE", "XSE", "Limited", "Platinum"],
    ("Honda", "Civic"): ["LX", "Sport", "EX", "EX-L", "Sport Touring", "Touring", "Si", "Type R"],
    ("Honda", "Accord"): ["LX", "Sport", "EX", "EX-L", "Touring", "Sport-L"],
    ("Honda", "CR-V"): ["LX", "EX", "EX-L", "Touring", "Hybrid"],
    ("Honda", "HR-V"): ["LX", "Sport", "EX", "EX-L"],
    ("Honda", "Pilot"): ["LX", "EX", "EX-L", "Touring", "Elite", "TrailSport"],
    ("Honda", "Passport"): ["Sport", "EX-L", "Touring", "Elite", "TrailSport"],
    ("Honda", "Ridgeline"): ["Sport", "RTL", "RTL-E", "Black Edition"],
    ("Honda", "Insight"): ["LX", "EX", "Touring"],
    ("Honda", "Fit / Jazz"): ["LX", "Sport", "EX", "EX-L"],
    ("Honda", "Odyssey"): ["EX", "EX-L", "Touring", "Elite"],
    ("Volkswagen", "Golf"): ["S", "SE", "SEL", "Autobahn", "GTI", "R"],
    ("Volkswagen", "Jetta"): ["S", "SE", "SEL", "SEL Premium", "GLI"],
    ("Volkswagen", "Passat"): ["S", "SE", "SEL Premium", "R-Line"],
    ("Volkswagen", "Tiguan"): ["S", "SE", "SEL", "SEL Premium", "R-Line"],
    ("Volkswagen", "Atlas"): ["S", "SE", "SEL", "SEL Premium", "Cross Sport"],
    ("Volkswagen", "Arteon"): ["SE", "SEL", "SEL Premium", "R-Line"],
    ("Volkswagen", "ID.4"): ["Pro", "Pro S", "1st Edition"],
    ("Nissan", "Sentra"): ["S", "SV", "SR"],
    ("Nissan", "Altima"): ["S", "SV", "SL", "SR", "Platinum"],
    ("Nissan", "Maxima"): ["S", "SV", "SL", "SR", "Platinum"],
    ("Nissan", "Versa"): ["S", "SV", "SR"],
    ("Nissan", "Rogue"): ["S", "SV", "SL", "Platinum"],
    ("Nissan", "Murano"): ["S", "SV", "SL", "Platinum"],
    ("Nissan", "Pathfinder"): ["S", "SV", "SL", "Platinum"],
    ("Nissan", "Armada"): ["SV", "SL", "Platinum"],
    ("Nissan", "Frontier"): ["S", "SV", "PRO-4X"],
    ("Nissan", "Titan"): ["S", "SV", "PRO-4X", "Platinum Reserve"],
    ("Nissan", "Z"): ["Sport", "Performance"],
    ("Nissan", "GT-R"): ["Premium", "NISMO", "Track Edition"],
    ("Nissan", "Qashqai / Rogue Sport"): ["S", "SV", "SL"],
    ("Nissan", "X-Trail"): ["Visia", "Acenta", "N-Connecta", "Tekna"],
    ("Hyundai", "Elantra"): ["SE", "SEL", "Limited", "N Line", "N"],
    ("Hyundai", "Sonata"): ["SE", "SEL", "Limited", "N Line"],
    ("Hyundai", "Accent"): ["SE", "SEL"],
    ("Hyundai", "Tucson"): ["SE", "SEL", "Limited", "N Line"],
    ("Hyundai", "Santa Fe"): ["SE", "SEL", "Limited", "Calligraphy"],
    ("Hyundai", "Palisade"): ["SE", "SEL", "Limited", "Calligraphy"],
    ("Hyundai", "Kona"): ["SE", "SEL", "Limited", "N Line", "N"],
    ("Hyundai", "Venue"): ["SE", "SEL"],
    ("Hyundai", "Ioniq 5"): ["SE", "SEL", "Limited"],
    ("Hyundai", "Ioniq 6"): ["SE", "SEL", "Limited"],
    ("Hyundai", "Santa Cruz"): ["SE", "SEL", "Night", "Limited"],
    ("Hyundai", "Ioniq 7 / SEVEN"): ["Concept", "Preview"],
    ("Hyundai", "Genesis G70"): ["2.0T", "3.3T Sport"],
    ("Hyundai", "Genesis G80"): ["2.5T", "3.5T Sport"],
    ("Hyundai", "Genesis G90"): ["3.3T Premium", "5.0 Ultimate"],
    ("Kia", "Rio"): ["LX", "S"],
    ("Kia", "Forte"): ["LX", "S", "EX", "GT-Line", "GT"],
    ("Kia", "K5"): ["LX", "S", "EX", "GT-Line"],
    ("Kia", "Stinger"): ["GT-Line", "GT1", "GT2"],
    ("Kia", "Soul"): ["LX", "S", "EX", "GT-Line"],
    ("Kia", "Seltos"): ["LX", "S", "EX", "SX Turbo"],
    ("Kia", "Sportage"): ["LX", "S", "EX", "SX Turbo"],
    ("Kia", "Sorento"): ["LX", "S", "EX", "SX", "SX Turbo"],
    ("Kia", "Telluride"): ["LX", "S", "EX", "SX"],
    ("Kia", "Carnival"): ["LX", "S", "EX", "SX"],
    ("Kia", "EV6"): ["Light", "Wind", "GT-Line"],
    ("Kia", "Niro"): ["LX", "EX", "SX Touring"],
    ("Kia", "EV9"): ["Light", "Wind", "Land", "GT-Line"],
    ("Ford", "Fiesta"): ["S", "SE", "Titanium", "ST"],
    ("Ford", "Focus"): ["S", "SE", "SEL", "Titanium", "ST", "RS"],
    ("Ford", "Fusion"): ["S", "SE", "SEL", "Titanium", "Sport"],
    ("Ford", "Mustang"): ["EcoBoost", "GT", "Mach 1", "Shelby GT350", "Shelby GT500"],
    ("Ford", "Escape"): ["S", "SE", "SEL", "Titanium"],
    ("Ford", "Edge"): ["SE", "SEL", "Titanium", "ST"],
    ("Ford", "Explorer"): ["Base", "XLT", "Limited", "King Ranch", "Platinum", "ST"],
    ("Ford", "Expedition"): ["XLT", "Limited", "King Ranch", "Platinum"],
    ("Ford", "F-150"): ["Regular Cab", "SuperCab", "SuperCrew", "Raptor", "Lightning"],
    ("Ford", "Ranger"): ["XL", "XLT", "Lariat"],
    ("Ford", "Bronco"): ["Base", "Big Bend", "Black Diamond", "Outer Banks", "Badlands", "Wildtrak", "Raptor"],
    ("Ford", "Maverick"): ["XL", "XLT", "Lariat", "Tremor"],
    ("Ford", "Bronco Sport"): ["Base", "Big Bend", "Outer Banks", "Badlands"],
    ("Chevrolet", "Spark"): ["LS", "LT", "Premier"],
    ("Chevrolet", "Sonic"): ["LS", "LT", "Premier"],
    ("Chevrolet", "Cruze"): ["L", "LS", "LT", "Premier"],
    ("Chevrolet", "Malibu"): ["L", "LS", "LT", "Premier"],
    ("Chevrolet", "Impala"): ["LS", "LT", "Premier"],
    ("Chevrolet", "Camaro"): ["1LS", "1LT", "2LT", "1SS", "2SS", "ZL1"],
    ("Chevrolet", "Corvette"): ["1LT", "2LT", "3LT", "Z06", "ZR1"],
    ("Chevrolet", "Trax"): ["L", "LS", "LT", "Premier"],
    ("Chevrolet", "Equinox"): ["L", "LS", "LT", "Premier"],
    ("Chevrolet", "Traverse"): ["L", "LS", "LT", "Premier", "High Country"],
    ("Chevrolet", "Tahoe"): ["LS", "LT", "RST", "Premier", "High Country"],
    ("Chevrolet", "Suburban"): ["LS", "LT", "RST", "Premier", "High Country"],
    ("Chevrolet", "Silverado 1500"): ["Work Truck", "Custom", "LT", "RST", "LTZ", "High Country"],
    ("Chevrolet", "Blazer"): ["LT", "RS", "Premier"],
    ("Chevrolet", "Trailblazer"): ["LS", "LT", "ACTIV", "RS"],
    ("Chevrolet", "Bolt EV"): ["1LT", "2LT"],
    ("Lexus", "IS"): ["300", "350", "500 F SPORT Performance"],
    ("Lexus", "ES"): ["250", "300h", "350"],
    ("Lexus", "GS"): ["300", "350", "450h", "F"],
    ("Lexus", "LS"): ["500", "500h"],
    ("Lexus", "LC"): ["500", "500h"],
    ("Lexus", "RC"): ["300", "350", "F"],
    ("Lexus", "UX"): ["200", "250h"],
    ("Lexus", "NX"): ["250", "350", "350h", "450h+"],
    ("Lexus", "RX"): ["350", "350h", "500h F SPORT Performance"],
    ("Lexus", "GX"): ["460"],
    ("Lexus", "LX"): ["570", "600"],
    ("Infiniti", "Q50"): ["Pure", "Luxe", "Sensory", "Red Sport 400"],
    ("Infiniti", "Q60"): ["Pure", "Luxe", "Sensory", "Red Sport 400"],
    ("Infiniti", "Q70"): ["Base", "Sport"],
    ("Infiniti", "QX50"): ["Pure", "Luxe", "Sensory", "Autograph"],
    ("Infiniti", "QX60"): ["Pure", "Luxe", "Sensory", "Autograph"],
    ("Infiniti", "QX80"): ["Pure", "Luxe", "Sensory", "Autograph"],
    ("Infiniti", "QX55"): ["Luxe", "Essential", "Sensory"],
    ("Acura", "ILX"): ["Base", "Premium", "A-Spec"],
    ("Acura", "TLX"): ["Base", "Technology", "A-Spec", "Advance", "Type S"],
    ("Acura", "RLX"): ["Base", "Technology", "Advance"],
    ("Acura", "NSX"): ["Base", "Type S"],
    ("Acura", "RDX"): ["Base", "Technology", "A-Spec", "Advance"],
    ("Acura", "MDX"): ["Base", "Technology", "A-Spec", "Advance", "Type S"],
    ("Cadillac", "ATS"): ["Base", "Luxury", "Premium Luxury", "V-Sport", "V"],
    ("Cadillac", "CTS"): ["Base", "Luxury", "Premium Luxury", "V-Sport", "V"],
    ("Cadillac", "CT4"): ["Luxury", "Premium Luxury", "Sport", "V-Series", "V-Series Blackwing"],
    ("Cadillac", "CT5"): ["Luxury", "Premium Luxury", "Sport", "V-Series", "V-Series Blackwing"],
    ("Cadillac", "XTS"): ["Base", "Luxury", "Premium Luxury", "Platinum"],
    ("Cadillac", "XT4"): ["Luxury", "Premium Luxury", "Sport"],
    ("Cadillac", "XT5"): ["Luxury", "Premium Luxury", "Sport"],
    ("Cadillac", "XT6"): ["Luxury", "Premium Luxury", "Sport"],
    ("Cadillac", "Escalade"): ["Luxury", "Premium Luxury", "Sport", "V-Series"],
    ("Lincoln", "MKZ"): ["Premiere", "Select", "Reserve"],
    ("Lincoln", "Continental"): ["Premiere", "Select", "Reserve", "Coach Door Edition"],
    ("Lincoln", "Corsair"): ["Base", "Reserve"],
    ("Lincoln", "Nautilus"): ["Base", "Reserve"],
    ("Lincoln", "Aviator"): ["Premiere", "Reserve", "Grand Touring", "Black Label"],
    ("Lincoln", "Navigator"): ["Premiere", "Select", "Reserve", "Black Label"],
    ("Buick", "Verano"): ["Base", "Convenience", "Leather", "Premium"],
    ("Buick", "Regal"): ["1SV", "Preferred", "Essence", "GS"],
    ("Buick", "LaCrosse"): ["Base", "Preferred", "Essence", "Premium", "Avenir"],
    ("Buick", "Encore"): ["Base", "Convenience", "Leather", "Premium"],
    ("Buick", "Envision"): ["Preferred", "Essence", "Premium", "Avenir"],
    ("Buick", "Enclave"): ["Base", "Essence", "Premium", "Avenir"],
    ("Genesis", "G70"): ["2.0T", "2.0T Advanced", "3.3T Sport", "3.3T Sport Prestige"],
    ("Genesis", "G80"): ["2.5T", "2.5T Advanced", "3.5T Sport", "3.5T Sport Prestige"],
    ("Genesis", "G90"): ["3.3T Premium", "3.3T Prestige", "5.0 Ultimate"],
    ("Genesis", "GV70"): ["2.5T", "2.5T Advanced", "3.5T Sport", "3.5T Sport Prestige"],
    ("Genesis", "GV80"): ["2.5T", "2.5T Advanced", "3.5T", "3.5T Prestige"],
    ("Tesla", "Model S"): ["Standard Range", "Long Range", "Plaid"],
    ("Tesla", "Model 3"): ["Standard Range Plus", "Long Range", "Performance"],
    ("Tesla", "Model X"): ["Standard Range", "Long Range", "Plaid"],
    ("Tesla", "Model Y"): ["Standard Range", "Long Range", "Performance"],
    ("Porsche", "911"): ["Carrera", "Carrera S", "Carrera 4", "Carrera 4S", "Turbo", "Turbo S", "GT3", "GT3 RS", "GT2 RS"],
    ("Porsche", "Boxster"): ["Base", "S", "GTS", "Spyder"],
    ("Porsche", "Cayman"): ["Base", "S", "GTS", "GT4"],
    ("Porsche", "Panamera"): ["Base", "4", "S", "4S", "GTS", "Turbo", "Turbo S"],
    ("Porsche", "Macan"): ["Base", "S", "GTS", "Turbo"],
    ("Porsche", "Cayenne"): ["Base", "S", "GTS", "Turbo", "Turbo S", "E-Hybrid"],
    ("Porsche", "Taycan"): ["Base", "4S", "Turbo", "Turbo S"],
    ("Jaguar", "XE"): ["Base", "Premium", "Prestige", "R-Sport", "S"],
    ("Jaguar", "XF"): ["Premium", "Prestige", "R-Sport", "S"],
    ("Jaguar", "XJ"): ["Premium Luxury", "Portfolio", "Autobiography", "XJR575"],
    ("Jaguar", "F-TYPE"): ["Base", "Premium", "R-Dynamic", "R", "SVR"],
    ("Jaguar", "E-PACE"): ["Base", "S", "SE", "HSE", "R-Dynamic"],
    ("Jaguar", "F-PACE"): ["Premium", "Prestige", "R-Sport", "S", "SVR"],
    ("Jaguar", "I-PACE"): ["S", "SE", "HSE", "First Edition"],
    ("Land Rover", "Range Rover Evoque"): ["S", "SE", "HSE", "HSE Dynamic", "Autobiography"],
    ("Land Rover", "Range Rover Velar"): ["S", "SE", "HSE", "R-Dynamic", "P380", "P550"],
    ("Land Rover", "Range Rover Sport"): ["HSE", "HSE Dynamic", "Autobiography", "SVR"],
    ("Land Rover", "Range Rover"): ["Base", "HSE", "Autobiography", "SV", "SVAutobiography"],
    ("Land Rover", "Discovery Sport"): ["S", "SE", "HSE", "HSE Luxury"],
    ("Land Rover", "Discovery"): ["S", "SE", "HSE", "HSE Luxury"],
    ("Land Rover", "Defender"): ["90", "110", "130", "X", "X-Dynamic", "First Edition"],
    ("Volvo", "S60"): ["T5 Momentum", "T5 R-Design", "T6 R-Design", "T8 Polestar"],
    ("Volvo", "S90"): ["T5 Momentum", "T6 Momentum", "T6 R-Design", "T8 Inscription"],
    ("Volvo", "V60"): ["T5 Momentum", "T5 R-Design", "T6 R-Design", "T8 Polestar"],
    ("Volvo", "V90"): ["T5 Momentum", "T6 Momentum", "T6 R-Design", "T8 Inscription"],
    ("Volvo", "XC40"): ["T4 Momentum", "T5 Momentum", "T5 R-Design", "Recharge"],
    ("Volvo", "XC60"): ["Core", "Plus", "Ultimate", "Polestar Engineered"],
    ("Volvo", "XC90"): ["Momentum", "R-Design", "Inscription", "Recharge"],
    # NOTE: the two Cooper keys below differ only in the case of the make, so
    # they collide once normalised; the later ("Mini", "Cooper") entry is the
    # one that ends up in REAL_TRIMS_N.
    ("MINI", "Cooper"): ["One", "Cooper", "Cooper S", "John Cooper Works"],
    ("Mini", "Cooper"): ["Base", "Classic", "Signature", "Iconic", "John Cooper Works"],
    ("Mini", "Countryman"): ["Classic", "Signature", "Iconic", "John Cooper Works"],
    ("Mini", "Clubman"): ["Classic", "Signature", "Iconic", "John Cooper Works"],
    ("Mazda", "Mazda3"): ["Base", "Select", "Preferred", "Premium", "Turbo"],
    ("Mazda", "Mazda6"): ["Sport", "Touring", "Grand Touring", "Grand Touring Reserve", "Signature"],
    ("Mazda", "CX-3"): ["Sport", "Touring", "Grand Touring"],
    ("Mazda", "CX-30"): ["S", "Select", "Preferred", "Premium", "Turbo", "Turbo Premium Plus"],
    ("Mazda", "CX-5"): ["S", "Select", "Preferred", "Premium", "Turbo", "Turbo Signature"],
    ("Mazda", "CX-50"): ["2.5 S", "2.5 S Select", "2.5 S Preferred", "2.5 Turbo", "2.5 Turbo Premium"],
    ("Mazda", "CX-9"): ["Sport", "Touring", "Grand Touring", "Signature", "Carbon Edition"],
    ("Mazda", "MX-5 Miata"): ["Sport", "Club", "Grand Touring"],
    ("Mazda", "MX-30"): ["EV", "EV Premium Plus"],
    ("Subaru", "Impreza"): ["Base", "Premium", "Sport", "Limited"],
    ("Subaru", "WRX"): ["Base", "Premium", "Limited", "GT", "STI"],
    ("Subaru", "BRZ"): ["Premium", "Limited", "tS"],
    ("Subaru", "Legacy"): ["Base", "Premium", "Sport", "Limited", "Touring XT"],
    ("Subaru", "Outback"): ["Base", "Premium", "Limited", "Onyx Edition XT", "Wilderness", "Touring XT"],
    ("Subaru", "Forester"): ["Base", "Premium", "Sport", "Wilderness", "Limited", "Touring"],
    ("Subaru", "Crosstrek"): ["Base", "Premium", "Sport", "Limited", "Hybrid"],
    ("Subaru", "Ascent"): ["Base", "Premium", "Onyx Edition", "Limited", "Touring"],
    ("Mitsubishi", "Mirage"): ["ES", "LE", "SE", "GT"],
    ("Mitsubishi", "Outlander"): ["ES", "SE", "SEL", "Black Edition", "GT", "PHEV"],
    ("Mitsubishi", "Outlander Sport"): ["S", "ES", "LE", "SE", "GT"],
    ("Mitsubishi", "Eclipse Cross"): ["ES", "LE", "SE", "SEL"],
    ("Mitsubishi", "Pajero"): ["GL", "GLS", "Exceed"],
    ("Mitsubishi", "L200 / Triton"): ["GLX", "GLS", "Exceed"],
    ("Peugeot", "208"): ["Active", "Allure", "GT"],
    ("Peugeot", "2008"): ["Active", "Allure", "GT"],
    ("Peugeot", "308"): ["Active", "Allure", "GT"],
    ("Peugeot", "3008"): ["Active", "Allure", "GT", "GT Pack"],
    ("Peugeot", "5008"): ["Active", "Allure", "GT"],
    ("Peugeot", "508"): ["Active", "Allure", "GT", "PSE"],
    ("Renault", "Clio"): ["Play", "Iconic", "S Edition", "RS Line"],
    ("Renault", "Megane"): ["Play", "Iconic", "RS Line", "RS Trophy"],
    ("Renault", "Captur"): ["Play", "Iconic", "S Edition", "RS Line"],
    ("Renault", "Kadjar"): ["Play", "Iconic", "S Edition", "GT Line"],
    ("Renault", "Arkana"): ["Iconic", "S Edition", "RS Line"],
    ("Renault", "Austral"): ["Equilibre", "Techno", "Esprit Alpine"],
    ("Skoda", "Fabia"): ["S", "SE", "SE L", "Monte Carlo"],
    ("Skoda", "Octavia"): ["S", "SE", "SE L", "SportLine", "vRS"],
    ("Skoda", "Superb"): ["SE", "SE L", "SportLine", "L&K"],
    ("Skoda", "Karoq"): ["SE Drive", "SE L", "SportLine"],
    ("Skoda", "Kodiaq"): ["SE", "SE L", "SportLine", "vRS"],
    ("Seat", "Ibiza"): ["Reference", "Style", "Xcellence", "FR"],
    ("Seat", "Leon"): ["Reference", "Style", "Xcellence", "FR", "Cupra"],
    ("Seat", "Arona"): ["Reference", "Style", "Xcellence", "FR"],
    ("Seat", "Ateca"): ["Reference", "Style", "Xcellence", "FR"],
    ("Seat", "Tarraco"): ["SE", "SE Technology", "Xcellence", "FR"],
}

# Performance-variant markers for estimate_0_100.  Matched on word boundaries
# so that e.g. "rs" does not fire on "Versa"/"Traverse" or "sti" on "Prestige".
_PERFORMANCE_TRIM_RE = re.compile(
    r"\b(?:m|amg|rs\d*|type r|sti|gts|gt3|hellcat|svr|cupra|john cooper works)\b"
)


def _norm(x):
    """Return *x* as a stripped, lower-cased string (falsy values become "")."""
    return str(x or "").strip().lower()


# Same table keyed by normalised (make, model) for case-insensitive lookup.
REAL_TRIMS_N = {(_norm(mk), _norm(md)): trims for (mk, md), trims in REAL_TRIMS.items()}


def _clean_text(value):
    """Return *value* as stripped text, mapping None/NaN to an empty string."""
    if value is None or value is pd.NA:
        return ""
    if isinstance(value, float) and np.isnan(value):
        return ""
    return str(value).strip()


def _fmt_int(value, missing="—"):
    """Format *value* as an integer string, or *missing* when it is not a finite number."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return missing
    return str(int(number)) if np.isfinite(number) else missing


def _int_bounds(series, fallback):
    """Return (min, max) of *series* as ints, or *fallback* when it has no numeric values."""
    values = pd.to_numeric(series, errors="coerce").dropna()
    if values.empty:
        return fallback
    return int(values.min()), int(values.max())


def load_df():
    """Load the dataset from CSV_MAIN, or assemble it from the part files.

    When only the part files exist they are concatenated, written to
    CSV_MAIN for subsequent runs, and returned.

    Raises:
        FileNotFoundError: neither the main CSV nor any part file exists.
    """
    if os.path.exists(CSV_MAIN):
        return pd.read_csv(CSV_MAIN)
    parts = sorted(glob.glob(CSV_PARTS_GLOB))
    if parts:
        df = pd.concat([pd.read_csv(p) for p in parts], ignore_index=True)
        df.to_csv(CSV_MAIN, index=False)
        return df
    raise FileNotFoundError("Upload RideSearch_dataset.csv or the parts RideSearch_part*_small.csv.")


def unify_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with the columns the rest of the module relies on.

    * short column names (``popularity``, ``comfort``, ...) are copied to
      their canonical names when the canonical column is absent;
    * ``trim_display`` defaults to ``trim`` (or "" when there is no trim);
    * every column in NUM_COLS_CANON exists and is numeric (unparseable
      values become NaN);
    * ``text_record`` is built from the descriptive columns when missing;
    * the descriptive string columns exist (filled with "" when missing).
    """
    df = df.copy()
    aliases = {
        "popularity": "popularity_score",
        "comfort": "comfort_score",
        "reliability": "reliability_score",
        "safety": "safety_rating",
    }
    for short, canonical in aliases.items():
        if short in df.columns and canonical not in df.columns:
            df[canonical] = df[short]
    if "trim_display" not in df.columns:
        df["trim_display"] = df["trim"] if "trim" in df.columns else ""
    for col in NUM_COLS_CANON:
        if col not in df.columns:
            df[col] = np.nan
        df[col] = pd.to_numeric(df[col], errors="coerce")
    if "text_record" not in df.columns:
        wanted = ["make", "model", "trim_display", "body_type", "fuel", "engine_type"]
        present = [c for c in wanted if c in df.columns]
        df["text_record"] = df[present].fillna("").astype(str).agg(" ".join, axis=1)
    for col in ["make", "model", "body_type", "fuel", "engine_type", "name"]:
        if col not in df.columns:
            df[col] = ""
    return df


def suspicious_zero_to_100(series: pd.Series) -> bool:
    """Return True when a 0-100 km/h column looks like placeholder data.

    With at least 20 non-null values, the column is flagged when it has very
    few distinct values (at most max(2, 3% of the count)) or a standard
    deviation below 0.18.
    """
    s = series.dropna()
    if len(s) < 20:
        return False
    few_distinct = s.nunique() <= max(2, int(0.03 * len(s)))
    return few_distinct or (s.std() < 0.18)


def estimate_0_100(row):
    """Heuristically estimate a 0-100 km/h time (seconds) for one car row.

    *row* is any mapping with ``.get`` (a dict or a DataFrame row).  The
    estimate starts from a logarithmic function of horsepower and is nudged
    by body type, fuel type and performance-trim markers, then clamped to
    the range [2.8, 14.5].
    """
    try:
        hp = float(row.get("horsepower", 150))
    except (TypeError, ValueError):
        hp = 150.0
    # NaN is truthy, so an "or 150" default would let it through and poison
    # the whole estimate; treat missing/zero horsepower explicitly.
    if not np.isfinite(hp) or hp == 0:
        hp = 150.0

    body = str(row.get("body_type", "")).lower()
    fuel = str(row.get("fuel", "")).lower()
    trim = f"{row.get('model', '')} {row.get('trim_display', '')}".lower()

    base = 26.0 - 3.2 * np.log(max(hp, 60.0))
    if any(k in body for k in ("suv", "crossover", "pickup", "truck", "van")):
        base += 0.7
    if any(k in body for k in ("coupe", "roadster")):
        base -= 0.4
    if "electric" in fuel or "ev" in fuel:
        base -= 0.8
    if "hybrid" in fuel:
        base -= 0.3
    if "diesel" in fuel:
        base += 0.2
    if _PERFORMANCE_TRIM_RE.search(trim):
        base -= 0.6
    return float(np.clip(round(base, 2), 2.8, 14.5))


def _load_cached_embeddings(n_rows):
    """Return cached (Etext, Enum) when both exist and have *n_rows* rows, else None."""
    if not (os.path.exists(EMB_TEXT_NPY) and os.path.exists(EMB_NUM_NPY)):
        return None
    try:
        Etext, Enum = np.load(EMB_TEXT_NPY), np.load(EMB_NUM_NPY)
    except (OSError, ValueError):
        return None
    if len(Etext) != n_rows or len(Enum) != n_rows:
        # Cache was built for a different dataset; row i would no longer
        # describe row i of the frame.
        return None
    return Etext, Enum


def ensure_embeddings(df):
    """Return (Etext, Enum) embeddings aligned row-by-row with *df*.

    Cached arrays on disk are reused only when both exist and match the
    number of rows in *df*; otherwise both are rebuilt and saved.  Text
    embeddings use SentenceTransformer when available, else a
    HashingVectorizer.  Numeric embeddings are the standardised
    EMB_NUM_COLS with missing values replaced by the column mean (or 0 for
    columns with no values at all, which would otherwise stay NaN).

    Raises:
        RuntimeError: a required package is not installed.
    """
    cached = _load_cached_embeddings(len(df))
    if cached is not None:
        return cached

    if StandardScaler is None:
        raise RuntimeError("Install scikit-learn for numeric embeddings.")

    texts = df["text_record"].fillna("").astype(str).tolist()
    if SentenceTransformer is not None:
        model = SentenceTransformer(EMBED_MODEL)
        Etext = model.encode(
            texts,
            batch_size=256,
            show_progress_bar=False,
            normalize_embeddings=True,
        ).astype("float32")
    else:
        if HashingVectorizer is None:
            raise RuntimeError("Install sentence-transformers or scikit-learn for text embeddings.")
        hv = HashingVectorizer(n_features=512, alternate_sign=False, norm="l2")
        Etext = hv.transform(texts).toarray().astype("float32")
    np.save(EMB_TEXT_NPY, Etext)

    X = df[EMB_NUM_COLS].copy()
    X = X.fillna(X.mean(numeric_only=True)).fillna(0.0)
    Enum = StandardScaler().fit_transform(X.values.astype("float32")).astype("float32")
    np.save(EMB_NUM_NPY, Enum)
    return Etext, Enum


def load_overrides():
    """Load the trim overrides JSON, returning {} when it is absent or unusable.

    The expected shape is ``{make: {model: [trim, ...]}}``; anything that is
    not a JSON object is ignored.
    """
    try:
        with open(TRIMS_OVERRIDES, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # Missing, unreadable or malformed file: behave as "no overrides".
        return {}
    return data if isinstance(data, dict) else {}


def save_overrides(data):
    """Write *data* to the trim overrides JSON file."""
    with open(TRIMS_OVERRIDES, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)


def learned_trims_from_csv(df):
    """Collect the trims observed in *df* per normalised (make, model).

    Blank trims are ignored; the remaining ones are ordered by frequency and
    capped at 20 per model.
    """
    learned = {}
    for (mk, md), group in df.groupby(["make", "model"]):
        trims = group["trim_display"].dropna().astype(str).str.strip()
        ranked = trims[trims != ""].value_counts().index.tolist()
        if ranked:
            learned[(_norm(mk), _norm(md))] = ranked[:20]
    return learned


def get_trims_for(make, model, learned, overrides):
    """Return the trim choices for *make*/*model*.

    Precedence: a non-empty override (exact make/model keys), then the
    curated REAL_TRIMS table, then trims learned from the dataset (both
    looked up case-insensitively), and finally ``["Base"]``.
    """
    per_make = overrides.get(make) if isinstance(overrides, dict) else None
    if isinstance(per_make, dict) and per_make.get(model):
        return list(per_make[model])
    key = (_norm(make), _norm(model))
    if key in REAL_TRIMS_N:
        return list(REAL_TRIMS_N[key])
    if key in learned:
        return list(learned[key])
    return ["Base"]


def wiki_image(make, model, year=None):
    """Best-effort lookup of a thumbnail URL for a car via the Wikipedia API.

    Returns the thumbnail URL of the top search hit, or None when nothing is
    found, the request fails, or the ``requests`` package is unavailable.
    """
    if requests is None:
        return None

    # The year usually arrives as a float (or NaN) from a numeric column;
    # searching for "2020.0 ..." or "nan ..." would spoil the query.
    try:
        year_int = int(float(year))
    except (TypeError, ValueError, OverflowError):
        year_int = 0
    query = f"{year_int} {make} {model}" if year_int else f"{make} {model}"

    params = {
        "action": "query",
        "format": "json",
        "prop": "pageimages",
        "piprop": "thumbnail",
        "pithumbsize": 600,
        "generator": "search",
        "gsrsearch": query,
        "gsrlimit": 1,
        "gsrnamespace": 0,
    }
    try:
        resp = requests.get("https://en.wikipedia.org/w/api.php", params=params, timeout=6)
        resp.raise_for_status()
        pages = resp.json().get("query", {}).get("pages", {})
        if pages:
            page = next(iter(pages.values()))
            return page.get("thumbnail", {}).get("source")
    except (requests.RequestException, ValueError, AttributeError, TypeError):
        # Network failure or an unexpected payload shape: the photo is
        # optional, so report "no image" instead of failing the request.
        return None
    return None


def cosine_sim_row_vs_mat(mat, vec):
    """Return the cosine similarity of *vec* against every row of *mat*.

    A small epsilon is added to the norms so zero vectors yield 0 rather
    than a division error.
    """
    mat = mat / (np.linalg.norm(mat, axis=1, keepdims=True) + 1e-9)
    v = vec / (np.linalg.norm(vec) + 1e-9)
    return mat @ v


def _anchor_markdown(a):
    """Render the Markdown summary for the anchor row *a*."""
    zero_100 = a.get("zero_to_100_kmh_s")
    zero_100_txt = "—" if pd.isna(zero_100) else f"{zero_100}"
    price = _fmt_int(a.get("price_usd"))
    # Two trailing spaces before "\n" make Markdown render a hard line break.
    return (
        f"**{a['make']} {a['model']} {_clean_text(a.get('trim_display', ''))} "
        f"{_fmt_int(a.get('year'), '')}**  \n"
        f"Body: {_clean_text(a.get('body_type', ''))} • Fuel: {_clean_text(a.get('fuel', ''))} "
        f"• Engine: {_clean_text(a.get('engine_type', ''))}  \n"
        f"HP: {_fmt_int(a.get('horsepower'))} • 0–100: {zero_100_txt}s "
        f"• Price: ${price}  \n"
        f"Popularity {_fmt_int(a.get('popularity_score'))}/10 • "
        f"Comfort {_fmt_int(a.get('comfort_score'))}/10 • "
        f"Reliability {_fmt_int(a.get('reliability_score'))}/100 • "
        f"Safety {_fmt_int(a.get('safety_rating'))}★"
    )


def recommend(df, Etext, Enum, make, model, trim, year, topk, alpha, body, fuel,
              y_min, y_max, p_min, p_max, safety, reliab,
              cross_brand_only=True, exclude_same_model=True, unique_brand=True,
              image_lookup=None):
    """Recommend the cars most similar to a chosen anchor car.

    The anchor is the most popular row matching make/model (and trim/year
    when given and present in the data).  Candidates are the remaining rows
    that pass the filters; they are ranked by
    ``alpha * text_similarity + (1 - alpha) * numeric_similarity``.

    Args:
        df: dataset as produced by ``unify_columns``.
        Etext, Enum: embedding arrays with one row per row of *df*, in the
            same order.
        make, model, trim, year: anchor selection; trim and year are optional.
        topk: maximum number of recommendations.
        alpha: weight of the text similarity.
        body, fuel: exact-match filters, or "Any"/empty for no filter.
        y_min, y_max, p_min, p_max: inclusive year and price ranges.
        safety, reliab: minimum safety rating and reliability score.
        cross_brand_only: drop candidates of the anchor's make.
        exclude_same_model: drop candidates of the anchor's make and model.
        unique_brand: keep at most one recommendation per make.
        image_lookup: callable ``(make, model, year) -> url or None`` used
            for the photo cards; defaults to ``wiki_image``.

    Returns:
        ``(error, table, anchor_markdown, cards)``.  On failure *error* is a
        message, *table* is None, the markdown is "" and *cards* is [].  On
        success *error* is None, *table* is a DataFrame with a
        ``similarity_%`` column, and *cards* is a list of ``(url, title)``
        for the first three recommendations that have a photo.

    Raises:
        ValueError: the embeddings do not have one row per dataset row.
    """
    if image_lookup is None:
        image_lookup = wiki_image
    if len(Etext) != len(df) or len(Enum) != len(df):
        raise ValueError("Embeddings are not aligned with the dataset; rebuild them.")

    # All row selection below is positional so it stays aligned with the
    # embedding arrays regardless of the frame's index labels.
    same_model = ((df["make"] == make) & (df["model"] == model)).to_numpy()
    if not same_model.any():
        return "No such make/model in dataset.", None, "", []

    anchor_mask = same_model.copy()
    if trim:
        anchor_mask &= (df["trim_display"].astype(str).str.lower() == str(trim).lower()).to_numpy()
    if year:
        anchor_mask &= (pd.to_numeric(df["year"], errors="coerce") == int(year)).to_numpy()
    if not anchor_mask.any():
        # The requested trim/year is not in the data: fall back to the model.
        anchor_mask = same_model

    anchor_rows = np.flatnonzero(anchor_mask)
    popularity = pd.to_numeric(df["popularity_score"], errors="coerce").to_numpy(dtype=float)[anchor_rows]
    best = int(np.nanargmax(popularity)) if (~np.isnan(popularity)).any() else 0
    a_pos = int(anchor_rows[best])
    a = df.iloc[a_pos]

    pool = np.ones(len(df), dtype=bool)
    pool[a_pos] = False  # never recommend the anchor to itself
    same_make = (df["make"] == a["make"]).to_numpy()
    if cross_brand_only:
        pool &= ~same_make
    if exclude_same_model:
        pool &= ~(same_make & (df["model"] == a["model"]).to_numpy())
    if body and body != "Any":
        pool &= (df["body_type"] == body).to_numpy()
    if fuel and fuel != "Any":
        pool &= (df["fuel"] == fuel).to_numpy()
    pool &= df["year"].between(y_min, y_max).to_numpy()
    pool &= df["price_usd"].between(p_min, p_max).to_numpy()
    pool &= (df["safety_rating"] >= safety).to_numpy()
    pool &= (df["reliability_score"] >= reliab).to_numpy()

    cand = np.flatnonzero(pool)
    if cand.size == 0:
        return "No cars after your filters. Loosen price/year/safety.", None, "", []

    weight = float(alpha)
    scores = (weight * cosine_sim_row_vs_mat(Etext[cand], Etext[a_pos])
              + (1 - weight) * cosine_sim_row_vs_mat(Enum[cand], Enum[a_pos]))
    ranking = np.argsort(-scores, kind="stable")

    brands = df["make"].astype(str).str.lower().to_numpy()
    chosen, chosen_scores, seen = [], [], set()
    for j in ranking:
        pos = int(cand[j])
        brand = brands[pos]
        if unique_brand and brand in seen:
            continue
        seen.add(brand)
        chosen.append(pos)
        chosen_scores.append(float(scores[j]))
        if len(chosen) >= int(topk):
            break
    if not chosen:
        return "No recommendations after constraints.", None, "", []

    out = df.iloc[chosen].copy()
    out["similarity_%"] = [round(score * 100, 1) for score in chosen_scores]
    cols = [
        "name", "make", "model", "trim_display", "year", "body_type", "fuel",
        "engine_type", "price_usd", "horsepower", "zero_to_100_kmh_s",
        "popularity_score", "comfort_score", "reliability_score", "tech_score",
        "ownership_cost_score", "safety_rating", "similarity_%",
    ]
    table = out[[c for c in cols if c in out.columns]].reset_index(drop=True)

    cards = []
    for _, r in out.head(3).iterrows():
        img = image_lookup(r.get("make", ""), r.get("model", ""), r.get("year", None))
        if not img:
            # Only keep entries that actually have an image to show.
            continue
        title = f"{r.get('make', '')} {r.get('model', '')} {_clean_text(r.get('trim_display', ''))}"
        cards.append((img, title))
    return None, table, _anchor_markdown(a), cards


def build_app():
    """Load the data, prepare embeddings and build the Gradio Blocks app.

    Raises:
        RuntimeError: gradio is not installed.
    """
    if gr is None:
        raise RuntimeError("Install gradio to launch the RideSearch UI.")

    df = unify_columns(load_df())
    if suspicious_zero_to_100(df["zero_to_100_kmh_s"]):
        # The column looks like placeholder data; replace it with estimates.
        df["zero_to_100_kmh_s"] = df.apply(estimate_0_100, axis=1)
    Etext, Enum = ensure_embeddings(df)
    learned = learned_trims_from_csv(df)
    overrides = load_overrides()

    makes = sorted(m for m in df["make"].dropna().astype(str).unique().tolist() if m)
    body_choices = ["Any"] + sorted(b for b in df["body_type"].dropna().astype(str).unique().tolist() if b)
    fuel_choices = ["Any"] + sorted(f for f in df["fuel"].dropna().astype(str).unique().tolist() if f)
    # Fallback ranges keep the sliders constructible when a column has no values.
    y_lo, y_hi = _int_bounds(df["year"], (2000, 2025))
    p_lo, p_hi = _int_bounds(df["price_usd"], (0, 100000))

    def models_for(make):
        """Refresh the model dropdown for the selected make."""
        if not make:
            return gr.update(choices=[], value=None)
        opts = sorted(df.loc[df["make"].eq(make), "model"].dropna().astype(str).unique().tolist())
        return gr.update(choices=opts, value=None)

    def trims_years_for(make, model):
        """Refresh the trim and year dropdowns for the selected make/model."""
        if not make or not model:
            return gr.update(choices=[], value=None), gr.update(choices=[], value=None)
        trims = get_trims_for(make, model, learned, overrides)
        model_years = pd.to_numeric(
            df[(df["make"] == make) & (df["model"] == model)]["year"], errors="coerce"
        )
        years = sorted(model_years.dropna().astype(int).unique().tolist())
        return gr.update(choices=trims, value=None), gr.update(choices=[None] + years, value=None)

    with gr.Blocks(theme=gr.themes.Soft(), title="RideSearch") as demo:
        gr.Markdown("## RideSearch — cross-brand recommendations with realistic trims & photos")

        with gr.Tab("Pick & Recommend"):
            with gr.Row():
                mk = gr.Dropdown(makes, label="Make")
                md = gr.Dropdown([], label="Model")
                tr = gr.Dropdown([], label="Trim (optional)")
                yr = gr.Dropdown([], label="Year (optional)")
            mk.change(models_for, mk, md)
            md.change(trims_years_for, [mk, md], [tr, yr])
            with gr.Row():
                body = gr.Dropdown(body_choices, value="Any", label="Body")
                fuel = gr.Dropdown(fuel_choices, value="Any", label="Fuel")
            with gr.Row():
                y_min = gr.Slider(y_lo, y_hi, value=y_lo, step=1, label="Year min")
                y_max = gr.Slider(y_lo, y_hi, value=y_hi, step=1, label="Year max")
            with gr.Row():
                p_min = gr.Slider(p_lo, p_hi, value=p_lo, step=500, label="Price min (USD)")
                p_max = gr.Slider(p_lo, p_hi, value=min(p_hi, p_lo + 20000), step=500, label="Price max (USD)")
            with gr.Row():
                safety = gr.Slider(3, 5, value=4, step=1, label="Min Safety ★")
                reliab = gr.Slider(55, 99, value=70, step=1, label="Min Reliability")
            with gr.Row():
                topk = gr.Slider(1, 10, value=5, step=1, label="Recommendations")
                alpha = gr.Slider(0, 1, value=0.7, step=0.05, label="α — Text vs Numeric")
            with gr.Row():
                cross = gr.Checkbox(True, label="Cross-brand only")
                xmodel = gr.Checkbox(True, label="Exclude same model family")
                uniqb = gr.Checkbox(True, label="Unique brands (no repeats)")
            run = gr.Button("Recommend", variant="primary")
            err = gr.Markdown()
            anchor_md = gr.Markdown()
            table = gr.Dataframe(interactive=False, wrap=True, label="Recommendations")
            gallery = gr.Gallery(label="Photos", height=220, columns=[3])

            def on_click(mk_, md_, tr_, yr_, topk_, alpha_, body_, fuel_, y_min_, y_max_,
                         p_min_, p_max_, safety_, reliab_, cross_, xmodel_, uniqb_):
                """Run the recommender and map its result onto the output widgets."""
                msg, tbl, atext, cards = recommend(
                    df, Etext, Enum, mk_, md_, tr_, yr_, int(topk_), float(alpha_),
                    body_, fuel_, int(y_min_), int(y_max_), int(p_min_), int(p_max_),
                    int(safety_), int(reliab_),
                    cross_brand_only=bool(cross_),
                    exclude_same_model=bool(xmodel_),
                    unique_brand=bool(uniqb_),
                )
                if msg:
                    return gr.update(value=f"**{msg}**"), gr.update(value=""), pd.DataFrame(), []
                return gr.update(value=""), gr.update(value=atext), tbl, cards

            run.click(
                on_click,
                [mk, md, tr, yr, topk, alpha, body, fuel, y_min, y_max, p_min, p_max,
                 safety, reliab, cross, xmodel, uniqb],
                [err, anchor_md, table, gallery],
            )

        with gr.Tab("Admin • Trim Overrides"):
            gr.Markdown("Paste correct trims (one per line) for a model. Saved to trims_overrides.json.")
            a_mk = gr.Dropdown(makes, label="Make")
            a_md = gr.Dropdown([], label="Model")
            a_txt = gr.Textbox(lines=8, label="Display trims (one per line)")
            save_btn = gr.Button("Save override")
            save_msg = gr.Markdown()
            a_mk.change(models_for, a_mk, a_md)

            def do_save(make, model, txt):
                """Store the pasted trims as the override for make/model."""
                trims = [t.strip() for t in str(txt).splitlines() if t.strip()]
                if not make or not model or not trims:
                    return "⚠️ Provide make, model, and at least one trim."
                # Mutated in place so trims_years_for sees the change at once.
                overrides.setdefault(make, {})[model] = trims
                save_overrides(overrides)
                return f"✅ Saved {len(trims)} trims for {make} {model}. Refresh the Pick tab."

            save_btn.click(do_save, [a_mk, a_md, a_txt], save_msg)

        gr.Markdown("First run may be slow while embeddings build.")
    return demo


if __name__ == "__main__":
    demo = build_app()
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)