Spaces:
Sleeping
Sleeping
| import os, glob, json, requests | |
| import numpy as np | |
| import pandas as pd | |
| import gradio as gr | |
| from sklearn.preprocessing import StandardScaler | |
| try: | |
| from sentence_transformers import SentenceTransformer | |
| except Exception: | |
| SentenceTransformer = None | |
| try: | |
| from sklearn.feature_extraction.text import HashingVectorizer | |
| except Exception: | |
| HashingVectorizer = None | |
# Dataset locations: load_df() reads CSV_MAIN when it exists; otherwise it
# concatenates every file matching CSV_PARTS_GLOB and writes the result to CSV_MAIN.
CSV_MAIN = "RideSearch_dataset.csv"
CSV_PARTS_GLOB = "RideSearch_part*_small.csv"
# On-disk caches for the text and numeric embedding matrices (see ensure_embeddings()).
EMB_TEXT_NPY = "emb_text.npy"
EMB_NUM_NPY = "emb_num.npy"
# JSON file read by load_overrides() and written by save_overrides().
TRIMS_OVERRIDES = "trims_overrides.json"
# Model name passed to SentenceTransformer; can be overridden with the EMBED_MODEL env var.
EMBED_MODEL = os.environ.get("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
# Columns that unify_columns() guarantees to exist and coerces to numeric.
NUM_COLS_CANON = [
    "horsepower","zero_to_100_kmh_s","seats","cargo_liters","price_usd",
    "popularity_score","comfort_score","reliability_score","tech_score",
    "ownership_cost_score","safety_rating","year"
]
# Hand-maintained trim lists keyed by (make, model). get_trims_for() consults
# this table (through the normalised copy REAL_TRIMS_N) after user overrides
# and before trims learned from the CSV.
REAL_TRIMS = {
    ("BMW","1 Series"): ["116i","118i","120i","125i","M135i"],
    ("BMW","2 Series"): ["218i","220i","225i","230i","M235i","M240i"],
    ("BMW","3 Series"): ["318i","320i","325i","330i","330e","335i","340i","M340i","M3"],
    ("BMW","4 Series"): ["420i","430i","435i","440i","M440i","M4"],
    ("BMW","5 Series"): ["520i","525i","530i","530e","535i","540i","550i","M550i","M5"],
    ("BMW","6 Series"): ["630i","640i","650i","M6"],
    ("BMW","7 Series"): ["730i","740i","750i","760i","M760i"],
    ("BMW","8 Series"): ["840i","850i","M850i","M8"],
    ("BMW","X1"): ["sDrive18i","sDrive20i","xDrive20i","xDrive25i"],
    ("BMW","X2"): ["sDrive18i","sDrive20i","xDrive20i","xDrive25i","M35i"],
    ("BMW","X3"): ["sDrive20i","xDrive20i","xDrive30i","M40i","X3 M"],
    ("BMW","X4"): ["xDrive20i","xDrive30i","M40i","X4 M"],
    ("BMW","X5"): ["sDrive40i","xDrive40i","xDrive45e","M50i","X5 M"],
    ("BMW","X6"): ["sDrive40i","xDrive40i","M50i","X6 M"],
    ("BMW","X7"): ["xDrive40i","xDrive50i","M50i"],
    ("BMW","Z4"): ["sDrive20i","sDrive30i","M40i"],
    ("Audi","A1"): ["25 TFSI","30 TFSI","35 TFSI","S1"],
    ("Audi","A3"): ["30 TFSI","35 TFSI","40 TFSI","45 TFSI e","S3","RS3"],
    ("Audi","A4"): ["35 TFSI","40 TFSI","45 TFSI","45 TFSI e","S4","RS4"],
    ("Audi","A5"): ["35 TFSI","40 TFSI","45 TFSI","S5","RS5"],
    ("Audi","A6"): ["40 TFSI","45 TFSI","50 TFSI","55 TFSI e","S6","RS6"],
    ("Audi","A7"): ["45 TFSI","50 TFSI","55 TFSI e","S7","RS7"],
    ("Audi","A8"): ["50 TFSI","55 TFSI","60 TFSI e","S8"],
    ("Audi","Q2"): ["30 TFSI","35 TFSI","40 TFSI"],
    ("Audi","Q3"): ["35 TFSI","40 TFSI","45 TFSI e","RS Q3"],
    ("Audi","Q5"): ["40 TFSI","45 TFSI e","SQ5"],
    ("Audi","Q7"): ["45 TFSI","50 TDI","55 TFSI e","SQ7"],
    ("Audi","Q8"): ["50 TDI","55 TFSI","SQ8","RS Q8"],
    ("Audi","TT"): ["40 TFSI","45 TFSI","TTS","TT RS"],
    ("Audi","R8"): ["V10","V10 Performance","V10 Plus"],
    ("Mercedes-Benz","A-Class"): ["A180","A200","A220","A250","A250e","AMG A35","AMG A45"],
    ("Mercedes-Benz","B-Class"): ["B180","B200","B220","B250e"],
    ("Mercedes-Benz","C-Class"): ["C180","C200","C220d","C250","C300","C350e","AMG C43","AMG C63"],
    ("Mercedes-Benz","E-Class"): ["E200","E220d","E250","E300","E350","E400","E450","AMG E53","AMG E63"],
    ("Mercedes-Benz","S-Class"): ["S350","S400","S450","S500","S580","S600","AMG S63","AMG S65"],
    ("Mercedes-Benz","CLA"): ["CLA180","CLA200","CLA220","CLA250","AMG CLA35","AMG CLA45"],
    ("Mercedes-Benz","CLS"): ["CLS350","CLS400","CLS450","AMG CLS53","AMG CLS63"],
    ("Mercedes-Benz","GLA"): ["GLA180","GLA200","GLA220","GLA250","AMG GLA35","AMG GLA45"],
    ("Mercedes-Benz","GLB"): ["GLB200","GLB220","GLB250","AMG GLB35"],
    ("Mercedes-Benz","GLC"): ["GLC200","GLC220d","GLC300","GLC350e","AMG GLC43","AMG GLC63"],
    ("Mercedes-Benz","GLE"): ["GLE300","GLE350","GLE400","GLE450","GLE580","AMG GLE53","AMG GLE63"],
    ("Mercedes-Benz","GLS"): ["GLS400","GLS450","GLS580","AMG GLS63"],
    ("Mercedes-Benz","G-Class"): ["G350","G400","G500","G550","AMG G63","AMG G65"],
    ("Toyota","Corolla"): ["L","LE","XLE","SE","XSE","GR Corolla"],
    ("Toyota","Camry"): ["L","LE","SE","XLE","XSE","TRD"],
    ("Toyota","Avalon"): ["XLE","XSE","Limited","TRD"],
    ("Toyota","Prius"): ["L","LE","XLE","Limited","Prime"],
    ("Toyota","RAV4"): ["LE","XLE","XLE Premium","Adventure","TRD Off-Road","Limited","Prime"],
    ("Toyota","Highlander"): ["L","LE","XLE","Limited","Platinum","Hybrid"],
    ("Toyota","4Runner"): ["SR5","TRD Off-Road","TRD Pro","Limited"],
    ("Toyota","Tacoma"): ["SR","SR5","TRD Sport","TRD Off-Road","TRD Pro","Limited"],
    ("Toyota","Tundra"): ["SR","SR5","Limited","Platinum","1794","TRD Pro"],
    ("Toyota","Land Cruiser"): ["Base","Heritage Edition"],
    ("Toyota","Supra"): ["2.0","3.0","3.0 Premium"],
    ("Toyota","Yaris"): ["L","LE","XLE","GRMN"],
    ("Toyota","C-HR"): ["LE","XLE","Nightshade","Limited"],
    ("Toyota","Sequoia"): ["SR5","Limited","Platinum","TRD Pro","Capstone"],
    ("Toyota","Sienna"): ["LE","XLE","XSE","Limited","Platinum"],
    ("Honda","Civic"): ["LX","Sport","EX","EX-L","Sport Touring","Touring","Si","Type R"],
    ("Honda","Accord"): ["LX","Sport","EX","EX-L","Touring","Sport-L"],
    ("Honda","CR-V"): ["LX","EX","EX-L","Touring","Hybrid"],
    ("Honda","HR-V"): ["LX","Sport","EX","EX-L"],
    ("Honda","Pilot"): ["LX","EX","EX-L","Touring","Elite","TrailSport"],
    ("Honda","Passport"): ["Sport","EX-L","Touring","Elite","TrailSport"],
    ("Honda","Ridgeline"): ["Sport","RTL","RTL-E","Black Edition"],
    ("Honda","Insight"): ["LX","EX","Touring"],
    ("Honda","Fit / Jazz"): ["LX","Sport","EX","EX-L"],
    ("Honda","Odyssey"): ["EX","EX-L","Touring","Elite"],
    ("Volkswagen","Golf"): ["S","SE","SEL","Autobahn","GTI","R"],
    ("Volkswagen","Jetta"): ["S","SE","SEL","SEL Premium","GLI"],
    ("Volkswagen","Passat"): ["S","SE","SEL Premium","R-Line"],
    ("Volkswagen","Tiguan"): ["S","SE","SEL","SEL Premium","R-Line"],
    ("Volkswagen","Atlas"): ["S","SE","SEL","SEL Premium","Cross Sport"],
    ("Volkswagen","Arteon"): ["SE","SEL","SEL Premium","R-Line"],
    ("Volkswagen","ID.4"): ["Pro","Pro S","1st Edition"],
    ("Nissan","Sentra"): ["S","SV","SR"],
    ("Nissan","Altima"): ["S","SV","SL","SR","Platinum"],
    ("Nissan","Maxima"): ["S","SV","SL","SR","Platinum"],
    ("Nissan","Versa"): ["S","SV","SR"],
    ("Nissan","Rogue"): ["S","SV","SL","Platinum"],
    ("Nissan","Murano"): ["S","SV","SL","Platinum"],
    ("Nissan","Pathfinder"): ["S","SV","SL","Platinum"],
    ("Nissan","Armada"): ["SV","SL","Platinum"],
    ("Nissan","Frontier"): ["S","SV","PRO-4X"],
    ("Nissan","Titan"): ["S","SV","PRO-4X","Platinum Reserve"],
    ("Nissan","Z"): ["Sport","Performance"],
    ("Nissan","GT-R"): ["Premium","NISMO","Track Edition"],
    ("Nissan","Qashqai / Rogue Sport"): ["S","SV","SL"],
    ("Nissan","X-Trail"): ["Visia","Acenta","N-Connecta","Tekna"],
    ("Hyundai","Elantra"): ["SE","SEL","Limited","N Line","N"],
    ("Hyundai","Sonata"): ["SE","SEL","Limited","N Line"],
    ("Hyundai","Accent"): ["SE","SEL"],
    ("Hyundai","Tucson"): ["SE","SEL","Limited","N Line"],
    ("Hyundai","Santa Fe"): ["SE","SEL","Limited","Calligraphy"],
    ("Hyundai","Palisade"): ["SE","SEL","Limited","Calligraphy"],
    ("Hyundai","Kona"): ["SE","SEL","Limited","N Line","N"],
    ("Hyundai","Venue"): ["SE","SEL"],
    ("Hyundai","Ioniq 5"): ["SE","SEL","Limited"],
    ("Hyundai","Ioniq 6"): ["SE","SEL","Limited"],
    ("Hyundai","Santa Cruz"): ["SE","SEL","Night","Limited"],
    ("Hyundai","Ioniq 7 / SEVEN"): ["Concept","Preview"],
    ("Hyundai","Genesis G70"): ["2.0T","3.3T Sport"],
    ("Hyundai","Genesis G80"): ["2.5T","3.5T Sport"],
    ("Hyundai","Genesis G90"): ["3.3T Premium","5.0 Ultimate"],
    ("Kia","Rio"): ["LX","S"],
    ("Kia","Forte"): ["LX","S","EX","GT-Line","GT"],
    ("Kia","K5"): ["LX","S","EX","GT-Line"],
    ("Kia","Stinger"): ["GT-Line","GT1","GT2"],
    ("Kia","Soul"): ["LX","S","EX","GT-Line"],
    ("Kia","Seltos"): ["LX","S","EX","SX Turbo"],
    ("Kia","Sportage"): ["LX","S","EX","SX Turbo"],
    ("Kia","Sorento"): ["LX","S","EX","SX","SX Turbo"],
    ("Kia","Telluride"): ["LX","S","EX","SX"],
    ("Kia","Carnival"): ["LX","S","EX","SX"],
    ("Kia","EV6"): ["Light","Wind","GT-Line"],
    ("Kia","Niro"): ["LX","EX","SX Touring"],
    ("Kia","EV9"): ["Light","Wind","Land","GT-Line"],
    ("Ford","Fiesta"): ["S","SE","Titanium","ST"],
    ("Ford","Focus"): ["S","SE","SEL","Titanium","ST","RS"],
    ("Ford","Fusion"): ["S","SE","SEL","Titanium","Sport"],
    ("Ford","Mustang"): ["EcoBoost","GT","Mach 1","Shelby GT350","Shelby GT500"],
    ("Ford","Escape"): ["S","SE","SEL","Titanium"],
    ("Ford","Edge"): ["SE","SEL","Titanium","ST"],
    ("Ford","Explorer"): ["Base","XLT","Limited","King Ranch","Platinum","ST"],
    ("Ford","Expedition"): ["XLT","Limited","King Ranch","Platinum"],
    ("Ford","F-150"): ["Regular Cab","SuperCab","SuperCrew","Raptor","Lightning"],
    ("Ford","Ranger"): ["XL","XLT","Lariat"],
    ("Ford","Bronco"): ["Base","Big Bend","Black Diamond","Outer Banks","Badlands","Wildtrak","Raptor"],
    ("Ford","Maverick"): ["XL","XLT","Lariat","Tremor"],
    ("Ford","Bronco Sport"): ["Base","Big Bend","Outer Banks","Badlands"],
    ("Chevrolet","Spark"): ["LS","LT","Premier"],
    ("Chevrolet","Sonic"): ["LS","LT","Premier"],
    ("Chevrolet","Cruze"): ["L","LS","LT","Premier"],
    ("Chevrolet","Malibu"): ["L","LS","LT","Premier"],
    ("Chevrolet","Impala"): ["LS","LT","Premier"],
    ("Chevrolet","Camaro"): ["1LS","1LT","2LT","1SS","2SS","ZL1"],
    ("Chevrolet","Corvette"): ["1LT","2LT","3LT","Z06","ZR1"],
    ("Chevrolet","Trax"): ["L","LS","LT","Premier"],
    ("Chevrolet","Equinox"): ["L","LS","LT","Premier"],
    ("Chevrolet","Traverse"): ["L","LS","LT","Premier","High Country"],
    ("Chevrolet","Tahoe"): ["LS","LT","RST","Premier","High Country"],
    ("Chevrolet","Suburban"): ["LS","LT","RST","Premier","High Country"],
    ("Chevrolet","Silverado 1500"): ["Work Truck","Custom","LT","RST","LTZ","High Country"],
    ("Chevrolet","Blazer"): ["LT","RS","Premier"],
    ("Chevrolet","Trailblazer"): ["LS","LT","ACTIV","RS"],
    ("Chevrolet","Bolt EV"): ["1LT","2LT"],
    ("Lexus","IS"): ["300","350","500 F SPORT Performance"],
    ("Lexus","ES"): ["250","300h","350"],
    ("Lexus","GS"): ["300","350","450h","F"],
    ("Lexus","LS"): ["500","500h"],
    ("Lexus","LC"): ["500","500h"],
    ("Lexus","RC"): ["300","350","F"],
    ("Lexus","UX"): ["200","250h"],
    ("Lexus","NX"): ["250","350","350h","450h+"],
    ("Lexus","RX"): ["350","350h","500h F SPORT Performance"],
    ("Lexus","GX"): ["460"],
    ("Lexus","LX"): ["570","600"],
    ("Infiniti","Q50"): ["Pure","Luxe","Sensory","Red Sport 400"],
    ("Infiniti","Q60"): ["Pure","Luxe","Sensory","Red Sport 400"],
    ("Infiniti","Q70"): ["Base","Sport"],
    ("Infiniti","QX50"): ["Pure","Luxe","Sensory","Autograph"],
    ("Infiniti","QX60"): ["Pure","Luxe","Sensory","Autograph"],
    ("Infiniti","QX80"): ["Pure","Luxe","Sensory","Autograph"],
    ("Infiniti","QX55"): ["Luxe","Essential","Sensory"],
    ("Acura","ILX"): ["Base","Premium","A-Spec"],
    ("Acura","TLX"): ["Base","Technology","A-Spec","Advance","Type S"],
    ("Acura","RLX"): ["Base","Technology","Advance"],
    ("Acura","NSX"): ["Base","Type S"],
    ("Acura","RDX"): ["Base","Technology","A-Spec","Advance"],
    ("Acura","MDX"): ["Base","Technology","A-Spec","Advance","Type S"],
    ("Cadillac","ATS"): ["Base","Luxury","Premium Luxury","V-Sport","V"],
    ("Cadillac","CTS"): ["Base","Luxury","Premium Luxury","V-Sport","V"],
    ("Cadillac","CT4"): ["Luxury","Premium Luxury","Sport","V-Series","V-Series Blackwing"],
    ("Cadillac","CT5"): ["Luxury","Premium Luxury","Sport","V-Series","V-Series Blackwing"],
    ("Cadillac","XTS"): ["Base","Luxury","Premium Luxury","Platinum"],
    ("Cadillac","XT4"): ["Luxury","Premium Luxury","Sport"],
    ("Cadillac","XT5"): ["Luxury","Premium Luxury","Sport"],
    ("Cadillac","XT6"): ["Luxury","Premium Luxury","Sport"],
    ("Cadillac","Escalade"): ["Luxury","Premium Luxury","Sport","V-Series"],
    ("Lincoln","MKZ"): ["Premiere","Select","Reserve"],
    ("Lincoln","Continental"): ["Premiere","Select","Reserve","Coach Door Edition"],
    ("Lincoln","Corsair"): ["Base","Reserve"],
    ("Lincoln","Nautilus"): ["Base","Reserve"],
    ("Lincoln","Aviator"): ["Premiere","Reserve","Grand Touring","Black Label"],
    ("Lincoln","Navigator"): ["Premiere","Select","Reserve","Black Label"],
    ("Buick","Verano"): ["Base","Convenience","Leather","Premium"],
    ("Buick","Regal"): ["1SV","Preferred","Essence","GS"],
    ("Buick","LaCrosse"): ["Base","Preferred","Essence","Premium","Avenir"],
    ("Buick","Encore"): ["Base","Convenience","Leather","Premium"],
    ("Buick","Envision"): ["Preferred","Essence","Premium","Avenir"],
    ("Buick","Enclave"): ["Base","Essence","Premium","Avenir"],
    ("Genesis","G70"): ["2.0T","2.0T Advanced","3.3T Sport","3.3T Sport Prestige"],
    ("Genesis","G80"): ["2.5T","2.5T Advanced","3.5T Sport","3.5T Sport Prestige"],
    ("Genesis","G90"): ["3.3T Premium","3.3T Prestige","5.0 Ultimate"],
    ("Genesis","GV70"): ["2.5T","2.5T Advanced","3.5T Sport","3.5T Sport Prestige"],
    ("Genesis","GV80"): ["2.5T","2.5T Advanced","3.5T","3.5T Prestige"],
    ("Tesla","Model S"): ["Standard Range","Long Range","Plaid"],
    ("Tesla","Model 3"): ["Standard Range Plus","Long Range","Performance"],
    ("Tesla","Model X"): ["Standard Range","Long Range","Plaid"],
    ("Tesla","Model Y"): ["Standard Range","Long Range","Performance"],
    ("Porsche","911"): ["Carrera","Carrera S","Carrera 4","Carrera 4S","Turbo","Turbo S","GT3","GT3 RS","GT2 RS"],
    ("Porsche","Boxster"): ["Base","S","GTS","Spyder"],
    ("Porsche","Cayman"): ["Base","S","GTS","GT4"],
    ("Porsche","Panamera"): ["Base","4","S","4S","GTS","Turbo","Turbo S"],
    ("Porsche","Macan"): ["Base","S","GTS","Turbo"],
    ("Porsche","Cayenne"): ["Base","S","GTS","Turbo","Turbo S","E-Hybrid"],
    ("Porsche","Taycan"): ["Base","4S","Turbo","Turbo S"],
    ("Jaguar","XE"): ["Base","Premium","Prestige","R-Sport","S"],
    ("Jaguar","XF"): ["Premium","Prestige","R-Sport","S"],
    ("Jaguar","XJ"): ["Premium Luxury","Portfolio","Autobiography","XJR575"],
    ("Jaguar","F-TYPE"): ["Base","Premium","R-Dynamic","R","SVR"],
    ("Jaguar","E-PACE"): ["Base","S","SE","HSE","R-Dynamic"],
    ("Jaguar","F-PACE"): ["Premium","Prestige","R-Sport","S","SVR"],
    ("Jaguar","I-PACE"): ["S","SE","HSE","First Edition"],
    ("Land Rover","Range Rover Evoque"): ["S","SE","HSE","HSE Dynamic","Autobiography"],
    ("Land Rover","Range Rover Velar"): ["S","SE","HSE","R-Dynamic","P380","P550"],
    ("Land Rover","Range Rover Sport"): ["HSE","HSE Dynamic","Autobiography","SVR"],
    ("Land Rover","Range Rover"): ["Base","HSE","Autobiography","SV","SVAutobiography"],
    ("Land Rover","Discovery Sport"): ["S","SE","HSE","HSE Luxury"],
    ("Land Rover","Discovery"): ["S","SE","HSE","HSE Luxury"],
    ("Land Rover","Defender"): ["90","110","130","X","X-Dynamic","First Edition"],
    ("Volvo","S60"): ["T5 Momentum","T5 R-Design","T6 R-Design","T8 Polestar"],
    ("Volvo","S90"): ["T5 Momentum","T6 Momentum","T6 R-Design","T8 Inscription"],
    ("Volvo","V60"): ["T5 Momentum","T5 R-Design","T6 R-Design","T8 Polestar"],
    ("Volvo","V90"): ["T5 Momentum","T6 Momentum","T6 R-Design","T8 Inscription"],
    ("Volvo","XC40"): ["T4 Momentum","T5 Momentum","T5 R-Design","Recharge"],
    ("Volvo","XC60"): ["Core","Plus","Ultimate","Polestar Engineered"],
    ("Volvo","XC90"): ["Momentum","R-Design","Inscription","Recharge"],
    # NOTE(review): ("MINI","Cooper") and ("Mini","Cooper") are distinct keys here
    # but normalise to the same lowercase key when REAL_TRIMS_N is built, so the
    # later ("Mini","Cooper") list is the one that survives there. Confirm which
    # list is intended.
    ("MINI","Cooper"): ["One","Cooper","Cooper S","John Cooper Works"],
    ("Mini","Cooper"): ["Base","Classic","Signature","Iconic","John Cooper Works"],
    ("Mini","Countryman"): ["Classic","Signature","Iconic","John Cooper Works"],
    ("Mini","Clubman"): ["Classic","Signature","Iconic","John Cooper Works"],
    ("Mazda","Mazda3"): ["Base","Select","Preferred","Premium","Turbo"],
    ("Mazda","Mazda6"): ["Sport","Touring","Grand Touring","Grand Touring Reserve","Signature"],
    ("Mazda","CX-3"): ["Sport","Touring","Grand Touring"],
    ("Mazda","CX-30"): ["S","Select","Preferred","Premium","Turbo","Turbo Premium Plus"],
    ("Mazda","CX-5"): ["S","Select","Preferred","Premium","Turbo","Turbo Signature"],
    ("Mazda","CX-50"): ["2.5 S","2.5 S Select","2.5 S Preferred","2.5 Turbo","2.5 Turbo Premium"],
    ("Mazda","CX-9"): ["Sport","Touring","Grand Touring","Signature","Carbon Edition"],
    ("Mazda","MX-5 Miata"): ["Sport","Club","Grand Touring"],
    ("Mazda","MX-30"): ["EV","EV Premium Plus"],
    ("Subaru","Impreza"): ["Base","Premium","Sport","Limited"],
    ("Subaru","WRX"): ["Base","Premium","Limited","GT","STI"],
    ("Subaru","BRZ"): ["Premium","Limited","tS"],
    ("Subaru","Legacy"): ["Base","Premium","Sport","Limited","Touring XT"],
    ("Subaru","Outback"): ["Base","Premium","Limited","Onyx Edition XT","Wilderness","Touring XT"],
    ("Subaru","Forester"): ["Base","Premium","Sport","Wilderness","Limited","Touring"],
    ("Subaru","Crosstrek"): ["Base","Premium","Sport","Limited","Hybrid"],
    ("Subaru","Ascent"): ["Base","Premium","Onyx Edition","Limited","Touring"],
    ("Mitsubishi","Mirage"): ["ES","LE","SE","GT"],
    ("Mitsubishi","Outlander"): ["ES","SE","SEL","Black Edition","GT","PHEV"],
    ("Mitsubishi","Outlander Sport"): ["S","ES","LE","SE","GT"],
    ("Mitsubishi","Eclipse Cross"): ["ES","LE","SE","SEL"],
    ("Mitsubishi","Pajero"): ["GL","GLS","Exceed"],
    ("Mitsubishi","L200 / Triton"): ["GLX","GLS","Exceed"],
    ("Peugeot","208"): ["Active","Allure","GT"],
    ("Peugeot","2008"): ["Active","Allure","GT"],
    ("Peugeot","308"): ["Active","Allure","GT"],
    ("Peugeot","3008"): ["Active","Allure","GT","GT Pack"],
    ("Peugeot","5008"): ["Active","Allure","GT"],
    ("Peugeot","508"): ["Active","Allure","GT","PSE"],
    ("Renault","Clio"): ["Play","Iconic","S Edition","RS Line"],
    ("Renault","Megane"): ["Play","Iconic","RS Line","RS Trophy"],
    ("Renault","Captur"): ["Play","Iconic","S Edition","RS Line"],
    ("Renault","Kadjar"): ["Play","Iconic","S Edition","GT Line"],
    ("Renault","Arkana"): ["Iconic","S Edition","RS Line"],
    ("Renault","Austral"): ["Equilibre","Techno","Esprit Alpine"],
    ("Skoda","Fabia"): ["S","SE","SE L","Monte Carlo"],
    ("Skoda","Octavia"): ["S","SE","SE L","SportLine","vRS"],
    ("Skoda","Superb"): ["SE","SE L","SportLine","L&K"],
    ("Skoda","Karoq"): ["SE Drive","SE L","SportLine"],
    ("Skoda","Kodiaq"): ["SE","SE L","SportLine","vRS"],
    ("Seat","Ibiza"): ["Reference","Style","Xcellence","FR"],
    ("Seat","Leon"): ["Reference","Style","Xcellence","FR","Cupra"],
    ("Seat","Arona"): ["Reference","Style","Xcellence","FR"],
    ("Seat","Ateca"): ["Reference","Style","Xcellence","FR"],
    ("Seat","Tarraco"): ["SE","SE Technology","Xcellence","FR"],
}
| def _norm(x): return str(x or "").strip().lower() | |
# Case/whitespace-insensitive view of REAL_TRIMS: keys are passed through _norm().
# Keys that differ only by case or padding collapse into one entry (the last one
# iterated wins).
REAL_TRIMS_N = {(_norm(mk), _norm(md)): trims for (mk, md), trims in REAL_TRIMS.items()}
def load_df():
    """Load the dataset as a DataFrame.

    Reads ``CSV_MAIN`` when it exists. Otherwise every file matching
    ``CSV_PARTS_GLOB`` is read in sorted order, concatenated with a fresh
    index, written to ``CSV_MAIN`` and returned.

    Raises:
        FileNotFoundError: neither the main CSV nor any part file is present.
    """
    if not os.path.exists(CSV_MAIN):
        part_paths = sorted(glob.glob(CSV_PARTS_GLOB))
        if not part_paths:
            raise FileNotFoundError("Upload RideSearch_dataset.csv or the parts RideSearch_part*_small.csv.")
        frames = [pd.read_csv(path) for path in part_paths]
        merged = pd.concat(frames, ignore_index=True)
        # Persist the merged file so later runs take the fast path.
        merged.to_csv(CSV_MAIN, index=False)
        return merged
    return pd.read_csv(CSV_MAIN)
def unify_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with the column layout the rest of the app relies on.

    - Legacy score columns are copied to their canonical names when the
      canonical name is absent.
    - ``trim_display`` is taken from ``trim`` (or set to "") when missing.
    - Every column in ``NUM_COLS_CANON`` exists and is numeric (bad values → NaN).
    - ``text_record`` is built from the descriptive columns when missing.
    - The descriptive string columns exist (filled with "" when missing).
    """
    out = df.copy()

    legacy_to_canonical = (
        ("popularity", "popularity_score"),
        ("comfort", "comfort_score"),
        ("reliability", "reliability_score"),
        ("safety", "safety_rating"),
    )
    for legacy, canonical in legacy_to_canonical:
        if legacy in out.columns and canonical not in out.columns:
            out[canonical] = out[legacy]

    if "trim_display" not in out.columns:
        out["trim_display"] = out["trim"] if "trim" in out.columns else ""

    # Add the missing numeric columns first, then coerce all of them.
    absent_numeric = [name for name in NUM_COLS_CANON if name not in out.columns]
    for name in absent_numeric:
        out[name] = np.nan
    for name in NUM_COLS_CANON:
        out[name] = pd.to_numeric(out[name], errors="coerce")

    if "text_record" not in out.columns:
        wanted = ["make","model","trim_display","body_type","fuel","engine_type"]
        present = [name for name in wanted if name in out.columns]
        as_text = out[present].fillna("").astype(str)
        out["text_record"] = as_text.agg(" ".join, axis=1)

    for name in ("make","model","body_type","fuel","engine_type","name"):
        if name not in out.columns:
            out[name] = ""
    return out
def suspicious_zero_to_100(series: pd.Series) -> bool:
    """Tell whether a 0–100 km/h column looks like placeholder data.

    With at least 20 non-null values, the column is flagged when it has very
    few distinct values (at most ``max(2, 3% of the count)``) or when its
    standard deviation is below 0.18. Shorter columns are never flagged.
    """
    observed = series.dropna()
    count = len(observed)
    if count < 20:
        return False
    distinct_limit = max(2, int(0.03*count))
    if observed.nunique() <= distinct_limit:
        return True
    return observed.std() < 0.18
def estimate_0_100(row):
    """Estimate a 0–100 km/h time (seconds) for one car record.

    The estimate starts from a logarithmic function of horsepower and is then
    nudged by body type, fuel and performance-trim keywords. The result is
    rounded to two decimals and clamped to the range [2.8, 14.5].

    Args:
        row: mapping-like record supporting ``.get`` (a dict or a DataFrame
            row). Keys read: ``horsepower``, ``body_type``, ``fuel``,
            ``model``, ``trim_display``.

    Returns:
        float: the estimated time; never NaN.
    """
    try:
        hp = float(row.get("horsepower", 150) or 150)
    except (TypeError, ValueError):
        hp = 150.0
    # NaN is truthy, so it slips past the `or 150` default above; without this
    # check the whole estimate would come out as NaN.
    if np.isnan(hp):
        hp = 150.0

    body = str(row.get("body_type","")).lower()
    fuel = str(row.get("fuel","")).lower()
    # Padded with spaces so whitespace-delimited markers (" m ", " amg") can also
    # match at the very start or end, e.g. the trim "X5 M".
    trim = f" {row.get('model','')} {row.get('trim_display','')} ".lower()

    base = 26.0 - 3.2*np.log(max(hp, 60.0))
    if any(k in body for k in ["suv","crossover","pickup","truck","van"]):
        base += 0.7
    if any(k in body for k in ["coupe","roadster"]):
        base -= 0.4
    if "electric" in fuel or "ev" in fuel:
        base -= 0.8
    if "hybrid" in fuel:
        base -= 0.3
    if "diesel" in fuel:
        base += 0.2
    if any(x in trim for x in [" m "," amg","rs","type r","sti","gts","gt3","hellcat","svr","cupra","john cooper works"]):
        base -= 0.6
    return float(np.clip(round(base,2), 2.8, 14.5))
def ensure_embeddings(df):
    """Return ``(Etext, Enum)`` embedding matrices for *df*, one row per record.

    Cached arrays in ``EMB_TEXT_NPY`` / ``EMB_NUM_NPY`` are reused only when
    both exist and both have exactly ``len(df)`` rows. Otherwise both are
    rebuilt and saved:

    - ``Etext``: sentence-transformer embeddings of ``text_record`` when the
      library is importable, else a 512-feature hashing vectoriser.
    - ``Enum``: standard-scaled numeric features with NaNs replaced by the
      column mean (or 0 when a column has no values at all).

    Raises:
        RuntimeError: neither text-embedding backend is available.
    """
    n_rows = len(df)
    if os.path.exists(EMB_TEXT_NPY) and os.path.exists(EMB_NUM_NPY):
        Etext, Enum = np.load(EMB_TEXT_NPY), np.load(EMB_NUM_NPY)
        # A cache built from a different dataset would mis-align rows (or index
        # out of bounds) downstream, so only trust it when the row counts agree.
        if Etext.shape[0] == n_rows and Enum.shape[0] == n_rows:
            return Etext, Enum

    if SentenceTransformer is not None:
        model = SentenceTransformer(EMBED_MODEL)
        Etext = model.encode(
            df["text_record"].astype(str).tolist(),
            batch_size=256,
            show_progress_bar=False,
            normalize_embeddings=True,
        ).astype("float32")
    else:
        if HashingVectorizer is None:
            raise RuntimeError("Install sentence-transformers or scikit-learn for text embeddings.")
        hv = HashingVectorizer(n_features=512, alternate_sign=False, norm="l2")
        texts = df["text_record"].fillna("").astype(str).tolist()
        Etext = hv.transform(texts).toarray().astype("float32")
    np.save(EMB_TEXT_NPY, Etext)

    X = df[["horsepower","zero_to_100_kmh_s","seats","cargo_liters","price_usd","popularity_score","comfort_score","reliability_score","tech_score","ownership_cost_score","safety_rating"]].copy()
    # Mean-impute; a column with no values has a NaN mean, so fall back to 0 to
    # keep NaNs out of the similarity scores.
    X = X.fillna(X.mean(numeric_only=True)).fillna(0.0)
    scaler = StandardScaler()
    Enum = scaler.fit_transform(X.values.astype("float32")).astype("float32")
    np.save(EMB_NUM_NPY, Enum)
    return Etext, Enum
def load_overrides():
    """Load user trim overrides from ``TRIMS_OVERRIDES``.

    Returns:
        dict: ``{make: {model: [trim, ...]}}``. An empty dict is returned when
        the file is missing, unreadable, not valid JSON, or not a JSON object.
        Per-make entries that are not objects are dropped so that callers can
        safely chain ``overrides.get(make, {}).get(model)``.
    """
    if not os.path.exists(TRIMS_OVERRIDES):
        return {}
    try:
        with open(TRIMS_OVERRIDES, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        # Overrides are optional, so a bad file simply means "no overrides".
        return {}
    if not isinstance(data, dict):
        return {}
    return {mk: models for mk, models in data.items() if isinstance(models, dict)}
def save_overrides(data):
    """Write *data* to ``TRIMS_OVERRIDES`` as 2-space-indented UTF-8 JSON (non-ASCII kept as-is)."""
    with open(TRIMS_OVERRIDES, "w", encoding="utf-8") as handle:
        serialized = json.dumps(data, indent=2, ensure_ascii=False)
        handle.write(serialized)
def learned_trims_from_csv(df):
    """Collect the trims seen in the dataset for every make/model pair.

    Returns:
        dict: ``{(normalised make, normalised model): [trim, ...]}`` with up to
        20 non-blank trims per pair, most frequent first. Pairs with no usable
        trim are omitted.
    """
    learned = {}
    for (mk, md), group in df.groupby(["make", "model"]):
        cleaned = group["trim_display"].dropna().astype(str).map(str.strip)
        non_blank = cleaned[cleaned != ""]
        ranked = non_blank.value_counts().index.tolist()
        if not ranked:
            continue
        learned[(_norm(mk), _norm(md))] = ranked[:20]
    return learned
def get_trims_for(make, model, learned, overrides):
    """Pick the trim list to offer for a make/model.

    Lookup order: a non-empty user override (exact make/model strings), then
    the curated ``REAL_TRIMS_N`` table, then trims learned from the dataset
    (both keyed by normalised names), and finally ``["Base"]``.
    """
    manual = overrides.get(make, {}).get(model)
    if manual:
        return manual
    key = (_norm(make), _norm(model))
    for table in (REAL_TRIMS_N, learned):
        if key in table:
            return table[key]
    return ["Base"]
def _wiki_year_label(year):
    """Return *year* as clean query text ("2020"), or "" when it is missing/NaN/zero."""
    if year is None:
        return ""
    try:
        as_int = int(float(year))
    except (ValueError, OverflowError):
        # NaN / inf, or a non-numeric string. Keep real text, drop the rest.
        return year.strip() if isinstance(year, str) else ""
    except TypeError:
        return ""
    return str(as_int) if as_int else ""


def wiki_image(make, model, year=None):
    """Look up a thumbnail URL for a car on English Wikipedia.

    Runs one search for "<year> <make> <model>" (the year is left out when it
    is missing) and returns the thumbnail of the top hit.

    Args:
        make: brand name.
        model: model name.
        year: optional model year. Floats such as ``2020.0`` (which pandas
            produces for numeric columns containing NaN) are rendered as
            ``2020``; NaN is treated as missing.

    Returns:
        str | None: the image URL, or None when nothing was found or the
        request failed. This is a best-effort lookup and never raises for
        network or response-format problems.
    """
    year_label = _wiki_year_label(year)
    q = f"{year_label} {make} {model}" if year_label else f"{make} {model}"
    params = {
        "action": "query", "format": "json",
        "prop": "pageimages", "piprop": "thumbnail", "pithumbsize": 600,
        "generator": "search", "gsrsearch": q, "gsrlimit": 1, "gsrnamespace": 0,
    }
    # Wikimedia asks API clients to identify themselves with a User-Agent.
    headers = {"User-Agent": "RideSearch/1.0 (Gradio demo; python-requests)"}
    try:
        resp = requests.get("https://en.wikipedia.org/w/api.php", params=params, headers=headers, timeout=6)
        resp.raise_for_status()
        pages = resp.json().get("query", {}).get("pages", {})
        if pages:
            page = next(iter(pages.values()))
            return page.get("thumbnail", {}).get("source")
    except (requests.RequestException, ValueError, AttributeError, TypeError):
        # Network/HTTP failure, invalid JSON, or an unexpected payload shape:
        # photos are optional, so fall through to "no image".
        pass
    return None
def cosine_sim_row_vs_mat(mat, vec):
    """Cosine similarity between each row of *mat* and the vector *vec*.

    A small epsilon (1e-9) is added to every norm so all-zero rows or vectors
    give a similarity of 0 instead of dividing by zero.
    """
    eps = 1e-9
    row_norms = np.linalg.norm(mat, axis=1, keepdims=True)
    unit_rows = mat / (row_norms + eps)
    unit_vec = vec / (np.linalg.norm(vec) + eps)
    return np.matmul(unit_rows, unit_vec)
def _fmt_int(value, missing="—"):
    """Render a numeric cell as a whole number, or *missing* for NaN/None/non-numeric."""
    try:
        return missing if pd.isna(value) else str(int(value))
    except (TypeError, ValueError):
        return missing


def recommend(df, Etext, Enum, make, model, trim, year, topk, alpha, body, fuel, y_min, y_max, p_min, p_max, safety, reliab, cross_brand_only=True, exclude_same_model=True, unique_brand=True):
    """Recommend cars similar to an anchor car chosen by make/model (and optionally trim/year).

    The anchor is the most popular row matching make/model/trim/year; when the
    trim/year combination has no rows, the search falls back to make/model
    only. Candidates are filtered by the given constraints and ranked by
    ``alpha * text_similarity + (1 - alpha) * numeric_similarity``.

    Args:
        df: dataset. Its index labels are used as row positions into *Etext*
            and *Enum*, so it must carry the default 0..n-1 index.
        Etext, Enum: text and numeric embedding matrices, one row per df row.
        make, model: anchor make and model (exact match).
        trim, year: optional anchor trim (case-insensitive) and year.
        topk: maximum number of recommendations.
        alpha: weight of the text similarity, between 0 and 1.
        body, fuel: required body type / fuel; "Any" or empty disables the filter.
        y_min, y_max, p_min, p_max: inclusive year and price bounds.
        safety, reliab: minimum safety rating and reliability score.
        cross_brand_only: drop candidates from the anchor's make.
        exclude_same_model: drop candidates of the anchor's make+model.
        unique_brand: keep at most one recommendation per make.

    Returns:
        tuple: ``(error, table, anchor_markdown, cards)``. On failure *error*
        is a message and the rest are ``None, "", []``. On success *error* is
        None, *table* is a DataFrame with a ``similarity_%`` column, and
        *cards* holds ``(image_url_or_None, title)`` for the top three results.
    """
    is_anchor_model = (df["make"]==make) & (df["model"]==model)
    family = df[is_anchor_model]
    if family.empty:
        return "No such make/model in dataset.", None, "", []

    sub = family
    if trim:
        sub = sub[sub["trim_display"].astype(str).str.lower()==str(trim).lower()]
    if year:
        sub = sub[pd.to_numeric(sub["year"], errors="coerce")==int(year)]
    if sub.empty:
        # The requested trim/year is not in the data; use any row of the model.
        sub = family
    a = sub.sort_values("popularity_score", ascending=False).iloc[0]
    a_idx = int(a.name)

    # The anchor must never be recommended to itself, whatever the flags say.
    pool = df.drop(index=a.name)
    if cross_brand_only:
        pool = pool[pool["make"] != a["make"]]
    if exclude_same_model:
        pool = pool[~((pool["make"]==a["make"]) & (pool["model"]==a["model"]))]
    if body and body!="Any":
        pool = pool[pool["body_type"]==body]
    if fuel and fuel!="Any":
        pool = pool[pool["fuel"]==fuel]
    pool = pool[(pool["year"]>=y_min) & (pool["year"]<=y_max)]
    pool = pool[(pool["price_usd"]>=p_min) & (pool["price_usd"]<=p_max)]
    pool = pool[(pool["safety_rating"]>=safety) & (pool["reliability_score"]>=reliab)]
    if pool.empty:
        return "No cars after your filters. Loosen price/year/safety.", None, "", []

    cand_idx = pool.index.values
    st = cosine_sim_row_vs_mat(Etext[cand_idx], Etext[a_idx])
    sn = cosine_sim_row_vs_mat(Enum[cand_idx], Enum[a_idx])
    weight = float(alpha)
    s = weight*st + (1-weight)*sn

    limit = int(topk)
    chosen, scores, seen = [], [], set()
    for j in np.argsort(-s):  # best match first
        k = cand_idx[j]
        brand = str(df.loc[k,"make"]).lower()
        if unique_brand and brand in seen:
            continue
        seen.add(brand)
        chosen.append(k)
        scores.append(round(float(s[j])*100, 1))
        if len(chosen) >= limit:
            break
    if not chosen:
        return "No recommendations after constraints.", None, "", []

    out = df.loc[chosen].copy()
    out["similarity_%"] = scores
    cols = ["name","make","model","trim_display","year","body_type","fuel","engine_type","price_usd","horsepower","zero_to_100_kmh_s","popularity_score","comfort_score","reliability_score","tech_score","ownership_cost_score","safety_rating","similarity_%"]
    table = out[[c for c in cols if c in out.columns]].reset_index(drop=True)

    # Anchor summary. Every numeric cell may be NaN, so none is passed to int() directly.
    disp_trim = str(a.get("trim_display","")).strip()
    zero_100 = a.get("zero_to_100_kmh_s","—")
    if zero_100 is None or (not isinstance(zero_100, str) and pd.isna(zero_100)):
        zero_100 = "—"
    anchor_md = (f"**{a['make']} {a['model']} {disp_trim} {_fmt_int(a['year'], missing='')}** \n"
                 f"Body: {a.get('body_type','')} • Fuel: {a.get('fuel','')} • Engine: {a.get('engine_type','')} \n"
                 f"HP: {_fmt_int(a['horsepower'])} • 0–100: {zero_100}s "
                 f"• Price: ${_fmt_int(a['price_usd'])} \n"
                 f"Popularity {_fmt_int(a.get('popularity_score',0))}/10 • Comfort {_fmt_int(a.get('comfort_score',0))}/10 • "
                 f"Reliability {_fmt_int(a.get('reliability_score',0))}/100 • Safety {_fmt_int(a.get('safety_rating',0))}★")

    cards = []
    for _, r in out.head(3).iterrows():
        # Pass a clean year ("2020", not 2020.0 or NaN) to the image search.
        year_txt = _fmt_int(r.get("year", None), missing="") or None
        img = wiki_image(r.get("make",""), r.get("model",""), year_txt)
        title = f"{r.get('make','')} {r.get('model','')} {str(r.get('trim_display','')).strip()}"
        cards.append((img, title))
    return None, table, anchor_md, cards
def build_app():
    """Load the data, prepare embeddings and assemble the Gradio interface.

    Returns the ``gr.Blocks`` app without launching it. The UI has two tabs:
    "Pick & Recommend" (calls ``recommend``) and "Admin • Trim Overrides"
    (edits the overrides dict and saves it with ``save_overrides``).

    NOTE(review): the nesting of the ``with`` blocks below was reconstructed
    from a source whose indentation was lost — confirm the layout against
    the running app.
    """
    df = unify_columns(load_df())
    # Replace the 0–100 column with estimates when it looks like placeholder data.
    if suspicious_zero_to_100(df["zero_to_100_kmh_s"]): df["zero_to_100_kmh_s"] = df.apply(estimate_0_100, axis=1)
    Etext, Enum = ensure_embeddings(df)
    learned = learned_trims_from_csv(df)
    overrides = load_overrides()
    makes = sorted(df["make"].dropna().astype(str).unique().tolist())
    body_choices = ["Any"] + sorted([b for b in df["body_type"].dropna().astype(str).unique().tolist() if b])
    fuel_choices = ["Any"] + sorted([f for f in df["fuel"].dropna().astype(str).unique().tolist() if f])
    # Slider bounds come from the data.
    # NOTE(review): int() raises if a column is entirely NaN.
    y_lo, y_hi = int(df["year"].min(skipna=True)), int(df["year"].max(skipna=True))
    p_lo, p_hi = int(df["price_usd"].min(skipna=True)), int(df["price_usd"].max(skipna=True))
    def models_for(make):
        # Refill the model dropdown for the selected make and clear its value.
        if not make: return gr.update(choices=[], value=None)
        opts = sorted(df.loc[df["make"].eq(make), "model"].dropna().astype(str).unique().tolist())
        return gr.update(choices=opts, value=None)
    def trims_years_for(make, model):
        # Refill the trim and year dropdowns; the year list starts with None ("no year").
        if not make or not model: return gr.update(choices=[], value=None), gr.update(choices=[], value=None)
        trims = get_trims_for(make, model, learned, overrides)
        years = sorted(pd.to_numeric(df[(df["make"]==make)&(df["model"]==model)]["year"], errors="coerce").dropna().astype(int).unique().tolist())
        return gr.update(choices=trims, value=None), gr.update(choices=[None]+years, value=None)
    with gr.Blocks(theme=gr.themes.Soft(), title="RideSearch") as demo:
        gr.Markdown("## RideSearch — cross-brand recommendations with realistic trims & photos")
        with gr.Tab("Pick & Recommend"):
            with gr.Row():
                mk = gr.Dropdown(makes, label="Make")
                md = gr.Dropdown([], label="Model")
                tr = gr.Dropdown([], label="Trim (optional)")
                yr = gr.Dropdown([], label="Year (optional)")
            # Cascade: make -> models, model -> trims and years.
            mk.change(models_for, mk, md)
            md.change(lambda a,b: trims_years_for(a,b), [mk, md], [tr, yr])
            with gr.Row():
                body = gr.Dropdown(body_choices, value="Any", label="Body")
                fuel = gr.Dropdown(fuel_choices, value="Any", label="Fuel")
            with gr.Row():
                y_min = gr.Slider(y_lo, y_hi, value=y_lo, step=1, label="Year min")
                y_max = gr.Slider(y_lo, y_hi, value=y_hi, step=1, label="Year max")
            with gr.Row():
                p_min = gr.Slider(p_lo, p_hi, value=p_lo, step=500, label="Price min (USD)")
                # The inner max() always picks p_lo+20000, so the default is min(p_hi, p_lo+20000).
                p_max = gr.Slider(p_lo, p_hi, value=min(p_hi, max(p_lo+5000, p_lo+20000)), step=500, label="Price max (USD)")
            with gr.Row():
                safety = gr.Slider(3, 5, value=4, step=1, label="Min Safety ★")
                reliab = gr.Slider(55, 99, value=70, step=1, label="Min Reliability")
            with gr.Row():
                topk = gr.Slider(1, 10, value=5, step=1, label="Recommendations")
                alpha = gr.Slider(0, 1, value=0.7, step=0.05, label="α — Text vs Numeric")
            with gr.Row():
                cross = gr.Checkbox(True, label="Cross-brand only")
                xmodel = gr.Checkbox(True, label="Exclude same model family")
                uniqb = gr.Checkbox(True, label="Unique brands (no repeats)")
            run = gr.Button("Recommend", variant="primary")
            err = gr.Markdown()
            anchor_md = gr.Markdown()
            table = gr.Dataframe(interactive=False, wrap=True, label="Recommendations")
            gallery = gr.Gallery(label="Photos", height=220, columns=[3])
            def on_click(mk_, md_, tr_, yr_, topk_, alpha_, body_, fuel_, y_min_, y_max_, p_min_, p_max_, safety_, reliab_, cross_, xmodel_, uniqb_):
                # Coerce widget values to plain Python types and run the recommender.
                msg, tbl, atext, cards = recommend(
                    df, Etext, Enum, mk_, md_, tr_, yr_, int(topk_), float(alpha_),
                    body_, fuel_, int(y_min_), int(y_max_), int(p_min_), int(p_max_), int(safety_), int(reliab_),
                    cross_brand_only=bool(cross_), exclude_same_model=bool(xmodel_), unique_brand=bool(uniqb_)
                )
                # On error show the message and clear the other three outputs.
                if msg: return gr.update(value=f"**{msg}**"), gr.update(value=""), pd.DataFrame(), []
                return gr.update(value=""), gr.update(value=atext), tbl, cards
            run.click(on_click,
                      [mk, md, tr, yr, topk, alpha, body, fuel, y_min, y_max, p_min, p_max, safety, reliab, cross, xmodel, uniqb],
                      [err, anchor_md, table, gallery])
        with gr.Tab("Admin • Trim Overrides"):
            gr.Markdown("Paste correct trims (one per line) for a model. Saved to trims_overrides.json.")
            a_mk = gr.Dropdown(makes, label="Make")
            a_md = gr.Dropdown([], label="Model")
            a_txt = gr.Textbox(lines=8, label="Display trims (one per line)")
            save_btn = gr.Button("Save override")
            save_msg = gr.Markdown()
            a_mk.change(models_for, a_mk, a_md)
            def do_save(make, model, txt):
                # Updates the shared overrides dict in place, then writes it to disk.
                # The same dict object is read by trims_years_for().
                nonlocal overrides
                trims = [t.strip() for t in str(txt).splitlines() if t.strip()]
                if not make or not model or not trims: return "⚠️ Provide make, model, and at least one trim."
                if make not in overrides: overrides[make] = {}
                overrides[make][model] = trims
                save_overrides(overrides)
                return f"✅ Saved {len(trims)} trims for {make} {model}. Refresh the Pick tab."
            save_btn.click(do_save, [a_mk, a_md, a_txt], save_msg)
        # NOTE(review): placed at Blocks level (below the tabs); it may originally
        # have lived inside the Admin tab — confirm.
        gr.Markdown("First run may be slow while embeddings build.")
    return demo
# Script entry point: build the app and serve it with request queuing enabled.
if __name__ == "__main__":
    demo = build_app()
    # Listen on all interfaces, port 7860.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)