Levimichael4 committed on
Commit
579e82e
·
verified ·
1 Parent(s): 8c147e6

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +566 -0
app.py ADDED
@@ -0,0 +1,566 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, glob, json, requests
2
+ import numpy as np
3
+ import pandas as pd
4
+ import gradio as gr
5
+ from sklearn.preprocessing import StandardScaler
6
+
7
+ try:
8
+ from sentence_transformers import SentenceTransformer
9
+ except Exception:
10
+ SentenceTransformer = None
11
+ try:
12
+ from sklearn.feature_extraction.text import HashingVectorizer
13
+ except Exception:
14
+ HashingVectorizer = None
15
+
16
# Dataset locations: a single merged CSV, or the glob of part files that
# load_df() concatenates when the merged file is absent.
CSV_MAIN = "RideSearch_dataset.csv"
CSV_PARTS_GLOB = "RideSearch_part*_small.csv"

# On-disk caches of the text and numeric embedding matrices.
EMB_TEXT_NPY = "emb_text.npy"
EMB_NUM_NPY = "emb_num.npy"

# JSON file holding manually entered trim lists.
TRIMS_OVERRIDES = "trims_overrides.json"

# Sentence-embedding model name; overridable through the EMBED_MODEL
# environment variable.
EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")

# Columns that unify_columns() guarantees exist and coerces to numeric.
NUM_COLS_CANON = [
    "horsepower",
    "zero_to_100_kmh_s",
    "seats",
    "cargo_liters",
    "price_usd",
    "popularity_score",
    "comfort_score",
    "reliability_score",
    "tech_score",
    "ownership_cost_score",
    "safety_rating",
    "year",
]
28
+
29
+ REAL_TRIMS = {
30
+ ("BMW","1 Series"): ["116i","118i","120i","125i","M135i"],
31
+ ("BMW","2 Series"): ["218i","220i","225i","230i","M235i","M240i"],
32
+ ("BMW","3 Series"): ["318i","320i","325i","330i","330e","335i","340i","M340i","M3"],
33
+ ("BMW","4 Series"): ["420i","430i","435i","440i","M440i","M4"],
34
+ ("BMW","5 Series"): ["520i","525i","530i","530e","535i","540i","550i","M550i","M5"],
35
+ ("BMW","6 Series"): ["630i","640i","650i","M6"],
36
+ ("BMW","7 Series"): ["730i","740i","750i","760i","M760i"],
37
+ ("BMW","8 Series"): ["840i","850i","M850i","M8"],
38
+ ("BMW","X1"): ["sDrive18i","sDrive20i","xDrive20i","xDrive25i"],
39
+ ("BMW","X2"): ["sDrive18i","sDrive20i","xDrive20i","xDrive25i","M35i"],
40
+ ("BMW","X3"): ["sDrive20i","xDrive20i","xDrive30i","M40i","X3 M"],
41
+ ("BMW","X4"): ["xDrive20i","xDrive30i","M40i","X4 M"],
42
+ ("BMW","X5"): ["sDrive40i","xDrive40i","xDrive45e","M50i","X5 M"],
43
+ ("BMW","X6"): ["sDrive40i","xDrive40i","M50i","X6 M"],
44
+ ("BMW","X7"): ["xDrive40i","xDrive50i","M50i"],
45
+ ("BMW","Z4"): ["sDrive20i","sDrive30i","M40i"],
46
+ ("Audi","A1"): ["25 TFSI","30 TFSI","35 TFSI","S1"],
47
+ ("Audi","A3"): ["30 TFSI","35 TFSI","40 TFSI","45 TFSI e","S3","RS3"],
48
+ ("Audi","A4"): ["35 TFSI","40 TFSI","45 TFSI","45 TFSI e","S4","RS4"],
49
+ ("Audi","A5"): ["35 TFSI","40 TFSI","45 TFSI","S5","RS5"],
50
+ ("Audi","A6"): ["40 TFSI","45 TFSI","50 TFSI","55 TFSI e","S6","RS6"],
51
+ ("Audi","A7"): ["45 TFSI","50 TFSI","55 TFSI e","S7","RS7"],
52
+ ("Audi","A8"): ["50 TFSI","55 TFSI","60 TFSI e","S8"],
53
+ ("Audi","Q2"): ["30 TFSI","35 TFSI","40 TFSI"],
54
+ ("Audi","Q3"): ["35 TFSI","40 TFSI","45 TFSI e","RS Q3"],
55
+ ("Audi","Q5"): ["40 TFSI","45 TFSI e","SQ5"],
56
+ ("Audi","Q7"): ["45 TFSI","50 TDI","55 TFSI e","SQ7"],
57
+ ("Audi","Q8"): ["50 TDI","55 TFSI","SQ8","RS Q8"],
58
+ ("Audi","TT"): ["40 TFSI","45 TFSI","TTS","TT RS"],
59
+ ("Audi","R8"): ["V10","V10 Performance","V10 Plus"],
60
+ ("Mercedes-Benz","A-Class"): ["A180","A200","A220","A250","A250e","AMG A35","AMG A45"],
61
+ ("Mercedes-Benz","B-Class"): ["B180","B200","B220","B250e"],
62
+ ("Mercedes-Benz","C-Class"): ["C180","C200","C220d","C250","C300","C350e","AMG C43","AMG C63"],
63
+ ("Mercedes-Benz","E-Class"): ["E200","E220d","E250","E300","E350","E400","E450","AMG E53","AMG E63"],
64
+ ("Mercedes-Benz","S-Class"): ["S350","S400","S450","S500","S580","S600","AMG S63","AMG S65"],
65
+ ("Mercedes-Benz","CLA"): ["CLA180","CLA200","CLA220","CLA250","AMG CLA35","AMG CLA45"],
66
+ ("Mercedes-Benz","CLS"): ["CLS350","CLS400","CLS450","AMG CLS53","AMG CLS63"],
67
+ ("Mercedes-Benz","GLA"): ["GLA180","GLA200","GLA220","GLA250","AMG GLA35","AMG GLA45"],
68
+ ("Mercedes-Benz","GLB"): ["GLB200","GLB220","GLB250","AMG GLB35"],
69
+ ("Mercedes-Benz","GLC"): ["GLC200","GLC220d","GLC300","GLC350e","AMG GLC43","AMG GLC63"],
70
+ ("Mercedes-Benz","GLE"): ["GLE300","GLE350","GLE400","GLE450","GLE580","AMG GLE53","AMG GLE63"],
71
+ ("Mercedes-Benz","GLS"): ["GLS400","GLS450","GLS580","AMG GLS63"],
72
+ ("Mercedes-Benz","G-Class"): ["G350","G400","G500","G550","AMG G63","AMG G65"],
73
+ ("Toyota","Corolla"): ["L","LE","XLE","SE","XSE","GR Corolla"],
74
+ ("Toyota","Camry"): ["L","LE","SE","XLE","XSE","TRD"],
75
+ ("Toyota","Avalon"): ["XLE","XSE","Limited","TRD"],
76
+ ("Toyota","Prius"): ["L","LE","XLE","Limited","Prime"],
77
+ ("Toyota","RAV4"): ["LE","XLE","XLE Premium","Adventure","TRD Off-Road","Limited","Prime"],
78
+ ("Toyota","Highlander"): ["L","LE","XLE","Limited","Platinum","Hybrid"],
79
+ ("Toyota","4Runner"): ["SR5","TRD Off-Road","TRD Pro","Limited"],
80
+ ("Toyota","Tacoma"): ["SR","SR5","TRD Sport","TRD Off-Road","TRD Pro","Limited"],
81
+ ("Toyota","Tundra"): ["SR","SR5","Limited","Platinum","1794","TRD Pro"],
82
+ ("Toyota","Land Cruiser"): ["Base","Heritage Edition"],
83
+ ("Toyota","Supra"): ["2.0","3.0","3.0 Premium"],
84
+ ("Toyota","Yaris"): ["L","LE","XLE","GRMN"],
85
+ ("Toyota","C-HR"): ["LE","XLE","Nightshade","Limited"],
86
+ ("Toyota","Sequoia"): ["SR5","Limited","Platinum","TRD Pro","Capstone"],
87
+ ("Toyota","Sienna"): ["LE","XLE","XSE","Limited","Platinum"],
88
+ ("Honda","Civic"): ["LX","Sport","EX","EX-L","Sport Touring","Touring","Si","Type R"],
89
+ ("Honda","Accord"): ["LX","Sport","EX","EX-L","Touring","Sport-L"],
90
+ ("Honda","CR-V"): ["LX","EX","EX-L","Touring","Hybrid"],
91
+ ("Honda","HR-V"): ["LX","Sport","EX","EX-L"],
92
+ ("Honda","Pilot"): ["LX","EX","EX-L","Touring","Elite","TrailSport"],
93
+ ("Honda","Passport"): ["Sport","EX-L","Touring","Elite","TrailSport"],
94
+ ("Honda","Ridgeline"): ["Sport","RTL","RTL-E","Black Edition"],
95
+ ("Honda","Insight"): ["LX","EX","Touring"],
96
+ ("Honda","Fit / Jazz"): ["LX","Sport","EX","EX-L"],
97
+ ("Honda","Odyssey"): ["EX","EX-L","Touring","Elite"],
98
+ ("Volkswagen","Golf"): ["S","SE","SEL","Autobahn","GTI","R"],
99
+ ("Volkswagen","Jetta"): ["S","SE","SEL","SEL Premium","GLI"],
100
+ ("Volkswagen","Passat"): ["S","SE","SEL Premium","R-Line"],
101
+ ("Volkswagen","Tiguan"): ["S","SE","SEL","SEL Premium","R-Line"],
102
+ ("Volkswagen","Atlas"): ["S","SE","SEL","SEL Premium","Cross Sport"],
103
+ ("Volkswagen","Arteon"): ["SE","SEL","SEL Premium","R-Line"],
104
+ ("Volkswagen","ID.4"): ["Pro","Pro S","1st Edition"],
105
+ ("Nissan","Sentra"): ["S","SV","SR"],
106
+ ("Nissan","Altima"): ["S","SV","SL","SR","Platinum"],
107
+ ("Nissan","Maxima"): ["S","SV","SL","SR","Platinum"],
108
+ ("Nissan","Versa"): ["S","SV","SR"],
109
+ ("Nissan","Rogue"): ["S","SV","SL","Platinum"],
110
+ ("Nissan","Murano"): ["S","SV","SL","Platinum"],
111
+ ("Nissan","Pathfinder"): ["S","SV","SL","Platinum"],
112
+ ("Nissan","Armada"): ["SV","SL","Platinum"],
113
+ ("Nissan","Frontier"): ["S","SV","PRO-4X"],
114
+ ("Nissan","Titan"): ["S","SV","PRO-4X","Platinum Reserve"],
115
+ ("Nissan","Z"): ["Sport","Performance"],
116
+ ("Nissan","GT-R"): ["Premium","NISMO","Track Edition"],
117
+ ("Nissan","Qashqai / Rogue Sport"): ["S","SV","SL"],
118
+ ("Nissan","X-Trail"): ["Visia","Acenta","N-Connecta","Tekna"],
119
+ ("Hyundai","Elantra"): ["SE","SEL","Limited","N Line","N"],
120
+ ("Hyundai","Sonata"): ["SE","SEL","Limited","N Line"],
121
+ ("Hyundai","Accent"): ["SE","SEL"],
122
+ ("Hyundai","Tucson"): ["SE","SEL","Limited","N Line"],
123
+ ("Hyundai","Santa Fe"): ["SE","SEL","Limited","Calligraphy"],
124
+ ("Hyundai","Palisade"): ["SE","SEL","Limited","Calligraphy"],
125
+ ("Hyundai","Kona"): ["SE","SEL","Limited","N Line","N"],
126
+ ("Hyundai","Venue"): ["SE","SEL"],
127
+ ("Hyundai","Ioniq 5"): ["SE","SEL","Limited"],
128
+ ("Hyundai","Ioniq 6"): ["SE","SEL","Limited"],
129
+ ("Hyundai","Santa Cruz"): ["SE","SEL","Night","Limited"],
130
+ ("Hyundai","Ioniq 7 / SEVEN"): ["Concept","Preview"],
131
+ ("Hyundai","Genesis G70"): ["2.0T","3.3T Sport"],
132
+ ("Hyundai","Genesis G80"): ["2.5T","3.5T Sport"],
133
+ ("Hyundai","Genesis G90"): ["3.3T Premium","5.0 Ultimate"],
134
+ ("Kia","Rio"): ["LX","S"],
135
+ ("Kia","Forte"): ["LX","S","EX","GT-Line","GT"],
136
+ ("Kia","K5"): ["LX","S","EX","GT-Line"],
137
+ ("Kia","Stinger"): ["GT-Line","GT1","GT2"],
138
+ ("Kia","Soul"): ["LX","S","EX","GT-Line"],
139
+ ("Kia","Seltos"): ["LX","S","EX","SX Turbo"],
140
+ ("Kia","Sportage"): ["LX","S","EX","SX Turbo"],
141
+ ("Kia","Sorento"): ["LX","S","EX","SX","SX Turbo"],
142
+ ("Kia","Telluride"): ["LX","S","EX","SX"],
143
+ ("Kia","Carnival"): ["LX","S","EX","SX"],
144
+ ("Kia","EV6"): ["Light","Wind","GT-Line"],
145
+ ("Kia","Niro"): ["LX","EX","SX Touring"],
146
+ ("Kia","EV9"): ["Light","Wind","Land","GT-Line"],
147
+ ("Ford","Fiesta"): ["S","SE","Titanium","ST"],
148
+ ("Ford","Focus"): ["S","SE","SEL","Titanium","ST","RS"],
149
+ ("Ford","Fusion"): ["S","SE","SEL","Titanium","Sport"],
150
+ ("Ford","Mustang"): ["EcoBoost","GT","Mach 1","Shelby GT350","Shelby GT500"],
151
+ ("Ford","Escape"): ["S","SE","SEL","Titanium"],
152
+ ("Ford","Edge"): ["SE","SEL","Titanium","ST"],
153
+ ("Ford","Explorer"): ["Base","XLT","Limited","King Ranch","Platinum","ST"],
154
+ ("Ford","Expedition"): ["XLT","Limited","King Ranch","Platinum"],
155
+ ("Ford","F-150"): ["Regular Cab","SuperCab","SuperCrew","Raptor","Lightning"],
156
+ ("Ford","Ranger"): ["XL","XLT","Lariat"],
157
+ ("Ford","Bronco"): ["Base","Big Bend","Black Diamond","Outer Banks","Badlands","Wildtrak","Raptor"],
158
+ ("Ford","Maverick"): ["XL","XLT","Lariat","Tremor"],
159
+ ("Ford","Bronco Sport"): ["Base","Big Bend","Outer Banks","Badlands"],
160
+ ("Chevrolet","Spark"): ["LS","LT","Premier"],
161
+ ("Chevrolet","Sonic"): ["LS","LT","Premier"],
162
+ ("Chevrolet","Cruze"): ["L","LS","LT","Premier"],
163
+ ("Chevrolet","Malibu"): ["L","LS","LT","Premier"],
164
+ ("Chevrolet","Impala"): ["LS","LT","Premier"],
165
+ ("Chevrolet","Camaro"): ["1LS","1LT","2LT","1SS","2SS","ZL1"],
166
+ ("Chevrolet","Corvette"): ["1LT","2LT","3LT","Z06","ZR1"],
167
+ ("Chevrolet","Trax"): ["L","LS","LT","Premier"],
168
+ ("Chevrolet","Equinox"): ["L","LS","LT","Premier"],
169
+ ("Chevrolet","Traverse"): ["L","LS","LT","Premier","High Country"],
170
+ ("Chevrolet","Tahoe"): ["LS","LT","RST","Premier","High Country"],
171
+ ("Chevrolet","Suburban"): ["LS","LT","RST","Premier","High Country"],
172
+ ("Chevrolet","Silverado 1500"): ["Work Truck","Custom","LT","RST","LTZ","High Country"],
173
+ ("Chevrolet","Blazer"): ["LT","RS","Premier"],
174
+ ("Chevrolet","Trailblazer"): ["LS","LT","ACTIV","RS"],
175
+ ("Chevrolet","Bolt EV"): ["1LT","2LT"],
176
+ ("Lexus","IS"): ["300","350","500 F SPORT Performance"],
177
+ ("Lexus","ES"): ["250","300h","350"],
178
+ ("Lexus","GS"): ["300","350","450h","F"],
179
+ ("Lexus","LS"): ["500","500h"],
180
+ ("Lexus","LC"): ["500","500h"],
181
+ ("Lexus","RC"): ["300","350","F"],
182
+ ("Lexus","UX"): ["200","250h"],
183
+ ("Lexus","NX"): ["250","350","350h","450h+"],
184
+ ("Lexus","RX"): ["350","350h","500h F SPORT Performance"],
185
+ ("Lexus","GX"): ["460"],
186
+ ("Lexus","LX"): ["570","600"],
187
+ ("Infiniti","Q50"): ["Pure","Luxe","Sensory","Red Sport 400"],
188
+ ("Infiniti","Q60"): ["Pure","Luxe","Sensory","Red Sport 400"],
189
+ ("Infiniti","Q70"): ["Base","Sport"],
190
+ ("Infiniti","QX50"): ["Pure","Luxe","Sensory","Autograph"],
191
+ ("Infiniti","QX60"): ["Pure","Luxe","Sensory","Autograph"],
192
+ ("Infiniti","QX80"): ["Pure","Luxe","Sensory","Autograph"],
193
+ ("Infiniti","QX55"): ["Luxe","Essential","Sensory"],
194
+ ("Acura","ILX"): ["Base","Premium","A-Spec"],
195
+ ("Acura","TLX"): ["Base","Technology","A-Spec","Advance","Type S"],
196
+ ("Acura","RLX"): ["Base","Technology","Advance"],
197
+ ("Acura","NSX"): ["Base","Type S"],
198
+ ("Acura","RDX"): ["Base","Technology","A-Spec","Advance"],
199
+ ("Acura","MDX"): ["Base","Technology","A-Spec","Advance","Type S"],
200
+ ("Cadillac","ATS"): ["Base","Luxury","Premium Luxury","V-Sport","V"],
201
+ ("Cadillac","CTS"): ["Base","Luxury","Premium Luxury","V-Sport","V"],
202
+ ("Cadillac","CT4"): ["Luxury","Premium Luxury","Sport","V-Series","V-Series Blackwing"],
203
+ ("Cadillac","CT5"): ["Luxury","Premium Luxury","Sport","V-Series","V-Series Blackwing"],
204
+ ("Cadillac","XTS"): ["Base","Luxury","Premium Luxury","Platinum"],
205
+ ("Cadillac","XT4"): ["Luxury","Premium Luxury","Sport"],
206
+ ("Cadillac","XT5"): ["Luxury","Premium Luxury","Sport"],
207
+ ("Cadillac","XT6"): ["Luxury","Premium Luxury","Sport"],
208
+ ("Cadillac","Escalade"): ["Luxury","Premium Luxury","Sport","V-Series"],
209
+ ("Lincoln","MKZ"): ["Premiere","Select","Reserve"],
210
+ ("Lincoln","Continental"): ["Premiere","Select","Reserve","Coach Door Edition"],
211
+ ("Lincoln","Corsair"): ["Base","Reserve"],
212
+ ("Lincoln","Nautilus"): ["Base","Reserve"],
213
+ ("Lincoln","Aviator"): ["Premiere","Reserve","Grand Touring","Black Label"],
214
+ ("Lincoln","Navigator"): ["Premiere","Select","Reserve","Black Label"],
215
+ ("Buick","Verano"): ["Base","Convenience","Leather","Premium"],
216
+ ("Buick","Regal"): ["1SV","Preferred","Essence","GS"],
217
+ ("Buick","LaCrosse"): ["Base","Preferred","Essence","Premium","Avenir"],
218
+ ("Buick","Encore"): ["Base","Convenience","Leather","Premium"],
219
+ ("Buick","Envision"): ["Preferred","Essence","Premium","Avenir"],
220
+ ("Buick","Enclave"): ["Base","Essence","Premium","Avenir"],
221
+ ("Genesis","G70"): ["2.0T","2.0T Advanced","3.3T Sport","3.3T Sport Prestige"],
222
+ ("Genesis","G80"): ["2.5T","2.5T Advanced","3.5T Sport","3.5T Sport Prestige"],
223
+ ("Genesis","G90"): ["3.3T Premium","3.3T Prestige","5.0 Ultimate"],
224
+ ("Genesis","GV70"): ["2.5T","2.5T Advanced","3.5T Sport","3.5T Sport Prestige"],
225
+ ("Genesis","GV80"): ["2.5T","2.5T Advanced","3.5T","3.5T Prestige"],
226
+ ("Tesla","Model S"): ["Standard Range","Long Range","Plaid"],
227
+ ("Tesla","Model 3"): ["Standard Range Plus","Long Range","Performance"],
228
+ ("Tesla","Model X"): ["Standard Range","Long Range","Plaid"],
229
+ ("Tesla","Model Y"): ["Standard Range","Long Range","Performance"],
230
+ ("Porsche","911"): ["Carrera","Carrera S","Carrera 4","Carrera 4S","Turbo","Turbo S","GT3","GT3 RS","GT2 RS"],
231
+ ("Porsche","Boxster"): ["Base","S","GTS","Spyder"],
232
+ ("Porsche","Cayman"): ["Base","S","GTS","GT4"],
233
+ ("Porsche","Panamera"): ["Base","4","S","4S","GTS","Turbo","Turbo S"],
234
+ ("Porsche","Macan"): ["Base","S","GTS","Turbo"],
235
+ ("Porsche","Cayenne"): ["Base","S","GTS","Turbo","Turbo S","E-Hybrid"],
236
+ ("Porsche","Taycan"): ["Base","4S","Turbo","Turbo S"],
237
+ ("Jaguar","XE"): ["Base","Premium","Prestige","R-Sport","S"],
238
+ ("Jaguar","XF"): ["Premium","Prestige","R-Sport","S"],
239
+ ("Jaguar","XJ"): ["Premium Luxury","Portfolio","Autobiography","XJR575"],
240
+ ("Jaguar","F-TYPE"): ["Base","Premium","R-Dynamic","R","SVR"],
241
+ ("Jaguar","E-PACE"): ["Base","S","SE","HSE","R-Dynamic"],
242
+ ("Jaguar","F-PACE"): ["Premium","Prestige","R-Sport","S","SVR"],
243
+ ("Jaguar","I-PACE"): ["S","SE","HSE","First Edition"],
244
+ ("Land Rover","Range Rover Evoque"): ["S","SE","HSE","HSE Dynamic","Autobiography"],
245
+ ("Land Rover","Range Rover Velar"): ["S","SE","HSE","R-Dynamic","P380","P550"],
246
+ ("Land Rover","Range Rover Sport"): ["HSE","HSE Dynamic","Autobiography","SVR"],
247
+ ("Land Rover","Range Rover"): ["Base","HSE","Autobiography","SV","SVAutobiography"],
248
+ ("Land Rover","Discovery Sport"): ["S","SE","HSE","HSE Luxury"],
249
+ ("Land Rover","Discovery"): ["S","SE","HSE","HSE Luxury"],
250
+ ("Land Rover","Defender"): ["90","110","130","X","X-Dynamic","First Edition"],
251
+ ("Volvo","S60"): ["T5 Momentum","T5 R-Design","T6 R-Design","T8 Polestar"],
252
+ ("Volvo","S90"): ["T5 Momentum","T6 Momentum","T6 R-Design","T8 Inscription"],
253
+ ("Volvo","V60"): ["T5 Momentum","T5 R-Design","T6 R-Design","T8 Polestar"],
254
+ ("Volvo","V90"): ["T5 Momentum","T6 Momentum","T6 R-Design","T8 Inscription"],
255
+ ("Volvo","XC40"): ["T4 Momentum","T5 Momentum","T5 R-Design","Recharge"],
256
+ ("Volvo","XC60"): ["Core","Plus","Ultimate","Polestar Engineered"],
257
+ ("Volvo","XC90"): ["Momentum","R-Design","Inscription","Recharge"],
258
+ ("MINI","Cooper"): ["One","Cooper","Cooper S","John Cooper Works"],
259
+ ("Mini","Cooper"): ["Base","Classic","Signature","Iconic","John Cooper Works"],
260
+ ("Mini","Countryman"): ["Classic","Signature","Iconic","John Cooper Works"],
261
+ ("Mini","Clubman"): ["Classic","Signature","Iconic","John Cooper Works"],
262
+ ("Mazda","Mazda3"): ["Base","Select","Preferred","Premium","Turbo"],
263
+ ("Mazda","Mazda6"): ["Sport","Touring","Grand Touring","Grand Touring Reserve","Signature"],
264
+ ("Mazda","CX-3"): ["Sport","Touring","Grand Touring"],
265
+ ("Mazda","CX-30"): ["S","Select","Preferred","Premium","Turbo","Turbo Premium Plus"],
266
+ ("Mazda","CX-5"): ["S","Select","Preferred","Premium","Turbo","Turbo Signature"],
267
+ ("Mazda","CX-50"): ["2.5 S","2.5 S Select","2.5 S Preferred","2.5 Turbo","2.5 Turbo Premium"],
268
+ ("Mazda","CX-9"): ["Sport","Touring","Grand Touring","Signature","Carbon Edition"],
269
+ ("Mazda","MX-5 Miata"): ["Sport","Club","Grand Touring"],
270
+ ("Mazda","MX-30"): ["EV","EV Premium Plus"],
271
+ ("Subaru","Impreza"): ["Base","Premium","Sport","Limited"],
272
+ ("Subaru","WRX"): ["Base","Premium","Limited","GT","STI"],
273
+ ("Subaru","BRZ"): ["Premium","Limited","tS"],
274
+ ("Subaru","Legacy"): ["Base","Premium","Sport","Limited","Touring XT"],
275
+ ("Subaru","Outback"): ["Base","Premium","Limited","Onyx Edition XT","Wilderness","Touring XT"],
276
+ ("Subaru","Forester"): ["Base","Premium","Sport","Wilderness","Limited","Touring"],
277
+ ("Subaru","Crosstrek"): ["Base","Premium","Sport","Limited","Hybrid"],
278
+ ("Subaru","Ascent"): ["Base","Premium","Onyx Edition","Limited","Touring"],
279
+ ("Mitsubishi","Mirage"): ["ES","LE","SE","GT"],
280
+ ("Mitsubishi","Outlander"): ["ES","SE","SEL","Black Edition","GT","PHEV"],
281
+ ("Mitsubishi","Outlander Sport"): ["S","ES","LE","SE","GT"],
282
+ ("Mitsubishi","Eclipse Cross"): ["ES","LE","SE","SEL"],
283
+ ("Mitsubishi","Pajero"): ["GL","GLS","Exceed"],
284
+ ("Mitsubishi","L200 / Triton"): ["GLX","GLS","Exceed"],
285
+ ("Peugeot","208"): ["Active","Allure","GT"],
286
+ ("Peugeot","2008"): ["Active","Allure","GT"],
287
+ ("Peugeot","308"): ["Active","Allure","GT"],
288
+ ("Peugeot","3008"): ["Active","Allure","GT","GT Pack"],
289
+ ("Peugeot","5008"): ["Active","Allure","GT"],
290
+ ("Peugeot","508"): ["Active","Allure","GT","PSE"],
291
+ ("Renault","Clio"): ["Play","Iconic","S Edition","RS Line"],
292
+ ("Renault","Megane"): ["Play","Iconic","RS Line","RS Trophy"],
293
+ ("Renault","Captur"): ["Play","Iconic","S Edition","RS Line"],
294
+ ("Renault","Kadjar"): ["Play","Iconic","S Edition","GT Line"],
295
+ ("Renault","Arkana"): ["Iconic","S Edition","RS Line"],
296
+ ("Renault","Austral"): ["Equilibre","Techno","Esprit Alpine"],
297
+ ("Skoda","Fabia"): ["S","SE","SE L","Monte Carlo"],
298
+ ("Skoda","Octavia"): ["S","SE","SE L","SportLine","vRS"],
299
+ ("Skoda","Superb"): ["SE","SE L","SportLine","L&K"],
300
+ ("Skoda","Karoq"): ["SE Drive","SE L","SportLine"],
301
+ ("Skoda","Kodiaq"): ["SE","SE L","SportLine","vRS"],
302
+ ("Seat","Ibiza"): ["Reference","Style","Xcellence","FR"],
303
+ ("Seat","Leon"): ["Reference","Style","Xcellence","FR","Cupra"],
304
+ ("Seat","Arona"): ["Reference","Style","Xcellence","FR"],
305
+ ("Seat","Ateca"): ["Reference","Style","Xcellence","FR"],
306
+ ("Seat","Tarraco"): ["SE","SE Technology","Xcellence","FR"],
307
+ }
308
+ def _norm(x): return str(x or "").strip().lower()
309
def _build_trim_index(table):
    """Return *table* re-keyed by normalised (make, model).

    Keys that differ only by case or surrounding whitespace (for example
    ("MINI", "Cooper") and ("Mini", "Cooper")) collapse to the same
    normalised key. Their trim lists are merged in first-seen order,
    without duplicates, instead of the later entry silently replacing the
    earlier one.
    """
    index = {}
    for (mk, md), trims in table.items():
        bucket = index.setdefault((_norm(mk), _norm(md)), [])
        for trim in trims:
            if trim not in bucket:
                bucket.append(trim)
    return index


# Case/whitespace-insensitive lookup table derived from REAL_TRIMS.
REAL_TRIMS_N = _build_trim_index(REAL_TRIMS)
310
+
311
def load_df():
    """Load the RideSearch dataset as a DataFrame.

    Reads CSV_MAIN when it exists. Otherwise every file matching
    CSV_PARTS_GLOB is read (in sorted path order), concatenated with a fresh
    index, written to CSV_MAIN, and returned.

    Raises:
        FileNotFoundError: neither the main CSV nor any part file exists.
    """
    if not os.path.exists(CSV_MAIN):
        part_paths = sorted(glob.glob(CSV_PARTS_GLOB))
        if not part_paths:
            raise FileNotFoundError("Upload RideSearch_dataset.csv or the parts RideSearch_part*_small.csv.")
        frames = [pd.read_csv(path) for path in part_paths]
        merged = pd.concat(frames, ignore_index=True)
        # Persist the merged frame so later runs take the single-file path.
        merged.to_csv(CSV_MAIN, index=False)
        return merged
    return pd.read_csv(CSV_MAIN)
320
+
321
def unify_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with the columns the rest of the app expects.

    Steps, in order:
      * copy short score columns to their canonical names when only the
        short name is present;
      * ensure ``trim_display`` exists (copied from ``trim``, else "");
      * ensure every NUM_COLS_CANON column exists and is numeric
        (unparseable values become NaN);
      * build ``text_record`` from the available descriptive columns when
        it is missing;
      * ensure the descriptive string columns exist (empty when absent).
    """
    out = df.copy()

    aliases = {
        "popularity": "popularity_score",
        "comfort": "comfort_score",
        "reliability": "reliability_score",
        "safety": "safety_rating",
    }
    for short, canonical in aliases.items():
        if short in out.columns and canonical not in out.columns:
            out[canonical] = out[short]

    if "trim_display" not in out.columns:
        out["trim_display"] = out["trim"] if "trim" in out.columns else ""

    for col in NUM_COLS_CANON:
        if col not in out.columns:
            out[col] = np.nan
        out[col] = pd.to_numeric(out[col], errors="coerce")

    if "text_record" not in out.columns:
        wanted = ("make", "model", "trim_display", "body_type", "fuel", "engine_type")
        text_cols = [c for c in wanted if c in out.columns]
        out["text_record"] = out[text_cols].fillna("").astype(str).agg(" ".join, axis=1)

    for col in ("make", "model", "body_type", "fuel", "engine_type", "name"):
        if col not in out.columns:
            out[col] = ""
    return out
338
+
339
def suspicious_zero_to_100(series: pd.Series) -> bool:
    """Report whether a 0-100 km/h column looks like placeholder data.

    With fewer than 20 non-null values the answer is always False.
    Otherwise the column is flagged when it has very few distinct values
    (at most max(2, 3% of the count)) or a standard deviation below 0.18.
    """
    values = series.dropna()
    count = len(values)
    if count < 20:
        return False
    distinct_cap = max(2, int(0.03 * count))
    if values.nunique() <= distinct_cap:
        return True
    return values.std() < 0.18
343
+
344
def estimate_0_100(row):
    """Heuristically estimate a 0-100 km/h time (seconds) for one record.

    Args:
        row: any object with a dict-style ``get`` (a pandas row Series or a
            plain dict). Reads ``horsepower``, ``body_type``, ``fuel``,
            ``model`` and ``trim_display``.

    Returns:
        The estimate as a float rounded to 2 decimals and clipped to
        [2.8, 14.5].

    The base value falls with log(horsepower) and is then adjusted for body
    style, fuel type and performance-trim keywords.
    """
    import re  # local import: keeps this block self-contained

    try:
        hp = float(row.get("horsepower", 150) or 150)
    except (TypeError, ValueError):
        hp = 150.0
    # NaN is truthy, so it slips past the ``or 150`` default above; without
    # this guard a missing horsepower produced a NaN estimate.
    if not np.isfinite(hp):
        hp = 150.0

    body = str(row.get("body_type", "")).lower()
    fuel = str(row.get("fuel", "")).lower()
    trim = f"{row.get('model', '')} {row.get('trim_display', '')}".lower()

    base = 26.0 - 3.2 * np.log(max(hp, 60.0))
    if any(k in body for k in ("suv", "crossover", "pickup", "truck", "van")):
        base += 0.7
    if any(k in body for k in ("coupe", "roadster")):
        base -= 0.4
    if "electric" in fuel or "ev" in fuel:
        base -= 0.8
    if "hybrid" in fuel:
        base -= 0.3
    if "diesel" in fuel:
        base += 0.2

    # Match performance badges as whole words. Plain substring tests gave
    # the bonus to "Versa"/"Traverse"/"Corsair" (contain "rs") and
    # "Prestige" (contains "sti"), and " m " could never match a trailing
    # badge such as "X5 M". "rs" keeps matching RS3/RS6-style and vRS trims.
    performance = (
        r"\b(?:m|amg|v?rs\d*|type r|sti|gts|gt3|hellcat|svr|cupra"
        r"|john cooper works)\b"
    )
    if re.search(performance, trim):
        base -= 0.6

    return float(np.clip(round(base, 2), 2.8, 14.5))
358
+
359
def ensure_embeddings(df):
    """Return ``(Etext, Enum)`` embedding matrices aligned row-for-row with *df*.

    Cached matrices in EMB_TEXT_NPY / EMB_NUM_NPY are reused only when both
    files exist and both have exactly ``len(df)`` rows; otherwise both are
    rebuilt and saved.

    Text embeddings come from SentenceTransformer(EMBED_MODEL) when that
    package imported, else from a 512-feature HashingVectorizer. Numeric
    embeddings are the standard-scaled numeric feature columns.

    Raises:
        RuntimeError: neither text embedding backend is available.
    """
    n_rows = len(df)
    if os.path.exists(EMB_TEXT_NPY) and os.path.exists(EMB_NUM_NPY):
        cached_text, cached_num = np.load(EMB_TEXT_NPY), np.load(EMB_NUM_NPY)
        # A cache built from a different dataset would be indexed with this
        # frame's row positions, so only accept it when row counts agree.
        if cached_text.shape[0] == n_rows and cached_num.shape[0] == n_rows:
            return cached_text, cached_num

    # Same missing-value handling for both backends (empty string, not "nan").
    texts = df["text_record"].fillna("").astype(str).tolist()
    if SentenceTransformer is not None:
        model = SentenceTransformer(EMBED_MODEL)
        Etext = model.encode(
            texts,
            batch_size=256,
            show_progress_bar=False,
            normalize_embeddings=True,
        ).astype("float32")
    else:
        if HashingVectorizer is None:
            raise RuntimeError("Install sentence-transformers or scikit-learn for text embeddings.")
        hv = HashingVectorizer(n_features=512, alternate_sign=False, norm="l2")
        Etext = hv.transform(texts).toarray().astype("float32")
    np.save(EMB_TEXT_NPY, Etext)

    num_cols = [
        "horsepower", "zero_to_100_kmh_s", "seats", "cargo_liters", "price_usd",
        "popularity_score", "comfort_score", "reliability_score", "tech_score",
        "ownership_cost_score", "safety_rating",
    ]
    X = df[num_cols].copy()
    # Mean-impute, then zero-fill: a column that is entirely NaN has a NaN
    # mean and would otherwise leave NaN in every scaled row.
    X = X.fillna(X.mean(numeric_only=True)).fillna(0.0)
    scaler = StandardScaler()
    Enum = scaler.fit_transform(X.values.astype("float32")).astype("float32")
    np.save(EMB_NUM_NPY, Enum)
    return Etext, Enum
378
+
379
def load_overrides():
    """Load the manual trim overrides from TRIMS_OVERRIDES.

    Returns:
        The parsed JSON object, or an empty dict when the file is missing,
        unreadable, not valid JSON, or does not contain a JSON object.
    """
    if not os.path.exists(TRIMS_OVERRIDES):
        return {}
    try:
        with open(TRIMS_OVERRIDES, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return {}
    # Callers use nested ``.get`` lookups, so anything but a dict (for
    # example a top-level list) is treated as "no overrides".
    return data if isinstance(data, dict) else {}
385
+
386
def save_overrides(data):
    """Write *data* to TRIMS_OVERRIDES as indented UTF-8 JSON."""
    with open(TRIMS_OVERRIDES, "w", encoding="utf-8") as handle:
        json.dump(data, handle, indent=2, ensure_ascii=False)
388
+
389
def learned_trims_from_csv(df):
    """Collect the trims seen in *df* for each (make, model) pair.

    Returns:
        A dict keyed by normalised (make, model) whose values are up to 20
        non-empty ``trim_display`` strings, most frequent first. Pairs with
        no usable trim are omitted.
    """
    learned = {}
    for (make, model), group in df.groupby(["make", "model"]):
        cleaned = group["trim_display"].dropna().astype(str).map(str.strip)
        cleaned = cleaned[cleaned != ""]
        ranked = cleaned.value_counts().index.tolist()
        if ranked:
            learned[(_norm(make), _norm(model))] = ranked[:20]
    return learned
395
+
396
def get_trims_for(make, model, learned, overrides):
    """Return the trim list to offer for *make* / *model*.

    Sources are tried in priority order: a non-empty manual override (exact
    make/model keys), the built-in REAL_TRIMS_N table, then *learned* trims
    (both keyed by normalised names). Falls back to ``["Base"]``.
    """
    manual = overrides.get(make, {}).get(model)
    if manual:
        return manual

    key = (_norm(make), _norm(model))
    for source in (REAL_TRIMS_N, learned):
        if key in source:
            return source[key]
    return ["Base"]
402
+
403
def wiki_image(make, model, year=None):
    """Look up a thumbnail URL for a car via the English Wikipedia API.

    Searches for "<year> <make> <model>" (year omitted when falsy) and
    returns the thumbnail source of the first result page. This is a
    best-effort lookup: any failure, or no result, yields None.
    """
    query = f"{year} {make} {model}" if year else f"{make} {model}"
    params = {
        "action": "query",
        "format": "json",
        "prop": "pageimages",
        "piprop": "thumbnail",
        "pithumbsize": 600,
        "generator": "search",
        "gsrsearch": query,
        "gsrlimit": 1,
        "gsrnamespace": 0,
    }
    try:
        payload = requests.get("https://en.wikipedia.org/w/api.php", params=params, timeout=6).json()
        pages = payload.get("query", {}).get("pages", {})
        if not pages:
            return None
        first_page = next(iter(pages.values()))
        return first_page.get("thumbnail", {}).get("source")
    except Exception:
        # Photos are optional; never let a lookup failure break the caller.
        return None
414
+
415
def cosine_sim_row_vs_mat(mat, vec):
    """Return the cosine similarity between *vec* and every row of *mat*.

    A small epsilon is added to each norm so zero vectors give 0 rather
    than a division error.
    """
    eps = 1e-9
    row_norms = np.linalg.norm(mat, axis=1, keepdims=True)
    unit_rows = mat / (row_norms + eps)
    unit_vec = vec / (np.linalg.norm(vec) + eps)
    return unit_rows @ unit_vec
419
+
420
def _score_int(value, default=0):
    """Return *value* as an int, or *default* when it is missing or NaN."""
    try:
        if pd.isna(value):
            return default
        return int(value)
    except (TypeError, ValueError):
        return default


def recommend(df, Etext, Enum, make, model, trim, year, topk, alpha, body, fuel, y_min, y_max, p_min, p_max, safety, reliab, cross_brand_only=True, exclude_same_model=True, unique_brand=True):
    """Recommend cars similar to an anchor car chosen by make/model/trim/year.

    The anchor is the most popular row matching make and model, narrowed by
    *trim* and *year* when that leaves any row. Candidates are filtered by
    brand/model exclusions, body, fuel, year range, price range and minimum
    safety/reliability, then ranked by
    ``alpha * text_similarity + (1 - alpha) * numeric_similarity``.

    Row labels of *df* are used as positions into *Etext* / *Enum*, so *df*
    needs a default 0..n-1 index aligned with both matrices.

    Returns:
        ``(error, table, anchor_markdown, cards)``. On failure ``error`` is
        a message and the rest are ``None, "", []``. On success ``error``
        is None, ``table`` is a DataFrame with a ``similarity_%`` column,
        and ``cards`` holds ``(image_url, title)`` pairs for up to the top
        three results that have a photo.
    """
    same_model = (df["make"] == make) & (df["model"] == model)
    sub = df[same_model].copy()
    if trim:
        sub = sub[sub["trim_display"].astype(str).str.lower() == str(trim).lower()]
    if year:
        sub = sub[pd.to_numeric(sub["year"], errors="coerce") == int(year)]
    if sub.empty:
        # The chosen trim/year is not in the data: fall back to any row of
        # this make/model.
        sub = df[same_model].copy()
    if sub.empty:
        return "No such make/model in dataset.", None, "", []

    a = sub.sort_values("popularity_score", ascending=False).iloc[0]
    a_idx = int(a.name)

    pool = df.copy()
    if cross_brand_only:
        pool = pool[pool["make"] != a["make"]]
    if exclude_same_model:
        pool = pool[~((pool["make"] == a["make"]) & (pool["model"] == a["model"]))]
    if body and body != "Any":
        pool = pool[pool["body_type"] == body]
    if fuel and fuel != "Any":
        pool = pool[pool["fuel"] == fuel]
    pool = pool[(pool["year"] >= y_min) & (pool["year"] <= y_max)]
    pool = pool[(pool["price_usd"] >= p_min) & (pool["price_usd"] <= p_max)]
    pool = pool[(pool["safety_rating"] >= safety) & (pool["reliability_score"] >= reliab)]
    if pool.empty:
        return "No cars after your filters. Loosen price/year/safety.", None, "", []

    cand_idx = pool.index.values
    st = cosine_sim_row_vs_mat(Etext[cand_idx], Etext[a_idx])
    sn = cosine_sim_row_vs_mat(Enum[cand_idx], Enum[a_idx])
    s = float(alpha) * st + (1 - float(alpha)) * sn
    order = np.argsort(-s)

    chosen, seen = [], set()
    for j in order:
        k = cand_idx[j]
        brand = str(df.loc[k, "make"]).lower()
        if unique_brand and brand in seen:
            continue
        seen.add(brand)
        chosen.append(k)
        if len(chosen) >= int(topk):
            break
    if not chosen:
        return "No recommendations after constraints.", None, "", []

    out = df.loc[chosen].copy()
    sim_lookup = {k: float(score) for k, score in zip(cand_idx, s)}
    out["similarity_%"] = [round(sim_lookup[k] * 100, 1) for k in chosen]
    cols = ["name", "make", "model", "trim_display", "year", "body_type", "fuel", "engine_type", "price_usd", "horsepower", "zero_to_100_kmh_s", "popularity_score", "comfort_score", "reliability_score", "tech_score", "ownership_cost_score", "safety_rating", "similarity_%"]
    table = out[[c for c in cols if c in out.columns]].reset_index(drop=True)

    disp_trim = str(a.get("trim_display", "")).strip()
    year_txt = int(a["year"]) if pd.notna(a["year"]) else ""
    hp_txt = int(a["horsepower"]) if pd.notna(a["horsepower"]) else "—"
    price_txt = int(a["price_usd"]) if pd.notna(a["price_usd"]) else "—"
    zero = a.get("zero_to_100_kmh_s")
    zero_txt = zero if pd.notna(zero) else "—"
    # Two trailing spaces before "\n" make a Markdown hard line break.
    # Scores go through _score_int because int(NaN) raises.
    anchor_md = (
        f"**{a['make']} {a['model']} {disp_trim} {year_txt}**  \n"
        f"Body: {a.get('body_type', '')} • Fuel: {a.get('fuel', '')} • Engine: {a.get('engine_type', '')}  \n"
        f"HP: {hp_txt} • 0–100: {zero_txt}s "
        f"• Price: ${price_txt}  \n"
        f"Popularity {_score_int(a.get('popularity_score', 0))}/10 • Comfort {_score_int(a.get('comfort_score', 0))}/10 • "
        f"Reliability {_score_int(a.get('reliability_score', 0))}/100 • Safety {_score_int(a.get('safety_rating', 0))}★"
    )

    cards = []
    for _, r in out.head(3).iterrows():
        # The year column is float (and may be NaN); pass a clean int or
        # None so the search query is not "2020.0 ..." or "nan ...".
        yr = r.get("year", None)
        yr = int(yr) if pd.notna(yr) else None
        img = wiki_image(r.get("make", ""), r.get("model", ""), yr)
        if img is None:
            # No photo found: skip rather than hand the gallery a None image.
            continue
        title = f"{r.get('make', '')} {r.get('model', '')} {str(r.get('trim_display', '')).strip()}"
        cards.append((img, title))
    return None, table, anchor_md, cards
470
+
471
def build_app():
    """Load the data, prepare embeddings and build the Gradio Blocks UI.

    Returns:
        The ``gr.Blocks`` app with two tabs: "Pick & Recommend" (choose an
        anchor car and filters, then view similar cars and photos) and
        "Admin • Trim Overrides" (save manual trim lists to
        trims_overrides.json).
    """
    df = unify_columns(load_df())
    # Replace the 0-100 column with heuristic estimates when it looks like
    # placeholder data.
    if suspicious_zero_to_100(df["zero_to_100_kmh_s"]):
        df["zero_to_100_kmh_s"] = df.apply(estimate_0_100, axis=1)
    Etext, Enum = ensure_embeddings(df)
    learned = learned_trims_from_csv(df)
    overrides = load_overrides()

    makes = sorted(df["make"].dropna().astype(str).unique().tolist())
    body_choices = ["Any"] + sorted([b for b in df["body_type"].dropna().astype(str).unique().tolist() if b])
    fuel_choices = ["Any"] + sorted([f for f in df["fuel"].dropna().astype(str).unique().tolist() if f])
    y_lo, y_hi = int(df["year"].min(skipna=True)), int(df["year"].max(skipna=True))
    p_lo, p_hi = int(df["price_usd"].min(skipna=True)), int(df["price_usd"].max(skipna=True))

    def models_for(make):
        """Update the model dropdown with the models of *make*."""
        if not make:
            return gr.update(choices=[], value=None)
        opts = sorted(df.loc[df["make"].eq(make), "model"].dropna().astype(str).unique().tolist())
        return gr.update(choices=opts, value=None)

    def trims_years_for(make, model):
        """Update the trim and year dropdowns for the chosen make/model."""
        if not make or not model:
            return gr.update(choices=[], value=None), gr.update(choices=[], value=None)
        trims = get_trims_for(make, model, learned, overrides)
        years = sorted(pd.to_numeric(df[(df["make"] == make) & (df["model"] == model)]["year"], errors="coerce").dropna().astype(int).unique().tolist())
        return gr.update(choices=trims, value=None), gr.update(choices=[None] + years, value=None)

    with gr.Blocks(theme=gr.themes.Soft(), title="RideSearch") as demo:
        gr.Markdown("## RideSearch — cross-brand recommendations with realistic trims & photos")

        with gr.Tab("Pick & Recommend"):
            with gr.Row():
                mk = gr.Dropdown(makes, label="Make")
                md = gr.Dropdown([], label="Model")
                tr = gr.Dropdown([], label="Trim (optional)")
                yr = gr.Dropdown([], label="Year (optional)")
            mk.change(models_for, mk, md)
            md.change(trims_years_for, [mk, md], [tr, yr])
            with gr.Row():
                body = gr.Dropdown(body_choices, value="Any", label="Body")
                fuel = gr.Dropdown(fuel_choices, value="Any", label="Fuel")
            with gr.Row():
                y_min = gr.Slider(y_lo, y_hi, value=y_lo, step=1, label="Year min")
                y_max = gr.Slider(y_lo, y_hi, value=y_hi, step=1, label="Year max")
            with gr.Row():
                p_min = gr.Slider(p_lo, p_hi, value=p_lo, step=500, label="Price min (USD)")
                # Default upper bound: 20,000 above the cheapest car, capped
                # at the most expensive one.
                p_max = gr.Slider(p_lo, p_hi, value=min(p_hi, p_lo + 20000), step=500, label="Price max (USD)")
            with gr.Row():
                safety = gr.Slider(3, 5, value=4, step=1, label="Min Safety ★")
                reliab = gr.Slider(55, 99, value=70, step=1, label="Min Reliability")
            with gr.Row():
                topk = gr.Slider(1, 10, value=5, step=1, label="Recommendations")
                alpha = gr.Slider(0, 1, value=0.7, step=0.05, label="α — Text vs Numeric")
            with gr.Row():
                cross = gr.Checkbox(True, label="Cross-brand only")
                xmodel = gr.Checkbox(True, label="Exclude same model family")
                uniqb = gr.Checkbox(True, label="Unique brands (no repeats)")
            run = gr.Button("Recommend", variant="primary")
            err = gr.Markdown()
            anchor_md = gr.Markdown()
            table = gr.Dataframe(interactive=False, wrap=True, label="Recommendations")
            gallery = gr.Gallery(label="Photos", height=220, columns=[3])

            def on_click(mk_, md_, tr_, yr_, topk_, alpha_, body_, fuel_, y_min_, y_max_, p_min_, p_max_, safety_, reliab_, cross_, xmodel_, uniqb_):
                """Run recommend() and map its result onto the four outputs."""
                msg, tbl, atext, cards = recommend(
                    df, Etext, Enum, mk_, md_, tr_, yr_, int(topk_), float(alpha_),
                    body_, fuel_, int(y_min_), int(y_max_), int(p_min_), int(p_max_), int(safety_), int(reliab_),
                    cross_brand_only=bool(cross_), exclude_same_model=bool(xmodel_), unique_brand=bool(uniqb_),
                )
                if msg:
                    return gr.update(value=f"**{msg}**"), gr.update(value=""), pd.DataFrame(), []
                return gr.update(value=""), gr.update(value=atext), tbl, cards

            run.click(
                on_click,
                [mk, md, tr, yr, topk, alpha, body, fuel, y_min, y_max, p_min, p_max, safety, reliab, cross, xmodel, uniqb],
                [err, anchor_md, table, gallery],
            )

        with gr.Tab("Admin • Trim Overrides"):
            gr.Markdown("Paste correct trims (one per line) for a model. Saved to trims_overrides.json.")
            a_mk = gr.Dropdown(makes, label="Make")
            a_md = gr.Dropdown([], label="Model")
            a_txt = gr.Textbox(lines=8, label="Display trims (one per line)")
            save_btn = gr.Button("Save override")
            save_msg = gr.Markdown()
            a_mk.change(models_for, a_mk, a_md)

            def do_save(make, model, txt):
                """Store the pasted trims for make/model and persist them."""
                trims = [t.strip() for t in str(txt).splitlines() if t.strip()]
                if not make or not model or not trims:
                    return "⚠️ Provide make, model, and at least one trim."
                # Mutates the dict that trims_years_for() reads through its
                # closure, so the new trims are used on the next lookup.
                if make not in overrides:
                    overrides[make] = {}
                overrides[make][model] = trims
                save_overrides(overrides)
                return f"✅ Saved {len(trims)} trims for {make} {model}. Refresh the Pick tab."

            save_btn.click(do_save, [a_mk, a_md, a_txt], save_msg)

        gr.Markdown("First run may be slow while embeddings build.")
    return demo
563
+
564
if __name__ == "__main__":
    # Build the UI, enable request queuing, and serve on all interfaces
    # at port 7860.
    demo = build_app()
    queued = demo.queue()
    queued.launch(server_name="0.0.0.0", server_port=7860)