Levimichael4 committed on
Commit
ac022f0
·
verified ·
1 Parent(s): 0e0eb96

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +372 -94
app.py CHANGED
@@ -1,113 +1,391 @@
 
1
 
2
- import os, glob, numpy as np, pandas as pd
 
 
3
  from sklearn.metrics.pairwise import cosine_similarity
4
  from sklearn.preprocessing import StandardScaler
5
- import gradio as gr, plotly.express as px
 
 
 
 
6
 
7
  def load_df():
8
  if os.path.exists('RideSearch_dataset.csv'):
9
  return pd.read_csv('RideSearch_dataset.csv')
10
  parts = sorted(glob.glob('RideSearch_part*_small.csv'))
11
- if parts:
12
- df = pd.concat([pd.read_csv(p) for p in parts], ignore_index=True)
13
- df.to_csv('RideSearch_dataset.csv', index=False)
14
- return df
15
- raise FileNotFoundError('Upload dataset (parts or full) first.')
16
 
17
  DF = load_df()
18
- NUM = ['horsepower','zero_to_100_kmh_s','seats','cargo_liters','price_usd','popularity_score','comfort_score','reliability_score','tech_score','ownership_cost_score','safety_rating']
 
 
 
 
 
19
 
20
  def ensure_emb():
21
- if not (os.path.exists('emb_text.npy') and os.path.exists('emb_num.npy')):
22
- from sentence_transformers import SentenceTransformer
23
- m = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
24
- te = m.encode(DF['text_record'].astype(str).tolist(), batch_size=256, show_progress_bar=True, normalize_embeddings=True)
25
- np.save('emb_text.npy', np.asarray(te, dtype='float32'))
26
- X = DF[NUM].copy(); X['zero_to_100_kmh_s'] = -X['zero_to_100_kmh_s']
27
- Xs = StandardScaler().fit_transform(X.values.astype('float32'))
28
- np.save('emb_num.npy', Xs.astype('float32'))
29
- return np.load('emb_text.npy'), np.load('emb_num.npy')
30
-
31
- def mapping():
32
- m = {}
33
- for mk, g in DF.groupby('make'):
34
- m[mk] = {}
35
- for md, g2 in g.groupby('model'):
36
- m[mk][md] = {'trims': sorted(g2['trim'].astype(str).unique().tolist())[:20], 'years': sorted(g2['year'].astype(int).unique().tolist())}
37
- return m
38
- MAP = mapping()
39
-
40
- def models_for(mk): return sorted(MAP.get(mk, {}).keys()) if mk else []
41
- def trim_year(mk, md):
42
- d = MAP.get(mk, {}).get(md, {})
43
- return d.get('trims', []), d.get('years', [])
44
-
45
- def anchor(mk, md, tr, yr):
46
- sub = DF.copy()
47
- if mk: sub = sub[sub['make'] == mk]
48
- if md: sub = sub[sub['model'] == md]
49
- if tr: sub = sub[sub['trim'] == tr]
50
- if yr: sub = sub[sub['year'] == yr]
51
- return None if sub.empty else sub.sort_values('popularity_score', ascending=False).iloc[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
- def filt(df, body, fuel, yr_rng, price, safety, rel):
 
 
 
 
54
  out = df.copy()
55
  if body != 'Any': out = out[out['body_type'] == body]
56
  if fuel != 'Any': out = out[out['fuel'] == fuel]
57
- y0, y1 = yr_rng; p0, p1 = price
58
- out = out[(out['year'] >= y0) & (out['year'] <= y1) & (out['price_usd'] >= p0) & (out['price_usd'] <= p1)]
59
- out = out[(out['safety_rating'] >= safety) & (out['reliability_score'] >= rel)]
60
  return out
61
 
62
- def fmt(r):
63
- eff = (f"{int(r['city_mpg'])}-{int(r['highway_mpg'])} mpg" if pd.notna(r['city_mpg']) else f"{int(r['range_km'])} km range")
64
- return (
65
- f"**{r['name']}**\n"
66
- f"- Brand: {r['make']} | Body: {r['body_type']} | Fuel: {r['fuel']}\n"
67
- f"- HP: {int(r['horsepower'])} | 0–100: {r['zero_to_100_kmh_s']} s | Price: ${int(r['price_usd']):,}\n"
68
- f"- Popularity {int(r['popularity_score'])}/10 • Comfort {int(r['comfort_score'])}/10 • Reliability {int(r['reliability_score'])}/100 • Safety {int(r['safety_rating'])}★"
69
- )
70
-
71
- def rec(mk, md, tr, yr, topk, alpha, body, fuel, yr_rng, price, safety, rel):
72
- a = anchor(mk, md, tr, yr)
73
- if a is None: return 'No match for that combo.', None, None
74
- sub = filt(DF, body, fuel, yr_rng, price, safety, rel)
75
- if sub.empty: return 'No cars after filters.', None, None
76
- Et, En = ensure_emb(); idx = a.name; cand = sub.index.values
77
- st = cosine_similarity(Et[idx:idx+1], Et[cand])[0]; sn = cosine_similarity(En[idx:idx+1], En[cand])[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  s = float(alpha)*st + (1-float(alpha))*sn
79
- import numpy as np
80
- if idx in cand: s[np.where(cand==idx)[0][0]] = -1
81
- order = np.argsort(-s)[:topk]; sel = DF.loc[cand[order]].copy(); sel['similarity_%'] = (s[order]*100).round(1)
82
- cols = ['name','make','model','trim','year','body_type','fuel','engine_type','price_usd','horsepower','zero_to_100_kmh_s','popularity_score','comfort_score','reliability_score','tech_score','ownership_cost_score','safety_rating','similarity_%']
83
- return fmt(a), sel[cols], f'α={alpha:.2f}'
84
-
85
- with gr.Blocks() as demo:
86
- gr.Markdown('# RideSearch — Pick a car, get similar across brands')
87
-
88
- with gr.Tab('Pick & Recommend'):
89
- with gr.Row():
90
- mk = gr.Dropdown(sorted(DF['make'].unique().tolist()), label='Make')
91
- md = gr.Dropdown([], label='Model')
92
- tr = gr.Dropdown([], label='Trim (optional)')
93
- yr = gr.Dropdown([], label='Year (optional)')
94
- mk.change(models_for, mk, md)
95
- def _u(a,b): t,y = trim_year(a,b); return t,y
96
- md.change(_u, [mk,md], [tr,yr])
97
-
98
- with gr.Row():
99
- body = gr.Dropdown(['Any']+sorted(DF['body_type'].unique().tolist()), value='Any', label='Body')
100
- fuel = gr.Dropdown(['Any']+sorted(DF['fuel'].unique().tolist()), value='Any', label='Fuel')
101
- yr_rng = gr.RangeSlider(int(DF['year'].min()), int(DF['year'].max()), value=[int(DF['year'].min()), int(DF['year'].max())], step=1, label='Year range')
102
- with gr.Row():
103
- price = gr.RangeSlider(int(DF['price_usd'].min()), int(DF['price_usd'].max()), value=[int(DF['price_usd'].min()), min(int(DF['price_usd'].max()),60000)], step=500, label='Price (USD)')
104
- safety = gr.Slider(3,5,value=4,step=1,label='Min Safety ★')
105
- rel = gr.Slider(55,99,value=70,step=1,label='Min Reliability')
106
- with gr.Row():
107
- topk = gr.Slider(1,10,value=5,step=1,label='Recommendations')
108
- alpha = gr.Slider(0,1,value=0.7,step=0.05,label='α — Text vs Numeric')
109
- btn = gr.Button('Recommend')
110
- a = gr.Markdown(); recs = gr.Dataframe(interactive=False); note = gr.Markdown()
111
- btn.click(rec, [mk,md,tr,yr,topk,alpha,body,fuel,yr_rng,price,safety,rel], [a,recs,note])
112
-
113
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py — RideSearch (brand-correct trims, cross-brand, smart fallbacks, optional photos)
2
 
3
+ import os, glob, urllib.parse
4
+ import numpy as np
5
+ import pandas as pd
6
  from sklearn.metrics.pairwise import cosine_similarity
7
  from sklearn.preprocessing import StandardScaler
8
+ import gradio as gr
9
+
10
+ # =========================
11
+ # Data loading & embeddings
12
+ # =========================
13
 
def load_df():
    """Load the RideSearch dataset as a DataFrame.

    Reads ``RideSearch_dataset.csv`` from the working directory when it
    exists.  Otherwise every ``RideSearch_part*_small.csv`` file is read in
    sorted-filename order, concatenated with a fresh 0..n-1 index, and the
    merged frame is written back as ``RideSearch_dataset.csv`` so the next
    start-up can take the fast path.

    Returns:
        pandas.DataFrame: the full dataset.

    Raises:
        FileNotFoundError: neither the full CSV nor any part file is present.
    """
    full_path = 'RideSearch_dataset.csv'
    if os.path.exists(full_path):
        return pd.read_csv(full_path)

    parts = sorted(glob.glob('RideSearch_part*_small.csv'))
    if not parts:
        raise FileNotFoundError("Upload RideSearch_dataset.csv OR the 10 parts RideSearch_part*_small.csv.")

    df = pd.concat([pd.read_csv(p) for p in parts], ignore_index=True)
    try:
        df.to_csv(full_path, index=False)
    except OSError as err:
        # The merged file is only a start-up cache: a read-only working
        # directory must not prevent the app from running on the data we
        # already hold in memory.
        import warnings
        warnings.warn(f"Could not cache merged dataset to {full_path}: {err}")
    return df
23
 
# Loaded once at import time; the functions below all read this frame.
DF = load_df()

# Numeric columns that are standardised into the "numeric embedding"
# (see ensure_emb).
NUM_COLS = [
    'horsepower',
    'zero_to_100_kmh_s',
    'seats',
    'cargo_liters',
    'price_usd',
    'popularity_score',
    'comfort_score',
    'reliability_score',
    'tech_score',
    'ownership_cost_score',
    'safety_rating',
]
31
 
def ensure_emb():
    """Return ``(text_embeddings, numeric_embeddings)`` for the rows of ``DF``.

    Both arrays have one row per ``DF`` row.  They are cached on disk as
    ``emb_text.npy`` / ``emb_num.npy``; the cache is reused only when both
    files exist *and* their row counts match ``len(DF)``, otherwise both are
    recomputed and rewritten.

    * Text embedding: ``DF['text_record']`` encoded with the
      ``sentence-transformers/all-MiniLM-L6-v2`` model (L2-normalised).
    * Numeric embedding: ``DF[NUM_COLS]`` standardised column-wise, with
      ``zero_to_100_kmh_s`` negated first so that "larger is better" holds
      for acceleration like it does for the other columns.

    Returns:
        tuple[numpy.ndarray, numpy.ndarray]: float32 arrays ``(Etext, Enum)``.
    """
    n_rows = len(DF)
    if os.path.exists('emb_text.npy') and os.path.exists('emb_num.npy'):
        Etext, Enum = np.load('emb_text.npy'), np.load('emb_num.npy')
        # Callers index these arrays by DF row, so a cache produced from a
        # different dataset would silently return the wrong cars (or raise
        # IndexError).  Only trust it when the row counts agree.
        if len(Etext) == n_rows and len(Enum) == n_rows:
            return Etext, Enum

    # Imported lazily: the model library is only needed on a cache miss.
    from sentence_transformers import SentenceTransformer
    m = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    texts = DF['text_record'].astype(str).tolist()
    Etext = m.encode(texts, batch_size=256, show_progress_bar=True, normalize_embeddings=True)
    Etext = np.asarray(Etext, dtype='float32')
    np.save('emb_text.npy', Etext)

    X = DF[NUM_COLS].copy()
    if 'zero_to_100_kmh_s' in X.columns:
        X['zero_to_100_kmh_s'] = -X['zero_to_100_kmh_s'].astype('float32')
    Xs = StandardScaler().fit_transform(X.values.astype('float32'))
    # StandardScaler passes NaN through unchanged and cosine_similarity
    # rejects NaN input, so map missing values to 0.0 (the column mean after
    # standardisation).
    Enum = np.nan_to_num(Xs.astype('float32'), nan=0.0)
    np.save('emb_num.npy', Enum)
    return Etext, Enum
50
+
51
+ # ==========================================
52
+ # Brand-correct trim display & alias mapping
53
+ # ==========================================
54
+ # Expand this list for the models you’ll demo.
55
# Display trim -> dataset generic category (Base/Sport/Premium/Performance),
# keyed per (make, model).  The same badge means different things on
# different cars ("Sport" is the base Wrangler but a mid-level Civic; "SE" is
# the entry Elantra but a sporty Corolla), so a single flat alias->generic
# dict cannot represent this without later entries overwriting earlier ones.
# Insertion order of each inner dict is the order shown in the trim dropdown.
_TRIM_GENERIC_BY_MODEL = {
    ("BMW", "3 Series"): {
        "320i": "Base", "330i": "Sport", "330e": "Sport", "340i": "Premium", "M3": "Performance",
    },
    ("Audi", "A3"): {
        "35 TFSI": "Base", "40 TFSI": "Sport", "45 TFSI": "Premium", "S3": "Performance", "RS3": "Performance",
    },
    ("Audi", "A4"): {
        "35 TFSI": "Base", "40 TFSI": "Sport", "45 TFSI": "Premium", "S4": "Performance", "RS4": "Performance",
    },
    ("Mercedes-Benz", "C-Class"): {
        "C200": "Base", "C220d": "Base", "C300": "Premium", "AMG C43": "Performance", "AMG C63": "Performance",
    },
    ("Lexus", "IS"): {
        "IS 300": "Base", "IS 350": "Premium", "IS 500 F SPORT": "Performance",
    },
    ("Toyota", "Corolla"): {
        "L": "Base", "LE": "Base", "SE": "Sport", "XSE": "Premium", "GR": "Performance",
    },
    ("Honda", "Civic"): {
        "LX": "Base", "Sport": "Sport", "EX": "Premium", "Touring": "Premium", "Type R": "Performance",
    },
    ("Volkswagen", "Golf"): {
        "Trendline": "Base", "Comfortline": "Base", "Highline": "Premium", "GTI": "Performance", "R": "Performance",
    },
    ("Hyundai", "Elantra"): {
        "SE": "Base", "SEL": "Base", "Limited": "Premium", "N Line": "Sport", "N": "Performance",
    },
    ("Kia", "Forte"): {
        "LX": "Base", "S": "Sport", "EX": "Premium", "GT-Line": "Sport", "GT": "Performance",
    },
    ("Jeep", "Wrangler"): {
        "Sport": "Base", "Willys": "Sport", "Sahara": "Premium", "Rubicon": "Performance", "392": "Performance",
    },
    ("Land Rover", "Defender"): {
        "90": "Base", "110": "Base", "130": "Premium", "X-Dynamic": "Premium", "V8": "Performance",
    },
    ("Mazda", "Mazda3"): {
        "S": "Base", "Select": "Base", "Preferred": "Premium", "Premium": "Premium", "Turbo": "Performance",
    },
    ("Mitsubishi", "Outlander"): {
        "ES": "Base", "SE": "Sport", "SEL": "Premium", "Black Edition": "Premium", "PHEV": "Premium",
    },
    ("Nissan", "Rogue"): {
        "S": "Base", "SV": "Sport", "SL": "Premium", "Platinum": "Premium",
    },
    ("Peugeot", "3008"): {
        "Active": "Base", "Allure": "Premium", "GT": "Premium", "GT Pack": "Premium",
    },
    ("Porsche", "911"): {
        "Carrera": "Base", "Carrera S": "Premium", "GTS": "Premium", "Turbo": "Performance", "GT3": "Performance",
    },
    ("Ram", "1500"): {
        "Tradesman": "Base", "Big Horn": "Sport", "Laramie": "Premium", "Rebel": "Sport", "Limited": "Premium",
    },
    ("Renault", "Clio"): {
        "Authentique": "Base", "Expression": "Sport", "Dynamique": "Premium", "RS Line": "Performance",
    },
    ("Seat", "Leon"): {
        "Reference": "Base", "Style": "Sport", "FR": "Sport", "Cupra": "Performance",
    },
    ("Skoda", "Octavia"): {
        "Active": "Base", "Ambition": "Sport", "Style": "Premium", "RS": "Performance",
    },
    ("Subaru", "Outback"): {
        "Base": "Base", "Premium": "Premium", "Limited": "Premium", "Wilderness": "Sport", "Touring": "Premium",
    },
    ("Tesla", "Model 3"): {
        "RWD": "Base", "Long Range": "Premium", "Performance": "Performance",
    },
    ("Volkswagen", "Tiguan"): {
        "S": "Base", "SE": "Sport", "SEL": "Premium", "R-Line": "Performance",
    },
    ("Volvo", "XC60"): {
        "Core": "Base", "Plus": "Premium", "Ultimate": "Premium", "Polestar Engineered": "Performance",
    },
    ("Mini", "Cooper"): {
        "Classic": "Base", "Signature": "Premium", "Iconic": "Premium", "John Cooper Works": "Performance",
    },
}

# (make, model) -> ordered list of display trims offered in the UI.
TRIM_CHOICES = {key: list(aliases) for key, aliases in _TRIM_GENERIC_BY_MODEL.items()}

# Flat alias -> generic view, kept for backward compatibility only.  Aliases
# shared by several models collapse to whichever model is listed last, so new
# code should go through normalize_trim_for_query() / generic_to_display().
TRIM_ALIAS_TO_GENERIC = {
    alias: generic
    for aliases in _TRIM_GENERIC_BY_MODEL.values()
    for alias, generic in aliases.items()
}


def normalize_trim_for_query(make, model, display_trim):
    """Translate a UI trim label into the generic trim stored in the dataset.

    Args:
        make: selected make.
        model: selected model.
        display_trim: trim label chosen in the dropdown (may be empty/None).

    Returns:
        ``None`` when no trim was chosen; the generic category when
        ``display_trim`` is a known alias for this exact (make, model);
        otherwise ``display_trim`` unchanged (already generic, or a model
        without an alias table).
    """
    if not display_trim:
        return None
    aliases = _TRIM_GENERIC_BY_MODEL.get((make, model), {})
    return aliases.get(display_trim, display_trim)


def generic_to_display(make, model, generic_trim):
    """Convert a dataset generic trim into a display trim for that make/model.

    Picks the first alias (in dropdown order) of this (make, model) that maps
    to ``generic_trim``.

    Returns:
        ``""`` for a missing trim (None, empty string or NaN); the alias when
        one exists; otherwise ``str(generic_trim)`` unchanged.
    """
    # `x != x` is only true for NaN, which is how pandas represents a missing
    # trim; without this check the UI would display the literal text "nan".
    if generic_trim is None or generic_trim != generic_trim or not generic_trim:
        return ""
    for alias, generic in _TRIM_GENERIC_BY_MODEL.get((make, model), {}).items():
        if generic == generic_trim:
            return alias
    return str(generic_trim)
160
+
161
+ # ==============================
162
+ # Dropdowns, filters, formatting
163
+ # ==============================
164
+
165
def models_for(make):
    """Build the Model dropdown update for the chosen make.

    Returns a ``gr.update`` whose choices are the sorted, non-null models of
    ``make`` in ``DF`` (an empty list when no make is selected); the current
    selection is always cleared.
    """
    choices = []
    if make:
        same_make = DF['make'].eq(make)
        choices = sorted(DF.loc[same_make, 'model'].dropna().unique().tolist())
    return gr.update(choices=choices, value=None)
170
+
171
def trim_year(make, model):
    """Return ``(trims, years)`` to offer for the selected make/model.

    Trims: the curated ``TRIM_CHOICES`` list when this exact (make, model) has
    one; otherwise up to 20 distinct trims found in ``DF`` for whatever part
    of the make/model selection is set.

    Years: sorted distinct model years from ``DF``, only when both make and
    model are selected; otherwise an empty list.
    """
    sub = DF
    if make:
        sub = sub[sub['make'] == make]
    if model:
        sub = sub[sub['model'] == model]

    if make and model and (make, model) in TRIM_CHOICES:
        # Copy so callers can never mutate the shared module-level list.
        trims = list(TRIM_CHOICES[(make, model)])
    else:
        # dropna() must run before astype(str): once NaN has been turned into
        # the string "nan" it is no longer "missing" and would be listed as a
        # trim choice.
        trims = sorted(sub['trim'].dropna().astype(str).unique().tolist())[:20]

    if make and model:
        years = sorted(sub['year'].dropna().astype(int).unique().tolist())
    else:
        years = []
    return trims, years
187
 
188
def on_model_change(make, model):
    """Refresh the Trim and Year dropdowns after the model selection changes.

    Returns two ``gr.update`` objects (trim, year), each with new choices from
    ``trim_year`` and a cleared value.
    """
    trim_choices, year_choices = trim_year(make, model)
    trim_update = gr.update(choices=trim_choices, value=None)
    year_update = gr.update(choices=year_choices, value=None)
    return trim_update, year_update
191
+
192
def apply_filters(df, body, fuel, y_min, y_max, p_min, p_max, safety, reliab):
    """Return the rows of ``df`` that satisfy every UI filter.

    Args:
        df: frame with ``body_type``, ``fuel``, ``year``, ``price_usd``,
            ``safety_rating`` and ``reliability_score`` columns.
        body, fuel: exact value to keep, or ``'Any'`` to skip that filter.
        y_min, y_max: inclusive model-year bounds.
        p_min, p_max: inclusive price bounds (USD).
        safety, reliab: minimum safety rating / reliability score.

    Returns:
        A new DataFrame (``df`` itself is never modified) that keeps the
        original index labels.
    """
    # Min and max come from two independent sliders, so the user can drag
    # "min" above "max".  Treat the pair as an unordered range instead of
    # silently returning nothing.
    y_lo, y_hi = sorted((y_min, y_max))
    p_lo, p_hi = sorted((p_min, p_max))

    keep = (
        df['year'].between(y_lo, y_hi)
        & df['price_usd'].between(p_lo, p_hi)
        & (df['safety_rating'] >= safety)
        & (df['reliability_score'] >= reliab)
    )
    if body != 'Any':
        keep &= df['body_type'] == body
    if fuel != 'Any':
        keep &= df['fuel'] == fuel
    return df[keep].copy()
200
 
201
def fmt_anchor(r):
    """Render the anchor car (one dataset row) as a short Markdown summary.

    The trim is shown with its brand-specific display name; the numeric
    fields are truncated to integers, except the 0–100 time which is shown
    as stored.
    """
    trim_label = generic_to_display(r['make'], r['model'], r['trim'])
    year = int(r['year'])
    hp = int(r['horsepower'])
    price = int(r['price_usd'])
    popularity = int(r['popularity_score'])
    comfort = int(r['comfort_score'])
    reliability = int(r['reliability_score'])
    safety = int(r['safety_rating'])

    lines = [
        f"**{r['make']} {r['model']} {trim_label} {year}**",
        f"Body: {r['body_type']} • Fuel: {r['fuel']} • Engine: {r['engine_type']}",
        f"HP: {hp} 0–100: {r['zero_to_100_kmh_s']}s Price: ${price:,}",
        f"Popularity {popularity}/10 Comfort {comfort}/10 • "
        f"Reliability {reliability}/100 • Safety {safety}★",
    ]
    return " \n".join(lines)
208
+
209
+ # ==============
210
+ # Photo helpers
211
+ # ==============
212
def placeholder_svg_data_uri(title):
    """Return a ``data:`` URI for an inline SVG placeholder showing ``title``.

    The image is generated locally, so no network access or image hosting is
    needed.  ``title`` is XML-escaped before being placed in the ``<text>``
    element, so names containing ``&``, ``<`` or ``>`` still produce
    well-formed SVG; the whole document is then percent-encoded for the URI.
    """
    from html import escape

    safe_title = escape(str(title), quote=False)
    svg = f"""<svg xmlns='http://www.w3.org/2000/svg' width='480' height='320'>
  <rect width='100%' height='100%' fill='#e8eef7'/>
  <text x='50%' y='50%' dominant-baseline='middle' text-anchor='middle'
        font-family='Arial' font-size='26' fill='#223'>
    {safe_title}
  </text>
</svg>"""
    return "data:image/svg+xml;utf8," + urllib.parse.quote(svg)
223
+
224
def build_gallery_html(df_rows):
    """Render the given rows as a wrapping strip of HTML "photo cards".

    Args:
        df_rows: pandas DataFrame with ``make``, ``model`` and ``trim``
            columns and, optionally, an ``image_url`` column.

    Returns:
        One HTML string.  Each card shows the row's ``image_url`` when it is
        a non-blank string, otherwise a generated SVG placeholder, above a
        "make model trim" caption.
    """
    from html import escape

    cards = []
    for _, r in df_rows.iterrows():
        label = f"{r['make']} {r['model']} {generic_to_display(r['make'], r['model'], r['trim'])}"
        if 'image_url' in r and isinstance(r['image_url'], str) and r['image_url'].strip():
            img_src = r['image_url'].strip()
        else:
            img_src = placeholder_svg_data_uri(f"{r['make']} {r['model']}")
        # Both values originate from the CSV: escape them so a stray quote,
        # '<' or '&' can neither break the markup nor inject HTML.
        cards.append(f"""
        <div style="width:240px;margin:6px;border:1px solid #ddd;border-radius:12px;overflow:hidden;background:#fff;">
          <img src="{escape(img_src, quote=True)}" style="width:240px;height:160px;object-fit:cover;display:block" />
          <div style="padding:8px 10px;font:14px/1.3 Arial,sans-serif;color:#111">{escape(label)}</div>
        </div>
        """)
    return f"<div style='display:flex;flex-wrap:wrap'>{''.join(cards)}</div>"
240
+
241
+ # ===========================
242
+ # Anchor selection & ranking
243
+ # ===========================
244
def anchor_row(make, model, trim_display, year):
    """Pick the dataset row that best represents the user's selection.

    Rows are first limited to the chosen make/model (whichever are set), then
    matched in decreasing strictness; the first tier with any rows wins and
    its most popular row (highest ``popularity_score``) is returned:

      1. trim and year both match (only the ones that were provided),
      2. year matches, trim ignored,
      3. trim matches, year ignored,
      4. any row of the make/model.

    Returns ``None`` when nothing matches the make/model at all.
    """
    trim_generic = normalize_trim_for_query(make, model, trim_display)

    scope = DF.copy()
    if make:
        scope = scope[scope['make'] == make]
    if model:
        scope = scope[scope['model'] == model]

    everything = pd.Series(True, index=scope.index)
    trim_hit = scope['trim'].eq(trim_generic) if trim_generic else everything
    year_hit = scope['year'].eq(year) if year else everything

    # Masks ordered from strictest to loosest.
    tiers = [trim_hit & year_hit]
    if year:
        tiers.append(year_hit)
    if trim_generic:
        tiers.append(trim_hit)
    tiers.append(everything)

    for mask in tiers:
        rows = scope[mask]
        if not rows.empty:
            return rows.sort_values('popularity_score', ascending=False).iloc[0]
    return None
270
+
271
def recommend(make, model, trim_display, year, topk, alpha,
              body, fuel, y_min, y_max, p_min, p_max, safety, reliab,
              cross_brand_only=True, exclude_same_model=True):
    """Recommend cars similar to the selected ("anchor") car.

    Args:
        make, model, trim_display, year: the user's selection, resolved to one
            anchor row by ``anchor_row``.
        topk: maximum number of recommendations.
        alpha: weight of text similarity; ``1 - alpha`` weights the numeric
            similarity.
        body, fuel, y_min, y_max, p_min, p_max, safety, reliab: candidate
            filters, forwarded to ``apply_filters``.
        cross_brand_only: drop every car of the anchor's make.
        exclude_same_model: drop every car of the anchor's make+model.

    Returns:
        ``(anchor_markdown, table, note, gallery_html)``.  When there is no
        anchor or no candidate survives, the first item is a message and the
        others are ``None`` / ``""`` / ``None``.
    """
    a = anchor_row(make, model, trim_display, year)
    if a is None:
        return "No match for that combo.", None, "", None

    # Candidate pool.  The anchor row itself is always removed: with both
    # checkboxes off it would otherwise be its own best match.
    pool = DF.drop(index=a.name)
    if cross_brand_only:
        pool = pool[pool['make'] != a['make']]
    if exclude_same_model:
        pool = pool[~((pool['make'] == a['make']) & (pool['model'] == a['model']))]

    pool = apply_filters(pool, body, fuel, int(y_min), int(y_max), int(p_min), int(p_max), int(safety), int(reliab))
    if pool.empty:
        return "No cars after filters. Try widening year/price/safety.", None, "", None

    # DF row labels double as row positions in the embedding arrays; this
    # relies on DF having the default 0..n-1 index that load_df() produces.
    Etext, Enum = ensure_emb()
    idx_anchor = int(a.name)
    cand_idx = pool.index.values
    st = cosine_similarity(Etext[idx_anchor:idx_anchor+1], Etext[cand_idx])[0]
    sn = cosine_similarity(Enum[idx_anchor:idx_anchor+1], Enum[cand_idx])[0]
    s = float(alpha)*st + (1-float(alpha))*sn

    # Walk candidates from most to least similar, keeping only the best row
    # of each (make, model) so the list is not several trims of one car.
    limit = int(topk)
    makes = pool['make'].to_numpy()
    models = pool['model'].to_numpy()
    seen = set()
    picked = []  # positions into cand_idx / s
    for j in np.argsort(-s):
        key = (makes[j], models[j])
        if key in seen:
            continue
        seen.add(key)
        picked.append(j)
        if len(picked) >= limit:
            break

    if not picked:
        return "No recommendations found after constraints.", None, "", None

    sel = DF.loc[cand_idx[picked]].copy()
    # map generic -> display for table
    sel['trim_display'] = [
        generic_to_display(mk, md, tr)
        for mk, md, tr in zip(sel['make'], sel['model'], sel['trim'])
    ]
    sel['similarity_%'] = [round(float(s[j]) * 100, 1) for j in picked]

    cols = [
        'name', 'make', 'model', 'trim_display', 'year', 'body_type', 'fuel', 'engine_type',
        'price_usd', 'horsepower', 'zero_to_100_kmh_s',
        'popularity_score', 'comfort_score', 'reliability_score',
        'tech_score', 'ownership_cost_score', 'safety_rating', 'similarity_%',
    ]
    note = (f"α = {float(alpha):.2f} (text ↔ numeric) • Cross-brand only = {cross_brand_only} "
            f"• Exclude same model = {exclude_same_model}")
    gallery = build_gallery_html(sel)

    return fmt_anchor(a), sel[cols], note, gallery
325
+
326
+ # ============
327
+ # Gradio UI
328
+ # ============
329
def build_ui():
    """Assemble the Gradio Blocks app and return it (without launching).

    Slider ranges and dropdown choices are derived from ``DF``.  The
    "Recommend" button sends every control to ``recommend`` and shows its four
    results (anchor summary, table, note, gallery).
    """
    # NOTE(review): nesting below was reconstructed from a diff view that had
    # lost indentation — confirm the Tab/Blocks placement against the real file.
    year_floor, year_ceil = int(DF['year'].min()), int(DF['year'].max())
    price_floor, price_ceil = int(DF['price_usd'].min()), int(DF['price_usd'].max())

    make_choices = sorted(DF['make'].dropna().unique().tolist())
    body_choices = ['Any'] + sorted(DF['body_type'].dropna().unique().tolist())
    fuel_choices = ['Any'] + sorted(DF['fuel'].dropna().unique().tolist())

    with gr.Blocks() as demo:
        gr.Markdown("# RideSearch — cross-brand recommendations with real trims")

        with gr.Tab("Pick & Recommend"):
            # Car selection: each dropdown narrows the next one.
            with gr.Row():
                mk = gr.Dropdown(make_choices, label="Make", value=None)
                md = gr.Dropdown([], label="Model", value=None)
                tr = gr.Dropdown([], label="Trim (optional)", value=None)
                yr = gr.Dropdown([], label="Year (optional)", value=None)
                mk.change(models_for, mk, md)
                md.change(on_model_change, [mk, md], [tr, yr])

            # Candidate filters.
            with gr.Row():
                body = gr.Dropdown(body_choices, value='Any', label='Body')
                fuel = gr.Dropdown(fuel_choices, value='Any', label='Fuel')

            with gr.Row():
                y_min = gr.Slider(year_floor, year_ceil, value=year_floor, step=1, label='Year min')
                y_max = gr.Slider(year_floor, year_ceil, value=year_ceil, step=1, label='Year max')

            with gr.Row():
                p_min = gr.Slider(price_floor, price_ceil, value=price_floor, step=500,
                                  label='Price min (USD)')
                p_max = gr.Slider(price_floor, price_ceil, value=min(price_ceil, 80000), step=500,
                                  label='Price max (USD)')

            with gr.Row():
                safety = gr.Slider(3, 5, value=4, step=1, label='Min Safety ★')
                reliab = gr.Slider(55, 99, value=70, step=1, label='Min Reliability')

            # Ranking controls.
            with gr.Row():
                topk = gr.Slider(1, 10, value=5, step=1, label='Recommendations')
                alpha = gr.Slider(0, 1, value=0.7, step=0.05, label='α — Text vs Numeric')

            with gr.Row():
                cross = gr.Checkbox(label="Cross-brand only", value=True)
                xmodel = gr.Checkbox(label="Exclude same model family", value=True)

            # Action and outputs.
            go = gr.Button("Recommend")
            anchor_md = gr.Markdown()
            table = gr.Dataframe(interactive=False)
            note = gr.Markdown()
            gallery = gr.HTML()

            # Order must match recommend()'s positional parameters.
            controls = [mk, md, tr, yr, topk, alpha, body, fuel,
                        y_min, y_max, p_min, p_max, safety, reliab, cross, xmodel]
            results = [anchor_md, table, note, gallery]
            go.click(recommend, controls, results)

        gr.Markdown("Tip: If you don’t add images, you still get clean placeholders. "
                    "Add an 'image_url' column to your CSV to show real photos.")

    return demo
387
+
388
# Built at module level so `demo` exists whenever this file is imported,
# not only when it is executed as a script.
demo = build_ui()

if __name__ == "__main__":
    # Enable request queuing, then listen on all interfaces, port 7860.
    queued_app = demo.queue()
    queued_app.launch(server_name="0.0.0.0", server_port=7860)