Levimichael4 committed on
Commit
1ce89d0
·
verified ·
1 Parent(s): ebb512a

Upload 3 files

Browse files
Files changed (3) hide show
  1. app_new_images.py +508 -0
  2. repair_accel.py +79 -0
  3. trims_map (1).json +382 -0
app_new_images.py ADDED
@@ -0,0 +1,508 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # app_new_images.py — RideSearch with real trims + auto photos + Admin tools
3
+ # -------------------------------------------------------------------------
4
+ # Features:
5
+ # - Trim mapping via trims_map.json (for correct brand/model-specific display)
6
+ # - Cross-brand recommendations (unique model families by default)
7
+ # - Automatic photos from Wikipedia/Wikimedia (no key), optional Bing fallback via env BING_KEY
8
+ # - Admin • Trim Fixer: preview dataset trims, save curated display trims per model
9
+ # - Admin • Dataset Tools: recompute zero_to_100_kmh_s with a realistic heuristic and download the fixed CSV
10
+
11
+ import os, glob, json, urllib.parse, requests, io
12
+ import numpy as np
13
+ import pandas as pd
14
+ from sklearn.metrics.pairwise import cosine_similarity
15
+ from sklearn.preprocessing import StandardScaler
16
+ import gradio as gr
17
+
18
+ DATA_PATH = 'RideSearch_dataset.csv'
19
+ TRIMS_PATH = 'trims_map.json'
20
+
21
+ # ----------------------------- Data loading -----------------------------
22
def load_df():
    """Load the RideSearch dataset as a DataFrame.

    Reads DATA_PATH when that file exists. Otherwise every
    ``RideSearch_part*_small.csv`` file in the working directory is read in
    sorted-name order, concatenated, written to DATA_PATH and returned.

    Raises:
        FileNotFoundError: neither DATA_PATH nor any part file is present.
    """
    if not os.path.exists(DATA_PATH):
        part_files = sorted(glob.glob('RideSearch_part*_small.csv'))
        if len(part_files) == 0:
            raise FileNotFoundError("Upload RideSearch_dataset.csv OR the 10 parts RideSearch_part*_small.csv.")
        frames = [pd.read_csv(path) for path in part_files]
        merged = pd.concat(frames, ignore_index=True)
        # Persist the merged table so the next start-up takes the fast path.
        merged.to_csv(DATA_PATH, index=False)
        return merged
    return pd.read_csv(DATA_PATH)
31
+
32
+ DF = load_df()
33
+
34
+ NUM_COLS = [
35
+ 'horsepower','zero_to_100_kmh_s','seats','cargo_liters','price_usd',
36
+ 'popularity_score','comfort_score','reliability_score','tech_score',
37
+ 'ownership_cost_score','safety_rating'
38
+ ]
39
+
40
+ # ----------------------------- Embeddings -----------------------------
41
def ensure_emb():
    """Return ``(text_embeddings, numeric_embeddings)`` for the rows of DF.

    When both ``emb_text.npy`` and ``emb_num.npy`` exist they are loaded and
    returned as-is. Otherwise both are rebuilt and saved:

    * text: DF['text_record'] encoded with the all-MiniLM-L6-v2 sentence
      transformer (normalised embeddings, float32);
    * numeric: the NUM_COLS columns, with the 0-100 time negated, passed
      through a StandardScaler (float32).
    """
    text_file, num_file = 'emb_text.npy', 'emb_num.npy'
    if os.path.exists(text_file) and os.path.exists(num_file):
        return np.load(text_file), np.load(num_file)

    # Imported lazily: the model library is only needed on a cache miss.
    from sentence_transformers import SentenceTransformer
    encoder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    records = DF['text_record'].astype(str).tolist()
    text_emb = np.asarray(
        encoder.encode(records, batch_size=256, show_progress_bar=True, normalize_embeddings=True),
        dtype='float32',
    )
    np.save(text_file, text_emb)

    numeric = DF[NUM_COLS].copy()
    if 'zero_to_100_kmh_s' in numeric.columns:
        # lower is better → invert
        numeric['zero_to_100_kmh_s'] = -numeric['zero_to_100_kmh_s'].astype('float32')
    scaled = StandardScaler().fit_transform(numeric.values.astype('float32'))
    num_emb = scaled.astype('float32')
    np.save(num_file, num_emb)
    return text_emb, num_emb
60
+
61
+ # ----------------------------- Trims mapping -----------------------------
62
+ TRIM_CHOICES = {} # (make, model) -> [display trims]
63
+ TRIM_ALIAS_TO_GENERIC = {} # (make, model, alias) -> generic token
64
+
65
def load_trims():
    """(Re)build the module-level trim tables from TRIMS_PATH.

    Resets TRIM_CHOICES and TRIM_ALIAS_TO_GENERIC to empty dicts, then, if
    TRIMS_PATH exists, fills them from its JSON content. Each top-level key
    is expected to look like ``"<make>||<model>"`` and map to an object with
    optional ``display`` (list) and ``alias_to_generic`` (object) members.

    Entries whose key has no ``||`` separator, or whose value is not an
    object, are skipped so one malformed entry cannot prevent the module
    from loading.
    """
    global TRIM_CHOICES, TRIM_ALIAS_TO_GENERIC
    TRIM_CHOICES, TRIM_ALIAS_TO_GENERIC = {}, {}
    if not os.path.exists(TRIMS_PATH):
        return
    with open(TRIMS_PATH, 'r', encoding='utf-8') as f:
        data = json.load(f)
    for key, entry in data.items():
        make, sep, model = key.partition('||')
        if not sep or not isinstance(entry, dict):
            # Malformed entry: ignore it instead of raising on unpack/.get.
            continue
        TRIM_CHOICES[(make, model)] = entry.get('display', [])
        for alias, generic in entry.get('alias_to_generic', {}).items():
            TRIM_ALIAS_TO_GENERIC[(make, model, alias)] = generic
77
+
78
+ load_trims()
79
+
80
def generic_to_display(make, model, generic_trim):
    """Translate a dataset trim token into the curated display trim.

    Returns "" for a falsy ``generic_trim``. When (make, model) has curated
    display trims, the first one whose generic token (looked up in
    TRIM_ALIAS_TO_GENERIC, defaulting to the display name itself) equals
    ``generic_trim`` is returned. In every other case the result is
    ``str(generic_trim)``.
    """
    if not generic_trim:
        return ""
    display_trims = TRIM_CHOICES.get((make, model))
    if display_trims:
        match = next(
            (name for name in display_trims
             if TRIM_ALIAS_TO_GENERIC.get((make, model, name), name) == generic_trim),
            None,
        )
        if match is not None:
            return match
    return str(generic_trim)
90
+
91
def alias_to_generic(make, model, alias):
    """Map a display trim back to its generic token.

    Returns None for a falsy ``alias``; otherwise the mapped token from
    TRIM_ALIAS_TO_GENERIC, or ``alias`` itself when no mapping exists.
    """
    if alias:
        return TRIM_ALIAS_TO_GENERIC.get((make, model, alias), alias)
    return None
95
+
96
+ # ----------------------------- UI helpers -----------------------------
97
def models_for(make):
    """Build the Model dropdown update for the selected make.

    Returns a ``gr.update`` whose choices are the sorted, non-null, unique
    models of ``make`` in DF (empty when no make is selected) and whose
    value is cleared.
    """
    if make:
        names = DF.loc[DF['make'].eq(make), 'model'].dropna().unique().tolist()
        return gr.update(choices=sorted(names), value=None)
    return gr.update(choices=[], value=None)
102
+
103
def trim_year(make, model):
    """Return ``(trims, years)`` choices for the given make/model.

    Trims come from TRIM_CHOICES when both make and model are set and a
    curated list exists; otherwise from the (up to 15) most frequent
    dataset trims of the matching rows, translated with
    generic_to_display. Years are the sorted unique integer years of the
    exact make/model, or an empty list when either is missing.
    """
    have_both = bool(make and model)
    if have_both and (make, model) in TRIM_CHOICES:
        trims = TRIM_CHOICES[(make, model)]
    else:
        rows = DF
        if make:
            rows = rows[rows['make'] == make]
        if model:
            rows = rows[rows['model'] == model]
        trims = []
        if 'trim' in rows.columns and not rows.empty:
            # Frequent trims first
            common = rows['trim'].astype(str).value_counts().head(15).index.tolist()
            trims = [generic_to_display(make or "", model or "", t) for t in common]

    years = []
    if have_both:
        selector = (DF['make'].eq(make)) & (DF['model'].eq(model))
        years = sorted(DF.loc[selector, 'year'].dropna().astype(int).unique().tolist())
    return trims, years
124
+
125
def on_model_change(make, model):
    """Produce the Trim and Year dropdown updates after a model change.

    Both dropdowns get the choices computed by trim_year and have their
    current value cleared.
    """
    trim_choices, year_choices = trim_year(make, model)
    trim_update = gr.update(choices=trim_choices, value=None)
    year_update = gr.update(choices=year_choices, value=None)
    return trim_update, year_update
128
+
129
def apply_filters(df, body, fuel, y_min, y_max, p_min, p_max, safety, reliab):
    """Return a filtered copy of ``df``; the input frame is left untouched.

    Rows are kept when year and price fall inside the inclusive ranges,
    safety_rating and reliability_score reach their minimums, and body_type
    / fuel equal the requested value (the literal 'Any' disables that
    filter).
    """
    keep = (df['year'] >= y_min) & (df['year'] <= y_max)
    keep = keep & (df['price_usd'] >= p_min) & (df['price_usd'] <= p_max)
    keep = keep & (df['safety_rating'] >= safety) & (df['reliability_score'] >= reliab)
    if body != 'Any':
        keep = keep & (df['body_type'] == body)
    if fuel != 'Any':
        keep = keep & (df['fuel'] == fuel)
    return df[keep].copy()
137
+
138
+ # ----------------------------- Photos -----------------------------
139
def fetch_wikimedia_image(query):
    """Best-effort lookup of an image URL for ``query`` on English Wikipedia.

    Two strategies are tried in order:

    1. the MediaWiki ``pageimages`` API with ``query`` used as a page title
       (returns the original image URL);
    2. the REST title search (first hit) followed by the page-summary
       endpoint (returns the thumbnail URL).

    Returns the first URL found, or None. Network problems and unexpected
    payloads are treated as "no image": the caller has its own fallbacks.
    """
    # Errors that mean "this lookup failed", not "the program is broken".
    soft_errors = (requests.RequestException, ValueError, KeyError,
                   IndexError, TypeError, AttributeError)

    # Try PageImages
    try:
        r = requests.get(
            "https://en.wikipedia.org/w/api.php",
            # Let requests encode the query string rather than hand-building it.
            params={"action": "query", "prop": "pageimages", "format": "json",
                    "piprop": "original", "titles": query},
            timeout=8,
        )
        pages = r.json().get('query', {}).get('pages', {})
        for page in pages.values():
            orig = page.get('original')
            if orig and 'source' in orig:
                return orig['source']
    except soft_errors:
        pass

    # Try REST search + summary thumbnail
    try:
        found = requests.get(
            "https://en.wikipedia.org/w/rest.php/v1/search/title",
            params={"q": query, "limit": 1},
            timeout=8,
        ).json()
        if found.get('pages'):
            title = found['pages'][0]['title']
            # safe='' so a "/" inside the title is percent-encoded and stays
            # part of this single path segment.
            encoded_title = urllib.parse.quote(title, safe='')
            summary = requests.get(
                f"https://en.wikipedia.org/api/rest_v1/page/summary/{encoded_title}",
                timeout=8,
            ).json()
            thumb = summary.get('thumbnail', {}).get('source')
            if thumb:
                return thumb
    except soft_errors:
        pass
    return None
172
+
173
def fetch_bing_image(query):
    """Optional Bing image-search fallback.

    Returns None when the BING_KEY environment variable is unset or empty,
    when the request or its parsing fails, or when the response has no
    results; otherwise the ``contentUrl`` of the first result.
    """
    api_key = os.getenv("BING_KEY")
    if not api_key:
        return None
    try:
        resp = requests.get(
            "https://api.bing.microsoft.com/v7.0/images/search",
            headers={"Ocp-Apim-Subscription-Key": api_key},
            params={"q": query, "count": 1, "safeSearch": "Strict"},
            timeout=8,
        )
        payload = resp.json()
        if not payload.get("value"):
            return None
        return payload["value"][0]["contentUrl"]
    except Exception:
        return None
188
+
189
def get_image_for(make, model, trim_disp, year):
    """Find a photo URL for a car, or None.

    Lookup order: Wikipedia with the full "year make model trim" query
    (falsy parts omitted), Wikipedia with just "make model", then Bing with
    the full query.
    """
    full_query = " ".join(str(piece) for piece in (year, make, model, trim_disp) if piece)
    found = fetch_wikimedia_image(full_query)
    if not found:
        found = fetch_wikimedia_image(f"{make} {model}")
    if not found:
        found = fetch_bing_image(full_query)
    return found
196
+
197
def placeholder_svg_data_uri(title):
    """Return a ``data:`` URI for a 480x320 SVG placeholder showing ``title``.

    The title is XML-escaped before being embedded, so characters such as
    ``&`` or ``<`` cannot break the SVG markup; the whole document is then
    percent-encoded for use in an ``src`` attribute.
    """
    from html import escape  # local import keeps this block self-contained

    safe_title = escape(str(title), quote=True)
    # Plain triple quotes: the backslash-escaped form (f\"\"\") is not valid
    # Python and stops the module from importing.
    svg = f"""<svg xmlns='http://www.w3.org/2000/svg' width='480' height='320'>
  <rect width='100%' height='100%' fill='#eef3fb'/>
  <text x='50%' y='50%' dominant-baseline='middle' text-anchor='middle'
        font-family='Arial' font-size='22' fill='#223'>
    {safe_title}
  </text>
</svg>"""
    return "data:image/svg+xml;utf8," + urllib.parse.quote(svg)
206
+
207
def build_gallery_html(df_rows):
    """Render the recommended rows as a flex-wrapped gallery of photo cards.

    For every row of ``df_rows`` (needs ``make``, ``model``, ``trim`` and
    ``year``) a photo URL is looked up with get_image_for; when none is
    found an inline SVG placeholder is used. Label text and image URL are
    HTML-escaped before being interpolated into the markup.

    Returns:
        One HTML string containing all cards.
    """
    from html import escape  # local import keeps this block self-contained

    cards = []
    for _, r in df_rows.iterrows():
        disp_trim = generic_to_display(r['make'], r['model'], r['trim'])
        label = f"{r['make']} {r['model']} {disp_trim}"
        # A missing year must not abort the whole gallery; get_image_for
        # simply leaves falsy parts out of its query.
        year = int(r['year']) if pd.notna(r['year']) else None
        img_src = get_image_for(r['make'], r['model'], disp_trim, year)
        if not img_src:
            img_src = placeholder_svg_data_uri(f"{r['make']} {r['model']}")
        safe_src = escape(str(img_src), quote=True)
        safe_label = escape(label, quote=True)
        # Plain triple quotes: the backslash-escaped form is a SyntaxError.
        cards.append(f"""
<div style="width:240px;margin:6px;border:1px solid #ddd;border-radius:12px;overflow:hidden;background:#fff;">
  <img src="{safe_src}" alt="{safe_label}" style="width:240px;height:160px;object-fit:cover;display:block" />
  <div style="padding:8px 10px;font:14px/1.3 Arial,sans-serif;color:#111">{safe_label}</div>
</div>
""")
    return f"<div style='display:flex;flex-wrap:wrap'>{''.join(cards)}</div>"
222
+
223
+ # ----------------------------- Anchor & Recommend -----------------------------
224
def find_anchor(make, model, trim_display, year):
    """Pick the dataset row that best represents the user's selection.

    Candidates are the rows of DF matching ``make`` and ``model`` (each
    filter applied only when set). Preference order:

    1. rows matching both trim and year (whichever of the two were given);
    2. rows matching the year only;
    3. rows matching the trim only;
    4. any candidate.

    Inside the winning group the row with the highest popularity_score is
    returned. Returns None when no candidate exists.
    """
    trim_generic = alias_to_generic(make, model, trim_display) if trim_display else None

    candidates = DF.copy()
    if make:
        candidates = candidates[candidates['make'] == make]
    if model:
        candidates = candidates[candidates['model'] == model]

    def most_popular(frame):
        if frame.empty:
            return None
        ranked = frame.sort_values('popularity_score', ascending=False)
        return ranked.iloc[0]

    exact = candidates.copy()
    if trim_generic:
        exact = exact[exact['trim'] == trim_generic]
    if year:
        exact = exact[exact['year'] == year]
    if not exact.empty:
        return most_popular(exact)

    # Relaxed matches, tried in priority order: year first, then trim.
    fallbacks = []
    if year:
        fallbacks.append(('year', year))
    if trim_generic:
        fallbacks.append(('trim', trim_generic))
    for column, wanted in fallbacks:
        narrowed = candidates[candidates[column] == wanted]
        if not narrowed.empty:
            return most_popular(narrowed)

    return most_popular(candidates)
247
+
248
def apply_and_recommend(a, topk, alpha, body, fuel, y_min, y_max, p_min, p_max, safety, reliab,
                        cross_brand_only, exclude_same_model):
    """Rank candidate cars by blended similarity to the anchor row ``a``.

    Args:
        a: anchor row (a Series taken from DF; ``a.name`` is its index label).
        topk: maximum number of recommendations.
        alpha: weight of text similarity; ``1 - alpha`` weights numeric similarity.
        body, fuel, y_min, y_max, p_min, p_max, safety, reliab: forwarded to
            apply_filters (numeric bounds are cast to int first).
        cross_brand_only: drop every car of the anchor's make.
        exclude_same_model: drop every car of the anchor's make + model.

    Returns:
        ``(frame, None)`` on success, where ``frame`` holds at most one row
        per (make, model), best first, with added ``trim_display`` and
        ``similarity_%`` columns; ``(None, message)`` when nothing is left.

    Index labels are used directly as row positions into the embedding
    arrays, so DF is expected to keep its default 0..n-1 index.
    """
    pool = DF.copy()
    if cross_brand_only:
        pool = pool[pool['make'] != a['make']]
    if exclude_same_model:
        pool = pool[~((pool['make'] == a['make']) & (pool['model'] == a['model']))]
    # The anchor must never be recommended to itself; without this it wins
    # with 100% similarity whenever both exclusion switches are off.
    pool = pool.drop(index=a.name, errors='ignore')
    pool = apply_filters(pool, body, fuel, int(y_min), int(y_max), int(p_min), int(p_max), int(safety), int(reliab))
    if pool.empty:
        return None, "No cars after filters. Try widening year/price/safety."

    Etext, Enum = ensure_emb()
    idx_anchor = int(a.name)
    cand_idx = pool.index.values
    weight = float(alpha)
    st = cosine_similarity(Etext[idx_anchor:idx_anchor+1], Etext[cand_idx])[0]
    sn = cosine_similarity(Enum[idx_anchor:idx_anchor+1], Enum[cand_idx])[0]
    s = weight*st + (1-weight)*sn
    order = np.argsort(-s)

    # Walk candidates best-first, keeping the top row of each model family.
    limit = int(topk)
    makes = pool['make'].to_numpy()
    models = pool['model'].to_numpy()
    seen = set()
    chosen = []
    scores = []
    for j in order:
        key = (makes[j], models[j])
        if key in seen:
            continue
        seen.add(key)
        chosen.append(cand_idx[j])
        scores.append(round(float(s[j])*100, 1))
        if len(chosen) >= limit:
            break
    if not chosen:
        return None, "No recommendations found after constraints."

    sel = DF.loc[chosen].copy()
    sel['trim_display'] = sel.apply(lambda r: generic_to_display(r['make'], r['model'], r['trim']), axis=1)
    # scores is aligned with chosen, which is also the row order of sel.
    sel['similarity_%'] = scores
    return sel, None
279
+
280
def recommend(make, model, trim_display, year, topk, alpha,
              body, fuel, y_min, y_max, p_min, p_max, safety, reliab,
              cross_brand_only=True, exclude_same_model=True):
    """Gradio callback behind the "Recommend" button.

    Resolves the anchor car from the dropdown values, ranks candidates with
    apply_and_recommend and formats the four outputs.

    Returns:
        ``(anchor_markdown, table, note_markdown, gallery_html)``. When no
        anchor or no recommendation is found, the first element carries the
        message and the others are ``None, "", None``.
    """
    a = find_anchor(make, model, trim_display, year)
    if a is None:
        return "No match for that combo.", None, "", None

    sel, err = apply_and_recommend(a, topk, alpha, body, fuel, y_min, y_max, p_min, p_max, safety, reliab,
                                   cross_brand_only, exclude_same_model)
    if err:
        return err, None, "", None

    cols = ['name','make','model','trim_display','year','body_type','fuel','engine_type',
            'price_usd','horsepower','zero_to_100_kmh_s','popularity_score','comfort_score',
            'reliability_score','tech_score','ownership_cost_score','safety_rating','similarity_%']

    # Two trailing spaces followed by a real newline make a Markdown hard
    # line break. The doubly escaped "\\n" printed a literal backslash-n.
    anchor_text = (f"**{a['make']} {a['model']} {generic_to_display(a['make'], a['model'], a['trim'])} "
                   f"{int(a['year'])}**  \n"
                   f"Body: {a['body_type']} • Fuel: {a['fuel']} • Engine: {a['engine_type']}  \n"
                   f"HP: {int(a['horsepower'])} • 0–100: {a['zero_to_100_kmh_s']}s • Price: ${int(a['price_usd']):,}  \n"
                   f"Popularity {int(a['popularity_score'])}/10 • Comfort {int(a['comfort_score'])}/10 • "
                   f"Reliability {int(a['reliability_score'])}/100 • Safety {int(a['safety_rating'])}★")

    note = (f"α = {float(alpha):.2f} (text ↔ numeric) • Cross-brand only = {cross_brand_only} "
            f"• Exclude same model = {exclude_same_model}")

    gallery = build_gallery_html(sel)
    return anchor_text, sel[cols], note, gallery
309
+
310
+ # ----------------------------- Admin: Trim Fixer -----------------------------
311
def wiki_suggest_trims(make, model):
    """Collect hints for curating the trims of one model.

    Returns a dict with:
        ``wiki_titles``: up to 5 Wikipedia page titles for the query
            "<make> <model> trim levels" (empty when the lookup fails);
        ``dataset_top_trims``: the (up to 10) most frequent trim values of
            that make/model in DF.
    """
    try:
        resp = requests.get(
            "https://en.wikipedia.org/w/rest.php/v1/search/title",
            params={"q": f"{make} {model} trim levels", "limit": 5},
            timeout=8,
        )
        wiki_titles = [page['title'] for page in resp.json().get('pages', [])]
    except Exception:
        # Best effort: the dataset hints are still useful without Wikipedia.
        wiki_titles = []

    rows = DF[(DF['make']==make) & (DF['model']==model)]
    top_trims = rows['trim'].astype(str).value_counts().head(10).index.tolist()
    return {"wiki_titles": wiki_titles, "dataset_top_trims": top_trims}
326
+
327
def admin_preview(make, model):
    """Return the Trim Fixer preview for one make/model.

    Returns:
        ``(hints, counts)``: the dict from wiki_suggest_trims and a frame
        with columns ``trim`` and ``count`` (rows per dataset trim), most
        frequent first.
    """
    hints = wiki_suggest_trims(make, model)
    selected = (DF['make']==make) & (DF['model']==model)
    rows = DF.loc[selected, ['trim', 'year']]
    per_trim = rows.groupby('trim').size().reset_index(name='count')
    return hints, per_trim.sort_values('count', ascending=False)
333
+
334
def admin_save_mapping(make, model, list_of_trims):
    """Save the curated display trims of one model into TRIMS_PATH.

    Args:
        make, model: the model being curated; both are required.
        list_of_trims: text with one display trim per line. Blank lines are
            ignored, surrounding whitespace is stripped, and repeated trims
            are kept once (first occurrence wins).

    The entry for ``"<make>||<model>"`` is replaced. A trim that already had
    an alias→generic mapping in the file keeps it; new trims map to
    themselves. The in-memory trim tables are reloaded afterwards.

    Returns:
        A status message for the UI.
    """
    if not make or not model:
        # Otherwise the mapping would be stored under the key "None||None".
        return "Select a make and model first."
    entries = [t.strip() for t in (list_of_trims or "").splitlines() if t.strip()]
    if not entries:
        return "No trims provided."
    entries = list(dict.fromkeys(entries))  # de-duplicate, keep order

    key = f"{make}||{model}"
    data = {}
    if os.path.exists(TRIMS_PATH):
        with open(TRIMS_PATH, 'r', encoding='utf-8') as f:
            data = json.load(f)

    # Keep curated alias→generic links: overwriting them with an identity
    # map would stop display trims from resolving to the dataset's tokens.
    previous = data.get(key)
    known = previous.get('alias_to_generic', {}) if isinstance(previous, dict) else {}
    alias_map = {t: known.get(t, t) for t in entries}

    data[key] = {"display": entries, "alias_to_generic": alias_map}
    with open(TRIMS_PATH, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
    load_trims()
    return f"Saved {len(entries)} trims for {make} {model}. Refresh the main tab dropdown."
349
+
350
+ # ----------------------------- Admin: Dataset Tools (0–100 fix) -----------------------------
351
+ def _estimate_0_100(row):
352
+ """Heuristic: t = 26 - 3.2*ln(hp) + body_adj + fuel_adj + trim_adj + noise, clamped [2.9, 14.5]."""
353
+ try:
354
+ hp = float(row.get('horsepower', 150) or 150)
355
+ except Exception:
356
+ hp = 150.0
357
+ body = str(row.get('body_type','')).lower()
358
+ fuel = str(row.get('fuel','')).lower()
359
+ trim = str(row.get('trim','')).lower()
360
+
361
+ base = 26.0 - 3.2*np.log(max(hp, 60.0)) # >=60 hp to avoid extremes
362
+
363
+ body_adj_map = {
364
+ 'sports': -2.5, 'coupe': -1.2, 'sedan': 0.0, 'hatch': 0.2, 'hatchback': 0.2,
365
+ 'wagon': 0.4, 'suv': 0.8, 'crossover': 0.6, 'pickup': 1.2, 'truck': 1.2, 'van': 1.0
366
+ }
367
+ body_adj = 0.0
368
+ for k,v in body_adj_map.items():
369
+ if k in body:
370
+ body_adj = v; break
371
+
372
+ fuel_adj = 0.0
373
+ if 'ev' in fuel or 'electric' in fuel: fuel_adj -= 0.8
374
+ if 'hybrid' in fuel: fuel_adj -= 0.3
375
+ if 'diesel' in fuel: fuel_adj += 0.4
376
+
377
+ trim_adj = 0.0
378
+ if 'performance' in trim or 'rs' in trim or 'amg' in trim or 'm ' in f" {trim} " or 'type r' in trim:
379
+ trim_adj -= 0.5
380
+ elif 'sport' in trim:
381
+ trim_adj -= 0.3
382
+ elif 'premium' in trim:
383
+ trim_adj -= 0.2
384
+
385
+ noise = np.random.uniform(-0.2, 0.2)
386
+ t = base + body_adj + fuel_adj + trim_adj + noise
387
+ t = float(np.clip(t, 2.9, 14.5))
388
+ return round(t, 2)
389
+
390
def admin_fix_zero_to_100(save_as_new=True):
    """Recompute ``zero_to_100_kmh_s`` for every row and write the CSV.

    Args:
        save_as_new: write to ``RideSearch_dataset_fixed.csv`` when true,
            otherwise overwrite DATA_PATH.

    Returns:
        ``(info, path)``: a dict with the output path, the column's standard
        deviation before and after, and the row count; plus the path again
        for the download widget.

    The column is always regenerated. The earlier variance / unique-count
    check was removed because its result was overridden on every path.
    """
    col = 'zero_to_100_kmh_s'
    df = DF.copy()
    df[col] = df.apply(_estimate_0_100, axis=1)

    out_name = 'RideSearch_dataset_fixed.csv' if save_as_new else DATA_PATH
    # NOTE(review): overwriting DATA_PATH leaves the in-memory DF and any
    # saved emb_num.npy on the old values until they are rebuilt — confirm
    # whether that is intended.
    df.to_csv(out_name, index=False)

    # Simple before/after stats
    try:
        old_std = float(DF[col].astype(float).std())
    except Exception:
        old_std = float('nan')
    new_std = float(df[col].astype(float).std())
    info = {
        "saved_to": out_name,
        "old_std": old_std,
        "new_std": new_std,
        "rows": int(len(df))
    }
    # Provide a downloadable file
    return info, out_name
425
+
426
+ # ----------------------------- UI -----------------------------
427
def build_ui():
    """Assemble the three-tab Gradio Blocks app and return it (not launched).

    Tabs: "Pick & Recommend" (anchor selection, filters, results),
    "Admin • Trim Fixer" (preview and save display trims) and
    "Admin • Dataset Tools" (recompute 0-100 times). Slider bounds for year
    and price come from the min/max of DF. Statement order is kept as-is
    because it defines the on-screen layout.
    """
    y_lo, y_hi = int(DF['year'].min()), int(DF['year'].max())
    p_lo, p_hi = int(DF['price_usd'].min()), int(DF['price_usd'].max())
    make_choices = sorted(DF['make'].dropna().unique().tolist())

    def any_plus(column):
        # 'Any' sentinel followed by the column's sorted distinct values.
        return ['Any'] + sorted(DF[column].dropna().unique().tolist())

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# RideSearch — cross-brand recommendations with **real trims** + automatic photos")

        with gr.Tab("Pick & Recommend"):
            with gr.Row():
                mk = gr.Dropdown(list(make_choices), label="Make")
                md = gr.Dropdown([], label="Model")
                tr = gr.Dropdown([], label="Trim (optional)")
                yr = gr.Dropdown([], label="Year (optional)")
            mk.change(models_for, mk, md)
            md.change(lambda a,b: on_model_change(a,b), [mk, md], [tr, yr])

            with gr.Row():
                body = gr.Dropdown(any_plus('body_type'), value='Any', label='Body')
                fuel = gr.Dropdown(any_plus('fuel'), value='Any', label='Fuel')
            with gr.Row():
                y_min = gr.Slider(y_lo, y_hi, value=y_lo, step=1, label='Year min')
                y_max = gr.Slider(y_lo, y_hi, value=y_hi, step=1, label='Year max')
            with gr.Row():
                p_min = gr.Slider(p_lo, p_hi, value=p_lo, step=500, label='Price min (USD)')
                p_max = gr.Slider(p_lo, p_hi, value=min(p_hi, 80000), step=500, label='Price max (USD)')
            with gr.Row():
                safety = gr.Slider(3, 5, value=4, step=1, label='Min Safety ★')
                reliab = gr.Slider(55, 99, value=70, step=1, label='Min Reliability')
            with gr.Row():
                topk = gr.Slider(1, 10, value=5, step=1, label='Recommendations')
                alpha = gr.Slider(0, 1, value=0.7, step=0.05, label='α — Text vs Numeric')
            with gr.Row():
                cross = gr.Checkbox(label="Cross-brand only", value=True)
                xmodel = gr.Checkbox(label="Exclude same model family", value=True)

            go = gr.Button("Recommend")
            anchor_md = gr.Markdown()
            table = gr.Dataframe(interactive=False, wrap=True, label="Recommendations")
            note = gr.Markdown()
            gallery = gr.HTML()

            recommend_inputs = [mk, md, tr, yr, topk, alpha, body, fuel,
                                y_min, y_max, p_min, p_max, safety, reliab, cross, xmodel]
            recommend_outputs = [anchor_md, table, note, gallery]
            go.click(recommend, recommend_inputs, recommend_outputs)

        with gr.Tab("Admin • Trim Fixer"):
            gr.Markdown("**Add or repair realistic trim lists** per model. These control dropdowns and result display.")
            with gr.Row():
                a_mk = gr.Dropdown(list(make_choices), label="Make")
                a_md = gr.Dropdown([], label="Model")
            a_mk.change(models_for, a_mk, a_md)
            with gr.Row():
                prev_btn = gr.Button("Preview dataset trims + Wiki hints")
            out_json = gr.JSON(value={})
            out_counts = gr.Dataframe(wrap=True)
            prev_btn.click(admin_preview, [a_mk, a_md], [out_json, out_counts])

            gr.Markdown("Paste **display trims** (one per line), then **Save mapping**.")
            trims_txt = gr.Textbox(lines=8, placeholder="e.g.\n320i\n330i\n340i\nM3", label="Display trims (one per line)")
            save_btn = gr.Button("Save mapping to trims_map.json")
            save_msg = gr.Markdown()
            save_btn.click(admin_save_mapping, [a_mk, a_md, trims_txt], save_msg)

        with gr.Tab("Admin • Dataset Tools"):
            gr.Markdown("**Fix zero_to_100_kmh_s** with a realistic heuristic and download the updated CSV.")
            with gr.Row():
                save_new = gr.Checkbox(value=True, label="Save as new file (RideSearch_dataset_fixed.csv)")
                run_btn = gr.Button("Recompute 0–100 and Save")
            info_json = gr.JSON()
            out_file = gr.File(label="Download fixed CSV")
            run_btn.click(admin_fix_zero_to_100, [save_new], [info_json, out_file])

        gr.Markdown("Tip: Add a `BING_KEY` secret in Space → Settings → Variables for Bing image fallback.")

    return demo
504
+
505
+ demo = build_ui()
506
+
507
+ if __name__ == "__main__":
508
+ demo.queue().launch(server_name="0.0.0.0", server_port=7860)
repair_accel.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # repair_accel.py — recompute zero_to_100_kmh_s for RideSearch_dataset.csv
3
+ # Usage:
4
+ # python repair_accel.py --in RideSearch_dataset.csv --out RideSearch_dataset_fixed.csv
5
+ #
6
+ # Heuristic:
7
+ # t = 26 - 3.2*ln(hp) + body_adj + fuel_adj + trim_adj + noise, clamped [2.9, 14.5]
8
+
9
+ import argparse, json, math, random
10
+ import pandas as pd
11
+ import numpy as np
12
+
13
+ def estimate_0_100(row):
14
+ try:
15
+ hp = float(row.get('horsepower', 150) or 150)
16
+ except Exception:
17
+ hp = 150.0
18
+ body = str(row.get('body_type','')).lower()
19
+ fuel = str(row.get('fuel','')).lower()
20
+ trim = str(row.get('trim','')).lower()
21
+
22
+ base = 26.0 - 3.2*math.log(max(hp, 60.0))
23
+
24
+ body_adj_map = {
25
+ 'sports': -2.5, 'coupe': -1.2, 'sedan': 0.0, 'hatch': 0.2, 'hatchback': 0.2,
26
+ 'wagon': 0.4, 'suv': 0.8, 'crossover': 0.6, 'pickup': 1.2, 'truck': 1.2, 'van': 1.0
27
+ }
28
+ body_adj = 0.0
29
+ for k,v in body_adj_map.items():
30
+ if k in body:
31
+ body_adj = v; break
32
+
33
+ fuel_adj = 0.0
34
+ if 'ev' in fuel or 'electric' in fuel: fuel_adj -= 0.8
35
+ if 'hybrid' in fuel: fuel_adj -= 0.3
36
+ if 'diesel' in fuel: fuel_adj += 0.4
37
+
38
+ trim_adj = 0.0
39
+ if 'performance' in trim or 'rs' in trim or 'amg' in trim or ' m ' in f' {trim} ' or 'type r' in trim:
40
+ trim_adj -= 0.5
41
+ elif 'sport' in trim:
42
+ trim_adj -= 0.3
43
+ elif 'premium' in trim:
44
+ trim_adj -= 0.2
45
+
46
+ noise = random.uniform(-0.2, 0.2)
47
+ t = base + body_adj + fuel_adj + trim_adj + noise
48
+ t = max(2.9, min(14.5, t))
49
+ return round(t, 2)
50
+
51
def main():
    """Command-line entry point: recompute ``zero_to_100_kmh_s`` for a CSV.

    Reads the file given by ``--in`` (default RideSearch_dataset.csv),
    replaces the column with estimate_0_100 applied to every row, writes
    the result to ``--out`` (default RideSearch_dataset_fixed.csv) and
    prints a small summary dict.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--in', dest='inp', default='RideSearch_dataset.csv')
    ap.add_argument('--out', dest='outp', default='RideSearch_dataset_fixed.csv')
    args = ap.parse_args()

    df = pd.read_csv(args.inp)

    # Always recompute if user runs the script. The former "is the column
    # broken?" probe was dropped because its result was never read.
    df['zero_to_100_kmh_s'] = df.apply(estimate_0_100, axis=1)
    df.to_csv(args.outp, index=False)

    try:
        new_std = float(df['zero_to_100_kmh_s'].astype(float).std())
    except Exception:
        new_std = float('nan')

    print({'saved_to': args.outp, 'rows': int(len(df)), 'new_std': new_std})
77
+
78
+ if __name__ == '__main__':
79
+ main()
trims_map (1).json ADDED
@@ -0,0 +1,382 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "BMW||3 Series": {
3
+ "display": [
4
+ "318i",
5
+ "320i",
6
+ "330i",
7
+ "330e",
8
+ "340i",
9
+ "M3"
10
+ ],
11
+ "alias_to_generic": {
12
+ "318i": "Base",
13
+ "320i": "Base",
14
+ "330i": "Sport",
15
+ "330e": "Sport",
16
+ "340i": "Premium",
17
+ "M3": "Performance"
18
+ }
19
+ },
20
+ "Audi||A4": {
21
+ "display": [
22
+ "35 TFSI",
23
+ "40 TFSI",
24
+ "45 TFSI",
25
+ "S4",
26
+ "RS4"
27
+ ],
28
+ "alias_to_generic": {
29
+ "35 TFSI": "Base",
30
+ "40 TFSI": "Sport",
31
+ "45 TFSI": "Premium",
32
+ "S4": "Performance",
33
+ "RS4": "Performance"
34
+ }
35
+ },
36
+ "Mercedes-Benz||C-Class": {
37
+ "display": [
38
+ "C180",
39
+ "C200",
40
+ "C220d",
41
+ "C300",
42
+ "AMG C43",
43
+ "AMG C63"
44
+ ],
45
+ "alias_to_generic": {
46
+ "C180": "Base",
47
+ "C200": "Base",
48
+ "C220d": "Base",
49
+ "C300": "Premium",
50
+ "AMG C43": "Performance",
51
+ "AMG C63": "Performance"
52
+ }
53
+ },
54
+ "Lexus||IS": {
55
+ "display": [
56
+ "IS 300",
57
+ "IS 350",
58
+ "IS 500 F SPORT"
59
+ ],
60
+ "alias_to_generic": {
61
+ "IS 300": "Base",
62
+ "IS 350": "Premium",
63
+ "IS 500 F SPORT": "Performance"
64
+ }
65
+ },
66
+ "Toyota||Corolla": {
67
+ "display": [
68
+ "L",
69
+ "LE",
70
+ "SE",
71
+ "XSE",
72
+ "GR"
73
+ ],
74
+ "alias_to_generic": {
75
+ "L": "Base",
76
+ "LE": "Base",
77
+ "SE": "Sport",
78
+ "XSE": "Premium",
79
+ "GR": "Performance"
80
+ }
81
+ },
82
+ "Honda||Civic": {
83
+ "display": [
84
+ "LX",
85
+ "Sport",
86
+ "EX",
87
+ "Touring",
88
+ "Type R"
89
+ ],
90
+ "alias_to_generic": {
91
+ "LX": "Base",
92
+ "Sport": "Sport",
93
+ "EX": "Premium",
94
+ "Touring": "Premium",
95
+ "Type R": "Performance"
96
+ }
97
+ },
98
+ "Volkswagen||Golf": {
99
+ "display": [
100
+ "Trendline",
101
+ "Comfortline",
102
+ "Highline",
103
+ "GTI",
104
+ "R"
105
+ ],
106
+ "alias_to_generic": {
107
+ "Trendline": "Base",
108
+ "Comfortline": "Base",
109
+ "Highline": "Premium",
110
+ "GTI": "Performance",
111
+ "R": "Performance"
112
+ }
113
+ },
114
+ "Hyundai||Elantra": {
115
+ "display": [
116
+ "SE",
117
+ "SEL",
118
+ "Limited",
119
+ "N Line",
120
+ "N"
121
+ ],
122
+ "alias_to_generic": {
123
+ "SE": "Base",
124
+ "SEL": "Base",
125
+ "Limited": "Premium",
126
+ "N Line": "Sport",
127
+ "N": "Performance"
128
+ }
129
+ },
130
+ "Kia||Forte": {
131
+ "display": [
132
+ "LX",
133
+ "S",
134
+ "EX",
135
+ "GT-Line",
136
+ "GT"
137
+ ],
138
+ "alias_to_generic": {
139
+ "LX": "Base",
140
+ "S": "Sport",
141
+ "EX": "Premium",
142
+ "GT-Line": "Sport",
143
+ "GT": "Performance"
144
+ }
145
+ },
146
+ "Jeep||Wrangler": {
147
+ "display": [
148
+ "Sport",
149
+ "Willys",
150
+ "Sahara",
151
+ "Rubicon",
152
+ "392"
153
+ ],
154
+ "alias_to_generic": {
155
+ "Sport": "Base",
156
+ "Willys": "Sport",
157
+ "Sahara": "Premium",
158
+ "Rubicon": "Performance",
159
+ "392": "Performance"
160
+ }
161
+ },
162
+ "Land Rover||Range Rover Evoque": {
163
+ "display": [
164
+ "S",
165
+ "SE",
166
+ "R-Dynamic S",
167
+ "R-Dynamic SE",
168
+ "Autobiography"
169
+ ],
170
+ "alias_to_generic": {
171
+ "S": "Base",
172
+ "SE": "Premium",
173
+ "R-Dynamic S": "Sport",
174
+ "R-Dynamic SE": "Premium",
175
+ "Autobiography": "Premium"
176
+ }
177
+ },
178
+ "Mazda||Mazda3": {
179
+ "display": [
180
+ "S",
181
+ "Select",
182
+ "Preferred",
183
+ "Premium",
184
+ "Turbo"
185
+ ],
186
+ "alias_to_generic": {
187
+ "S": "Base",
188
+ "Select": "Base",
189
+ "Preferred": "Premium",
190
+ "Premium": "Premium",
191
+ "Turbo": "Performance"
192
+ }
193
+ },
194
+ "Mitsubishi||Outlander": {
195
+ "display": [
196
+ "ES",
197
+ "SE",
198
+ "SEL",
199
+ "Black Edition",
200
+ "PHEV"
201
+ ],
202
+ "alias_to_generic": {
203
+ "ES": "Base",
204
+ "SE": "Sport",
205
+ "SEL": "Premium",
206
+ "Black Edition": "Premium",
207
+ "PHEV": "Premium"
208
+ }
209
+ },
210
+ "Nissan||Rogue": {
211
+ "display": [
212
+ "S",
213
+ "SV",
214
+ "SL",
215
+ "Platinum"
216
+ ],
217
+ "alias_to_generic": {
218
+ "S": "Base",
219
+ "SV": "Sport",
220
+ "SL": "Premium",
221
+ "Platinum": "Premium"
222
+ }
223
+ },
224
+ "Peugeot||3008": {
225
+ "display": [
226
+ "Active",
227
+ "Allure",
228
+ "GT",
229
+ "GT Pack"
230
+ ],
231
+ "alias_to_generic": {
232
+ "Active": "Base",
233
+ "Allure": "Premium",
234
+ "GT": "Premium",
235
+ "GT Pack": "Premium"
236
+ }
237
+ },
238
+ "Porsche||911": {
239
+ "display": [
240
+ "Carrera",
241
+ "Carrera S",
242
+ "GTS",
243
+ "Turbo",
244
+ "GT3"
245
+ ],
246
+ "alias_to_generic": {
247
+ "Carrera": "Base",
248
+ "Carrera S": "Premium",
249
+ "GTS": "Premium",
250
+ "Turbo": "Performance",
251
+ "GT3": "Performance"
252
+ }
253
+ },
254
+ "Ram||1500": {
255
+ "display": [
256
+ "Tradesman",
257
+ "Big Horn",
258
+ "Laramie",
259
+ "Rebel",
260
+ "Limited"
261
+ ],
262
+ "alias_to_generic": {
263
+ "Tradesman": "Base",
264
+ "Big Horn": "Sport",
265
+ "Laramie": "Premium",
266
+ "Rebel": "Sport",
267
+ "Limited": "Premium"
268
+ }
269
+ },
270
+ "Renault||Clio": {
271
+ "display": [
272
+ "Authentique",
273
+ "Expression",
274
+ "Dynamique",
275
+ "RS Line"
276
+ ],
277
+ "alias_to_generic": {
278
+ "Authentique": "Base",
279
+ "Expression": "Sport",
280
+ "Dynamique": "Premium",
281
+ "RS Line": "Performance"
282
+ }
283
+ },
284
+ "Seat||Leon": {
285
+ "display": [
286
+ "Reference",
287
+ "Style",
288
+ "FR",
289
+ "Cupra"
290
+ ],
291
+ "alias_to_generic": {
292
+ "Reference": "Base",
293
+ "Style": "Sport",
294
+ "FR": "Sport",
295
+ "Cupra": "Performance"
296
+ }
297
+ },
298
+ "Skoda||Octavia": {
299
+ "display": [
300
+ "Active",
301
+ "Ambition",
302
+ "Style",
303
+ "RS"
304
+ ],
305
+ "alias_to_generic": {
306
+ "Active": "Base",
307
+ "Ambition": "Sport",
308
+ "Style": "Premium",
309
+ "RS": "Performance"
310
+ }
311
+ },
312
+ "Subaru||Outback": {
313
+ "display": [
314
+ "Base",
315
+ "Premium",
316
+ "Limited",
317
+ "Wilderness",
318
+ "Touring"
319
+ ],
320
+ "alias_to_generic": {
321
+ "Base": "Base",
322
+ "Premium": "Premium",
323
+ "Limited": "Premium",
324
+ "Wilderness": "Sport",
325
+ "Touring": "Premium"
326
+ }
327
+ },
328
+ "Tesla||Model 3": {
329
+ "display": [
330
+ "RWD",
331
+ "Long Range",
332
+ "Performance"
333
+ ],
334
+ "alias_to_generic": {
335
+ "RWD": "Base",
336
+ "Long Range": "Premium",
337
+ "Performance": "Performance"
338
+ }
339
+ },
340
+ "Volkswagen||Tiguan": {
341
+ "display": [
342
+ "S",
343
+ "SE",
344
+ "SEL",
345
+ "R-Line"
346
+ ],
347
+ "alias_to_generic": {
348
+ "S": "Base",
349
+ "SE": "Sport",
350
+ "SEL": "Premium",
351
+ "R-Line": "Performance"
352
+ }
353
+ },
354
+ "Volvo||XC60": {
355
+ "display": [
356
+ "Core",
357
+ "Plus",
358
+ "Ultimate",
359
+ "Polestar Engineered"
360
+ ],
361
+ "alias_to_generic": {
362
+ "Core": "Base",
363
+ "Plus": "Premium",
364
+ "Ultimate": "Premium",
365
+ "Polestar Engineered": "Performance"
366
+ }
367
+ },
368
+ "Mini||Cooper": {
369
+ "display": [
370
+ "Classic",
371
+ "Signature",
372
+ "Iconic",
373
+ "John Cooper Works"
374
+ ],
375
+ "alias_to_generic": {
376
+ "Classic": "Base",
377
+ "Signature": "Premium",
378
+ "Iconic": "Premium",
379
+ "John Cooper Works": "Performance"
380
+ }
381
+ }
382
+ }