Levimichael4 committed on
Commit
9bf7679
·
verified ·
1 Parent(s): af839dc

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -327
app.py DELETED
@@ -1,327 +0,0 @@
1
- # app_new.py — RideSearch (cross-brand, brand-correct trims, smart fallbacks)
2
-
3
- import os, glob
4
- import numpy as np
5
- import pandas as pd
6
- from sklearn.metrics.pairwise import cosine_similarity
7
- from sklearn.preprocessing import StandardScaler
8
- import gradio as gr
9
-
10
- # =========================
11
- # Data loading & embeddings
12
- # =========================
13
-
14
def load_df():
    """
    Return the RideSearch dataset as a DataFrame.

    If 'RideSearch_dataset.csv' exists in the working directory it is read
    and returned as-is. Otherwise every 'RideSearch_part*_small.csv' file is
    read, concatenated with a fresh 0..n-1 index, and the merged result is
    written back to 'RideSearch_dataset.csv' so later runs take the fast path.

    Raises:
        FileNotFoundError: neither the merged file nor any part file exists.
    """
    merged_path = 'RideSearch_dataset.csv'
    if os.path.exists(merged_path):
        return pd.read_csv(merged_path)

    # NOTE: plain string sort, so "part10" is read before "part2".
    parts = sorted(glob.glob('RideSearch_part*_small.csv'))
    if not parts:
        raise FileNotFoundError(
            "Upload RideSearch_dataset.csv OR the 10 parts RideSearch_part*_small.csv."
        )

    df = pd.concat([pd.read_csv(p) for p in parts], ignore_index=True)

    # Writing the merged file is only a cache: the data is already in memory,
    # so an unwritable directory must not prevent the app from starting.
    try:
        df.to_csv(merged_path, index=False)
    except OSError as err:
        import warnings
        warnings.warn(f"Could not cache merged dataset to {merged_path}: {err}")
    return df
29
-
30
# The dataset is loaded once, at import time, into a module-level frame.
DF = load_df()

# Columns that make up the numeric embedding (adjust if your CSV differs).
NUM_COLS = [
    'horsepower',
    'zero_to_100_kmh_s',
    'seats',
    'cargo_liters',
    'price_usd',
    'popularity_score',
    'comfort_score',
    'reliability_score',
    'tech_score',
    'ownership_cost_score',
    'safety_rating',
]
38
-
39
def ensure_emb():
    """
    Load or create the text and numeric embeddings for DF.

    Text embeddings come from all-MiniLM-L6-v2 applied to DF['text_record']
    (L2-normalised, float32) and are cached in 'emb_text.npy'.
    Numeric embeddings are NUM_COLS passed through StandardScaler (with the
    0-100 km/h time negated first) and are cached in 'emb_num.npy'.

    A cached file is reused only when it is a 2-D array with one row per row
    of DF; a missing or mismatching file is rebuilt on its own, so a valid
    text cache is never recomputed just because the numeric one is absent.

    Returns:
        (Etext, Enum): two float32 arrays, each with len(DF) rows.
    """
    n_rows = len(DF)

    def _load_cached(path):
        # Return the cached array, or None when it is absent or out of sync with DF.
        if not os.path.exists(path):
            return None
        arr = np.load(path)
        if arr.ndim != 2 or arr.shape[0] != n_rows:
            return None
        return arr

    Etext = _load_cached('emb_text.npy')
    Enum = _load_cached('emb_num.npy')

    if Etext is None:
        # Imported lazily: only needed when the text cache has to be (re)built.
        from sentence_transformers import SentenceTransformer
        m = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

        texts = DF['text_record'].astype(str).tolist()
        Etext = m.encode(texts, batch_size=256, show_progress_bar=True, normalize_embeddings=True)
        Etext = np.asarray(Etext, dtype='float32')
        np.save('emb_text.npy', Etext)

    if Enum is None:
        X = DF[NUM_COLS].copy()
        # A lower 0-100 time is better; negate it so larger means better,
        # like the other columns.
        if 'zero_to_100_kmh_s' in X.columns:
            X['zero_to_100_kmh_s'] = -X['zero_to_100_kmh_s'].astype('float32')
        Xs = StandardScaler().fit_transform(X.values.astype('float32'))
        # StandardScaler ignores NaN when fitting but leaves it in the output;
        # 0.0 is the column mean after scaling, and a NaN left in place would
        # make cosine_similarity reject the whole matrix.
        Xs = np.nan_to_num(Xs, nan=0.0)
        Enum = Xs.astype('float32')
        np.save('emb_num.npy', Enum)

    return Etext, Enum
69
-
70
- # ==========================================
71
- # Brand-correct trim display & alias mapping
72
- # ==========================================
73
-
74
# Brand-correct trim names to show in the Trim dropdown, keyed by (make, model).
# Pairs not listed here fall back to whatever trims the dataset contains.
TRIM_CHOICES = {
    ("BMW", "3 Series"): [
        "320i", "330i", "330e", "340i", "M3",
    ],
    ("Audi", "A3"): [
        "35 TFSI", "40 TFSI", "45 TFSI", "S3", "RS3",
    ],
    ("Audi", "A4"): [
        "35 TFSI", "40 TFSI", "45 TFSI", "S4", "RS4",
    ],
    ("Mercedes-Benz", "C-Class"): [
        "C200", "C220d", "C300", "AMG C43", "AMG C63",
    ],
    ("Lexus", "IS"): [
        "IS 300", "IS 350", "IS 500 F SPORT",
    ],
    ("Toyota", "Corolla"): [
        "L", "LE", "SE", "XSE", "GR",
    ],
    ("Honda", "Civic"): [
        "LX", "Sport", "EX", "Touring", "Type R",
    ],
    ("Volkswagen", "Golf"): [
        "Trendline", "Comfortline", "Highline", "GTI", "R",
    ],
    ("Hyundai", "Elantra"): [
        "SE", "SEL", "Limited", "N Line", "N",
    ],
    ("Kia", "Forte"): [
        "LX", "S", "EX", "GT-Line", "GT",
    ],
    # add more pairs you plan to demo
}
87
-
88
- # Map those display trims to your dataset’s generic trim tokens
89
# Flat lookup: display trim name -> generic dataset trim token
# (Base / Sport / Premium / Performance).
#
# The key is the trim name alone, so a name shared by two models can only
# carry one value. Each shared name is listed once, under the first model
# that uses it, with the value that is actually in effect:
#   "LX" and "EX" agree between Civic and Forte;
#   "SE" is "Base" (the Elantra meaning), which does NOT match the
#   Corolla SE tier ("Sport").
TRIM_ALIAS_TO_GENERIC = {
    # BMW 3
    "320i": "Base",
    "330i": "Sport",
    "330e": "Sport",
    "340i": "Premium",
    "M3": "Performance",
    # Audi A3/A4
    "35 TFSI": "Base",
    "40 TFSI": "Sport",
    "45 TFSI": "Premium",
    "S3": "Performance",
    "RS3": "Performance",
    "S4": "Performance",
    "RS4": "Performance",
    # Mercedes C
    "C200": "Base",
    "C220d": "Base",
    "C300": "Premium",
    "AMG C43": "Performance",
    "AMG C63": "Performance",
    # Lexus IS
    "IS 300": "Base",
    "IS 350": "Premium",
    "IS 500 F SPORT": "Performance",
    # Toyota Corolla (shares "SE" with Hyundai Elantra)
    "L": "Base",
    "LE": "Base",
    "SE": "Base",
    "XSE": "Premium",
    "GR": "Performance",
    # Honda Civic (shares "LX" and "EX" with Kia Forte)
    "LX": "Base",
    "Sport": "Sport",
    "EX": "Premium",
    "Touring": "Premium",
    "Type R": "Performance",
    # VW Golf
    "Trendline": "Base",
    "Comfortline": "Base",
    "Highline": "Premium",
    "GTI": "Performance",
    "R": "Performance",
    # Hyundai Elantra
    "SEL": "Base",
    "Limited": "Premium",
    "N Line": "Sport",
    "N": "Performance",
    # Kia Forte
    "S": "Sport",
    "GT-Line": "Sport",
    "GT": "Performance",
}
110
-
111
- # ==============================
112
- # Helpers: dropdowns & filtering
113
- # ==============================
114
-
115
def models_for(make):
    """Return a dropdown update listing the models of *make* (empty when no make is chosen)."""
    choices = []
    if make:
        models = DF.loc[DF['make'].eq(make), 'model']
        choices = sorted(models.dropna().unique().tolist())
    # The selection is always cleared so a stale model never survives a make change.
    return gr.update(choices=choices, value=None)
120
-
121
def trim_year(make, model):
    """
    Return the (trims, years) choice lists for the selected make/model.

    Trims: the brand-correct names from TRIM_CHOICES when the (make, model)
    pair is listed there; otherwise up to 20 distinct trims found in DF,
    narrowed by whichever of make/model is set.

    Years: the distinct years present in DF for that exact make + model,
    ascending; an empty list unless both make and model are given.
    """
    if make and model and (make, model) in TRIM_CHOICES:
        # Copy so callers can never mutate the shared constant.
        trims = list(TRIM_CHOICES[(make, model)])
    else:
        sub = DF
        if make:
            sub = sub[sub['make'] == make]
        if model:
            sub = sub[sub['model'] == model]
        # Drop missing values BEFORE the string cast: astype(str) turns NaN
        # into the literal 'nan', which dropna() can no longer remove.
        trims = sorted(sub['trim'].dropna().astype(str).unique().tolist())[:20]

    if make and model:
        same_car = DF['make'].eq(make) & DF['model'].eq(model)
        years = sorted(
            DF.loc[same_car, 'year'].dropna().astype(int).unique().tolist()
        )
    else:
        years = []
    return trims, years
140
-
141
def on_model_change(make, model):
    """Refresh the Trim and Year dropdowns for the chosen make/model, clearing both selections."""
    trim_opts, year_opts = trim_year(make, model)
    trim_update = gr.update(choices=trim_opts, value=None)
    year_update = gr.update(choices=year_opts, value=None)
    return trim_update, year_update
144
-
145
# Model-specific corrections for display trims whose name is shared across
# models with different tiers. TRIM_ALIAS_TO_GENERIC is keyed by trim name
# only, and its "SE" entry resolves to "Base" (the Elantra meaning), which is
# wrong for the Corolla SE.
_MODEL_TRIM_OVERRIDES = {
    ("Toyota", "Corolla", "SE"): "Sport",
}


def normalize_trim_for_query(make, model, display_trim):
    """
    Map a display trim back to the dataset's generic token (Base/Sport/...).

    Returns None when no trim is given. A model-specific override wins first;
    next, for (make, model) pairs listed in TRIM_CHOICES, the flat alias table
    is consulted; anything else is passed through unchanged.
    """
    if not display_trim:
        return None

    override = _MODEL_TRIM_OVERRIDES.get((make, model, display_trim))
    if override is not None:
        return override

    if (make, model) in TRIM_CHOICES and display_trim in TRIM_ALIAS_TO_GENERIC:
        return TRIM_ALIAS_TO_GENERIC[display_trim]
    return display_trim
152
-
153
def apply_filters(df, body, fuel, y_min, y_max, p_min, p_max, safety, reliab):
    """
    Return the rows of *df* that satisfy every filter, as a new DataFrame.

    Args:
        df: frame with 'body_type', 'fuel', 'year', 'price_usd',
            'safety_rating' and 'reliability_score' columns.
        body, fuel: exact value to keep; 'Any', None or '' disables the filter.
        y_min, y_max: inclusive year range.
        p_min, p_max: inclusive price range.
        safety, reliab: minimum safety rating / reliability score.

    The input frame is not modified and the original index labels are kept.
    """
    keep = (
        df['year'].between(y_min, y_max)
        & df['price_usd'].between(p_min, p_max)
        & (df['safety_rating'] >= safety)
        & (df['reliability_score'] >= reliab)
    )
    # A cleared dropdown arrives as None/''; treat it like 'Any' instead of
    # comparing against it, which would silently drop every row.
    if body and body != 'Any':
        keep &= df['body_type'] == body
    if fuel and fuel != 'Any':
        keep &= df['fuel'] == fuel
    return df.loc[keep].copy()
161
-
162
def fmt_anchor(r):
    """
    Render the anchor car *r* (a row or mapping keyed by column name) as a
    five-line Markdown summary: name, identity, drivetrain, performance/price,
    and scores.

    Horsepower, price and the score fields are shown as integers.
    """
    # Markdown only breaks a line on TWO trailing spaces before the newline;
    # with a single space the five lines would flow into one paragraph.
    br = "  \n"
    return (f"**{r['name']}**{br}"
            f"Brand: {r['make']} • Model: {r['model']} • Trim: {r['trim']} • Year: {r['year']}{br}"
            f"Body: {r['body_type']} • Fuel: {r['fuel']} • Engine: {r['engine_type']}{br}"
            f"HP: {int(r['horsepower'])} • 0–100: {r['zero_to_100_kmh_s']}s • Price: ${int(r['price_usd']):,}{br}"
            f"Popularity {int(r['popularity_score'])}/10 • Comfort {int(r['comfort_score'])}/10 • "
            f"Reliability {int(r['reliability_score'])}/100 • Safety {int(r['safety_rating'])}★")
169
-
170
- # ===========================
171
- # Anchor selection & ranking
172
- # ===========================
173
-
174
def anchor_row(make, model, trim_display, year):
    """
    Choose the reference ("anchor") row for the selected car.

    Candidate sets are tried from most to least specific, and the most
    popular row of the first non-empty set wins:
      1. make + model + trim + year (each part only if provided)
      2. make + model + year
      3. make + model + trim
      4. make + model
    Returns None only when even the make/model subset is empty.
    """
    trim_token = normalize_trim_for_query(make, model, trim_display)

    scope = DF.copy()
    if make:
        scope = scope[scope['make'] == make]
    if model:
        scope = scope[scope['model'] == model]

    def exact_match():
        frame = scope.copy()
        if trim_token:
            frame = frame[frame['trim'] == trim_token]
        if year:
            frame = frame[frame['year'] == year]
        return frame

    # Built lazily so a fallback is only evaluated when everything before it came up empty.
    fallbacks = [exact_match]
    if year:
        fallbacks.append(lambda: scope[scope['year'] == year])
    if trim_token:
        fallbacks.append(lambda: scope[scope['trim'] == trim_token])
    fallbacks.append(lambda: scope)

    for build in fallbacks:
        frame = build()
        if not frame.empty:
            return frame.sort_values('popularity_score', ascending=False).iloc[0]
    return None
203
-
204
def recommend(make, model, trim_display, year, topk, alpha,
              body, fuel, y_min, y_max, p_min, p_max, safety, reliab,
              cross_brand_only=True, exclude_same_model=True):
    """
    Recommend cars similar to the selected one.

    The anchor row is chosen by anchor_row(); candidates are all other rows of
    DF after the optional brand/model exclusions and apply_filters(). Each
    candidate is scored as
        alpha * cosine(text embeddings) + (1 - alpha) * cosine(numeric embeddings)
    and the best-scoring row of each distinct (make, model) is kept until
    `topk` rows are collected.

    Returns:
        (anchor_markdown, results_frame, note) on success, or
        (message, None, None) when no anchor or no candidate is available.
        results_frame carries an extra 'similarity_%' column (score * 100,
        one decimal).
    """
    a = anchor_row(make, model, trim_display, year)
    if a is None:
        return "No match for that combo.", None, None

    # Candidate pool. Never mutated here, so no defensive copy of DF is needed.
    pool = DF
    if cross_brand_only:
        pool = pool[pool['make'] != a['make']]
    if exclude_same_model:
        pool = pool[~((pool['make'] == a['make']) & (pool['model'] == a['model']))]

    pool = apply_filters(pool, body, fuel, int(y_min), int(y_max), int(p_min), int(p_max), int(safety), int(reliab))
    if pool.empty:
        return "No cars after filters. Try widening year/price/safety.", None, None

    Etext, Enum = ensure_emb()
    # Index labels are used as row positions into the embedding arrays; this
    # relies on DF keeping the 0..n-1 index that load_df() produces.
    idx_anchor = int(a.name)
    cand_idx = pool.index.values

    weight = float(alpha)
    st = cosine_similarity(Etext[idx_anchor:idx_anchor+1], Etext[cand_idx])[0]
    sn = cosine_similarity(Enum[idx_anchor:idx_anchor+1], Enum[cand_idx])[0]
    s = weight*st + (1-weight)*sn

    # Rank by score, keeping only the best row per (make, model). The make and
    # model columns are pulled out once so the loop does plain array reads
    # instead of a DataFrame row lookup per candidate.
    makes = pool['make'].to_numpy()
    models = pool['model'].to_numpy()
    limit = int(topk)
    seen = set()
    picked = []  # positions into cand_idx / s, best first
    for j in np.argsort(-s):
        key = (makes[j], models[j])
        if key in seen:
            continue
        seen.add(key)
        picked.append(j)
        if len(picked) >= limit:
            break

    if not picked:
        return "No recommendations found after constraints.", None, None

    sel = DF.loc[cand_idx[picked]].copy()
    # Only the selected rows need a displayed score.
    sel['similarity_%'] = [round(float(s[j])*100, 1) for j in picked]

    cols = [
        'name','make','model','trim','year','body_type','fuel','engine_type',
        'price_usd','horsepower','zero_to_100_kmh_s',
        'popularity_score','comfort_score','reliability_score',
        'tech_score','ownership_cost_score','safety_rating','similarity_%'
    ]
    note = (f"α = {float(alpha):.2f} (text ↔ numeric) • Cross-brand only = {cross_brand_only} "
            f"• Exclude same model = {exclude_same_model}")
    return fmt_anchor(a), sel[cols], note
260
-
261
- # ============
262
- # Gradio UI
263
- # ============
264
-
265
def build_ui():
    """
    Assemble the Gradio Blocks interface and return it (not launched).

    Slider bounds for year and price are taken from DF; dropdown choices for
    make, body and fuel are the sorted distinct non-null values in DF.
    """
    def distinct(col):
        # Sorted unique non-null values of a DF column, as a plain list.
        return sorted(DF[col].dropna().unique().tolist())

    year_lo = int(DF['year'].min())
    year_hi = int(DF['year'].max())
    price_lo = int(DF['price_usd'].min())
    price_hi = int(DF['price_usd'].max())

    with gr.Blocks() as demo:
        gr.Markdown("# RideSearch — pick a car, get **cross-brand** similar options")

        with gr.Tab("Pick & Recommend"):
            # --- car selection ---
            with gr.Row():
                mk = gr.Dropdown(distinct('make'), label="Make")
                md = gr.Dropdown([], label="Model")
                tr = gr.Dropdown([], label="Trim (optional)")
                yr = gr.Dropdown([], label="Year (optional)")

            # Cascade: make -> models, model -> trims + years.
            mk.change(models_for, mk, md)
            md.change(on_model_change, [mk, md], [tr, yr])

            # --- candidate filters ---
            with gr.Row():
                body = gr.Dropdown(['Any'] + distinct('body_type'),
                                   value='Any', label='Body')
                fuel = gr.Dropdown(['Any'] + distinct('fuel'),
                                   value='Any', label='Fuel')

            with gr.Row():
                y_min = gr.Slider(year_lo, year_hi, value=year_lo, step=1, label='Year min')
                y_max = gr.Slider(year_lo, year_hi, value=year_hi, step=1, label='Year max')

            with gr.Row():
                p_min = gr.Slider(price_lo, price_hi, value=price_lo, step=500, label='Price min (USD)')
                p_max = gr.Slider(price_lo, price_hi, value=min(price_hi, 80000), step=500,
                                  label='Price max (USD)')

            with gr.Row():
                safety = gr.Slider(3, 5, value=4, step=1, label='Min Safety ★')
                reliab = gr.Slider(55, 99, value=70, step=1, label='Min Reliability')

            # --- ranking controls ---
            with gr.Row():
                topk = gr.Slider(1, 10, value=5, step=1, label='Recommendations')
                alpha = gr.Slider(0, 1, value=0.7, step=0.05, label='α — Text vs Numeric')

            with gr.Row():
                cross = gr.Checkbox(label="Cross-brand only", value=True)
                xmodel = gr.Checkbox(label="Exclude same model family", value=True)

            # --- action + outputs ---
            go = gr.Button("Recommend")
            anchor_md = gr.Markdown()
            table = gr.Dataframe(interactive=False)
            note = gr.Markdown()

            # Order must match recommend()'s positional parameters.
            recommend_inputs = [
                mk, md, tr, yr, topk, alpha,
                body, fuel, y_min, y_max, p_min, p_max, safety, reliab,
                cross, xmodel,
            ]
            recommend_outputs = [anchor_md, table, note]
            go.click(recommend, recommend_inputs, recommend_outputs)

            gr.Markdown("Tip: Leave Trim/Year empty if you’re not sure — the app will fall back smartly.")

    return demo
322
-
323
# The interface is built at import time and exposed as the module-level `demo`.
demo = build_ui()

if __name__ == "__main__":
    # Works locally and on Hugging Face Spaces
    app = demo.queue()
    app.launch(server_name="0.0.0.0", server_port=7860)