Levimichael4 committed on
Commit
a63fdc7
·
verified ·
1 Parent(s): 8c5fde5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -45
app.py CHANGED
@@ -1,9 +1,9 @@
1
-
2
  import os, glob, numpy as np, pandas as pd
3
  from sklearn.metrics.pairwise import cosine_similarity
4
  from sklearn.preprocessing import StandardScaler
5
- import gradio as gr, plotly.express as px
6
 
 
7
  def load_df():
8
  if os.path.exists('RideSearch_dataset.csv'):
9
  return pd.read_csv('RideSearch_dataset.csv')
@@ -12,102 +12,168 @@ def load_df():
12
  df = pd.concat([pd.read_csv(p) for p in parts], ignore_index=True)
13
  df.to_csv('RideSearch_dataset.csv', index=False)
14
  return df
15
- raise FileNotFoundError('Upload dataset (parts or full) first.')
16
 
17
  DF = load_df()
18
- NUM = ['horsepower','zero_to_100_kmh_s','seats','cargo_liters','price_usd','popularity_score','comfort_score','reliability_score','tech_score','ownership_cost_score','safety_rating']
19
 
 
 
 
 
 
 
 
20
  def ensure_emb():
21
  if not (os.path.exists('emb_text.npy') and os.path.exists('emb_num.npy')):
22
  from sentence_transformers import SentenceTransformer
23
  m = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
24
- te = m.encode(DF['text_record'].astype(str).tolist(), batch_size=256, show_progress_bar=True, normalize_embeddings=True)
 
 
 
25
  np.save('emb_text.npy', np.asarray(te, dtype='float32'))
26
- X = DF[NUM].copy(); X['zero_to_100_kmh_s'] = -X['zero_to_100_kmh_s']
 
27
  Xs = StandardScaler().fit_transform(X.values.astype('float32'))
28
  np.save('emb_num.npy', Xs.astype('float32'))
29
  return np.load('emb_text.npy'), np.load('emb_num.npy')
30
 
31
- def mapping():
 
32
  m = {}
33
  for mk, g in DF.groupby('make'):
34
  m[mk] = {}
35
  for md, g2 in g.groupby('model'):
36
- m[mk][md] = {'trims': sorted(g2['trim'].astype(str).unique().tolist())[:20], 'years': sorted(g2['year'].astype(int).unique().tolist())}
 
 
 
37
  return m
38
- MAP = mapping()
 
 
 
 
 
39
 
40
- def models_for(mk): return sorted(MAP.get(mk, {}).keys()) if mk else []
41
  def trim_year(mk, md):
42
  d = MAP.get(mk, {}).get(md, {})
43
  return d.get('trims', []), d.get('years', [])
44
 
45
- def anchor(mk, md, tr, yr):
 
 
 
 
 
46
  sub = DF.copy()
47
  if mk: sub = sub[sub['make'] == mk]
48
  if md: sub = sub[sub['model'] == md]
49
  if tr: sub = sub[sub['trim'] == tr]
50
  if yr: sub = sub[sub['year'] == yr]
51
- return None if sub.empty else sub.sort_values('popularity_score', ascending=False).iloc[0]
 
52
 
53
- def filt(df, body, fuel, yr_rng, price, safety, rel):
54
  out = df.copy()
55
  if body != 'Any': out = out[out['body_type'] == body]
56
  if fuel != 'Any': out = out[out['fuel'] == fuel]
57
- y0, y1 = yr_rng; p0, p1 = price
58
- out = out[(out['year'] >= y0) & (out['year'] <= y1) & (out['price_usd'] >= p0) & (out['price_usd'] <= p1)]
59
  out = out[(out['safety_rating'] >= safety) & (out['reliability_score'] >= rel)]
60
  return out
61
 
62
- def fmt(r):
63
- eff = (f"{int(r['city_mpg'])}-{int(r['highway_mpg'])} mpg" if pd.notna(r['city_mpg']) else f"{int(r['range_km'])} km range")
 
64
  return (
65
  f"**{r['name']}**\n"
66
  f"- Brand: {r['make']} | Body: {r['body_type']} | Fuel: {r['fuel']}\n"
67
- f"- HP: {int(r['horsepower'])} | 0–100: {r['zero_to_100_kmh_s']} s | Price: ${int(r['price_usd']):,}\n"
68
- f"- Popularity {int(r['popularity_score'])}/10 • Comfort {int(r['comfort_score'])}/10 • Reliability {int(r['reliability_score'])}/100 • Safety {int(r['safety_rating'])}★"
 
 
69
  )
70
 
71
- def rec(mk, md, tr, yr, topk, alpha, body, fuel, yr_rng, price, safety, rel):
72
- a = anchor(mk, md, tr, yr)
73
- if a is None: return 'No match for that combo.', None, None
74
- sub = filt(DF, body, fuel, yr_rng, price, safety, rel)
75
- if sub.empty: return 'No cars after filters.', None, None
76
- Et, En = ensure_emb(); idx = a.name; cand = sub.index.values
77
- st = cosine_similarity(Et[idx:idx+1], Et[cand])[0]; sn = cosine_similarity(En[idx:idx+1], En[cand])[0]
78
- s = float(alpha)*st + (1-float(alpha))*sn
 
 
 
 
 
 
 
 
 
 
79
  import numpy as np
80
- if idx in cand: s[np.where(cand==idx)[0][0]] = -1
81
- order = np.argsort(-s)[:topk]; sel = DF.loc[cand[order]].copy(); sel['similarity_%'] = (s[order]*100).round(1)
82
- cols = ['name','make','model','trim','year','body_type','fuel','engine_type','price_usd','horsepower','zero_to_100_kmh_s','popularity_score','comfort_score','reliability_score','tech_score','ownership_cost_score','safety_rating','similarity_%']
83
- return fmt(a), sel[cols], f'α={alpha:.2f}'
 
 
 
 
 
 
 
 
84
 
 
85
  with gr.Blocks() as demo:
86
- gr.Markdown('# RideSearch — Pick a car, get similar across brands')
87
 
88
- with gr.Tab('Pick & Recommend'):
89
  with gr.Row():
90
- mk = gr.Dropdown(sorted(DF['make'].unique().tolist()), label='Make')
91
- md = gr.Dropdown([], label='Model')
92
- tr = gr.Dropdown([], label='Trim (optional)')
93
- yr = gr.Dropdown([], label='Year (optional)')
94
  mk.change(models_for, mk, md)
95
- def _u(a,b): t,y = trim_year(a,b); return t,y
96
- md.change(_u, [mk,md], [tr,yr])
 
 
97
 
98
  with gr.Row():
99
  body = gr.Dropdown(['Any']+sorted(DF['body_type'].unique().tolist()), value='Any', label='Body')
100
  fuel = gr.Dropdown(['Any']+sorted(DF['fuel'].unique().tolist()), value='Any', label='Fuel')
101
- yr_rng = gr.RangeSlider(int(DF['year'].min()), int(DF['year'].max()), value=[int(DF['year'].min()), int(DF['year'].max())], step=1, label='Year range')
 
 
 
 
 
 
 
 
102
  with gr.Row():
103
- price = gr.RangeSlider(int(DF['price_usd'].min()), int(DF['price_usd'].max()), value=[int(DF['price_usd'].min()), min(int(DF['price_usd'].max()),60000)], step=500, label='Price (USD)')
104
  safety = gr.Slider(3,5,value=4,step=1,label='Min Safety ★')
105
  rel = gr.Slider(55,99,value=70,step=1,label='Min Reliability')
 
106
  with gr.Row():
107
  topk = gr.Slider(1,10,value=5,step=1,label='Recommendations')
108
  alpha = gr.Slider(0,1,value=0.7,step=0.05,label='α — Text vs Numeric')
109
- btn = gr.Button('Recommend')
110
- a = gr.Markdown(); recs = gr.Dataframe(interactive=False); note = gr.Markdown()
111
- btn.click(rec, [mk,md,tr,yr,topk,alpha,body,fuel,yr_rng,price,safety,rel], [a,recs,note])
112
 
113
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os, glob, numpy as np, pandas as pd
2
  from sklearn.metrics.pairwise import cosine_similarity
3
  from sklearn.preprocessing import StandardScaler
4
+ import gradio as gr
5
 
6
+ # ---------- data loading ----------
7
  def load_df():
8
  if os.path.exists('RideSearch_dataset.csv'):
9
  return pd.read_csv('RideSearch_dataset.csv')
 
12
  df = pd.concat([pd.read_csv(p) for p in parts], ignore_index=True)
13
  df.to_csv('RideSearch_dataset.csv', index=False)
14
  return df
15
+ raise FileNotFoundError('Upload dataset (RideSearch_part*_small.csv) or RideSearch_dataset.csv')
16
 
17
  DF = load_df()
 
18
 
19
+ NUM = [
20
+ 'horsepower','zero_to_100_kmh_s','seats','cargo_liters','price_usd',
21
+ 'popularity_score','comfort_score','reliability_score','tech_score',
22
+ 'ownership_cost_score','safety_rating'
23
+ ]
24
+
25
+ # ---------- embeddings (lazy build if missing) ----------
26
def ensure_emb():
    """Return the (text, numeric) embedding matrices for ``DF``.

    The matrices are cached on disk as ``emb_text.npy`` and ``emb_num.npy``.
    They are rebuilt when either file is missing or when the cached row count
    no longer matches ``DF`` (a stale cache would otherwise make the row
    lookups in the recommender fail or pair rows with the wrong vectors).

    Returns:
        tuple: ``(text_embeddings, numeric_embeddings)`` as float32 arrays
        with one row per row of ``DF``.
    """
    text_path, num_path = 'emb_text.npy', 'emb_num.npy'

    def _load_cached():
        # Only trust the cache when both files exist and line up with DF.
        if not (os.path.exists(text_path) and os.path.exists(num_path)):
            return None
        text, num = np.load(text_path), np.load(num_path)
        if len(text) != len(DF) or len(num) != len(DF):
            return None
        return text, num

    cached = _load_cached()
    if cached is not None:
        return cached

    # Imported lazily: the model library is only needed when (re)building.
    from sentence_transformers import SentenceTransformer
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    text = np.asarray(
        model.encode(
            DF['text_record'].astype(str).tolist(),
            batch_size=256, show_progress_bar=True, normalize_embeddings=True
        ),
        dtype='float32',
    )
    np.save(text_path, text)

    features = DF[NUM].copy()
    # Negate the sprint time so that, like the other features, bigger = better.
    features['zero_to_100_kmh_s'] = -features['zero_to_100_kmh_s']
    num = StandardScaler().fit_transform(features.values.astype('float32')).astype('float32')
    np.save(num_path, num)

    return text, num
40
 
41
+ # ---------- dependent dropdown maps ----------
42
def _map():
    """Build the make -> model -> {'trims', 'years'} lookup used by the dropdowns.

    Trims are stringified, de-duplicated, sorted and capped at 20 entries;
    years are cast to int, de-duplicated and sorted.
    """
    def _options(rows):
        trims = sorted(rows['trim'].astype(str).unique().tolist())[:20]
        years = sorted(rows['year'].astype(int).unique().tolist())
        return {'trims': trims, 'years': years}

    return {
        make: {model: _options(rows) for model, rows in by_make.groupby('model')}
        for make, by_make in DF.groupby('make')
    }
52
+ MAP = _map()
53
+
54
def models_for(mk):
    """Return a Gradio update that repopulates the Model dropdown for make ``mk``.

    An update object (rather than a bare list) is returned so that older
    Gradio versions refresh the choices; the current selection is cleared.
    """
    if not mk:
        return gr.update(choices=[], value=None)
    return gr.update(choices=sorted(MAP.get(mk, {})), value=None)
58
 
 
59
def trim_year(mk, md):
    """Look up the trim and year options for make ``mk`` / model ``md``.

    Returns a ``(trims, years)`` pair; each is an empty list when the
    make/model combination is not present in ``MAP``.
    """
    entry = MAP.get(mk, {}).get(md, {})
    trims = entry.get('trims', [])
    years = entry.get('years', [])
    return trims, years
62
 
63
def _up(a, b):
    """Return Gradio updates for the Trim and Year dropdowns.

    ``a`` is the selected make and ``b`` the selected model; both dropdowns
    get fresh choices and have their current value cleared.
    """
    return tuple(
        gr.update(choices=options, value=None)
        for options in trim_year(a, b)
    )
66
+
67
+ # ---------- helpers ----------
68
def anchor_row(mk, md, tr, yr):
    """Pick the reference ("anchor") row of ``DF`` for the user's selection.

    Each argument narrows the search only when it is truthy, so trim and
    year stay optional. Among the matching rows the one with the highest
    ``popularity_score`` is returned.

    Args:
        mk: Selected make, or a falsy value to skip the filter.
        md: Selected model, or a falsy value to skip the filter.
        tr: Selected trim, or a falsy value to skip the filter.
        yr: Selected year, or a falsy value to skip the filter.

    Returns:
        The matching row as a ``pandas.Series`` (its ``name`` is the row's
        index label in ``DF``), or ``None`` when nothing matches.
    """
    # Boolean indexing already yields new frames, so no up-front copy is needed.
    sub = DF
    if mk:
        sub = sub[sub['make'] == mk]
    if md:
        sub = sub[sub['model'] == md]
    if tr:
        # Trim choices are produced with astype(str); compare the same way so
        # numeric or missing trims can still be matched.
        sub = sub[sub['trim'].astype(str) == str(tr)]
    if yr:
        # Year choices are ints; coerce in case the UI hands back a float or string.
        sub = sub[sub['year'] == int(yr)]
    if sub.empty:
        return None
    # Stable sort so ties on popularity are resolved deterministically.
    return sub.sort_values('popularity_score', ascending=False, kind='stable').iloc[0]
76
 
77
def apply_filters(df, body, fuel, y_min, y_max, p_min, p_max, safety, rel):
    """Return the rows of ``df`` that satisfy every active filter.

    ``body`` and ``fuel`` are ignored when set to ``'Any'``. Year and price
    bounds are inclusive; ``safety`` and ``rel`` are minimum values for
    ``safety_rating`` and ``reliability_score``. ``df`` itself is not modified.
    """
    in_year = df['year'].between(y_min, y_max)
    in_price = df['price_usd'].between(p_min, p_max)
    good_enough = (df['safety_rating'] >= safety) & (df['reliability_score'] >= rel)
    keep = in_year & in_price & good_enough

    if body != 'Any':
        keep = keep & (df['body_type'] == body)
    if fuel != 'Any':
        keep = keep & (df['fuel'] == fuel)

    return df[keep].copy()
85
 
86
def fmt_card(r):
    """Render the anchor car ``r`` as a short Markdown summary card.

    Args:
        r: A mapping-like row (e.g. a ``pandas.Series``) providing ``name``,
            ``make``, ``body_type``, ``fuel``, ``horsepower``,
            ``zero_to_100_kmh_s``, ``price_usd``, ``popularity_score``,
            ``comfort_score``, ``reliability_score`` and ``safety_rating``.

    Returns:
        str: Markdown text with a bold title line and three bullet lines.
    """
    # NOTE(review): an efficiency string (mpg / range) used to be computed here
    # but was never shown; it could raise on rows with missing mpg/range values,
    # so it was removed. Re-add it to the card deliberately if it is wanted.
    return (
        f"**{r['name']}**\n"
        f"- Brand: {r['make']} | Body: {r['body_type']} | Fuel: {r['fuel']}\n"
        f"- HP: {int(r['horsepower'])} | 0–100: {r['zero_to_100_kmh_s']} s | "
        f"Price: ${int(r['price_usd']):,}\n"
        f"- Popularity {int(r['popularity_score'])}/10 • Comfort {int(r['comfort_score'])}/10 • "
        f"Reliability {int(r['reliability_score'])}/100 • Safety {int(r['safety_rating'])}★"
    )
97
 
98
def recommend(mk, md, tr, yr, topk, alpha,
              body, fuel, y_min, y_max, p_min, p_max, safety, rel):
    """Recommend the cars most similar to the selected anchor car.

    The anchor is chosen from the make/model/trim/year selection, the
    candidate pool is ``DF`` after the body/fuel/year/price/safety/
    reliability filters, and similarity is a blend of text-embedding and
    numeric-feature cosine similarity weighted by ``alpha``.

    Args:
        mk, md, tr, yr: Anchor selection (trim and year may be empty).
        topk: Number of recommendations to return.
        alpha: Weight of the text similarity; ``1 - alpha`` goes to numeric.
        body, fuel: Category filters, ``'Any'`` to disable.
        y_min, y_max: Inclusive year bounds.
        p_min, p_max: Inclusive price bounds (USD).
        safety, rel: Minimum safety rating and reliability score.

    Returns:
        tuple: ``(markdown_card, results_dataframe, note)``; when there is no
        anchor or no candidate, ``(message, None, None)``.
    """
    a = anchor_row(mk, md, tr, yr)
    if a is None:
        return "No match for that combo.", None, None

    sub = apply_filters(
        DF, body, fuel, int(y_min), int(y_max), int(p_min), int(p_max), int(safety), int(rel)
    )

    idx = int(a.name)
    cand = sub.index.values
    # Drop the anchor from the pool outright: merely giving it a low score let
    # it reappear whenever fewer than `topk` other candidates were available.
    cand = cand[cand != idx]
    if cand.size == 0:
        return "No cars after filters.", None, None

    alpha = float(alpha)
    Et, En = ensure_emb()
    st = cosine_similarity(Et[idx:idx+1], Et[cand])[0]
    sn = cosine_similarity(En[idx:idx+1], En[cand])[0]
    s = alpha * st + (1 - alpha) * sn

    # Slider values can arrive as floats; a slice bound must be an int.
    order = np.argsort(-s, kind='stable')[:int(topk)]
    sel = DF.loc[cand[order]].copy()
    sel['similarity_%'] = (s[order]*100).round(1)

    cols = ['name','make','model','trim','year','body_type','fuel','engine_type',
            'price_usd','horsepower','zero_to_100_kmh_s',
            'popularity_score','comfort_score','reliability_score','tech_score',
            'ownership_cost_score','safety_rating','similarity_%']
    return fmt_card(a), sel[cols], f"α = {alpha:.2f} (text ↔ numeric)"
129
 
130
+ # ---------- UI (no RangeSlider; use min/max sliders) ----------
131
# NOTE(review): block nesting below was reconstructed from a flattened diff
# view — confirm the indentation against the real app.py before applying.
with gr.Blocks() as demo:
    gr.Markdown("# RideSearch — pick a car, get similar across brands")

    with gr.Tab("Pick & Recommend"):
        # Anchor selection: make -> model -> (optional) trim / year.
        with gr.Row():
            mk = gr.Dropdown(sorted(DF['make'].unique().tolist()), label="Make", value=None)
            md = gr.Dropdown([], label="Model", value=None)
            tr = gr.Dropdown([], label="Trim (optional)", value=None)
            yr = gr.Dropdown([], label="Year (optional)", value=None)
        # Dependent dropdowns: changing the make refills the models, changing
        # the model refills the trims and years.
        mk.change(models_for, mk, md)
        md.change(_up, [mk, md], [tr, yr])

        # Slider bounds are taken from the dataset itself.
        ylo, yhi = int(DF['year'].min()), int(DF['year'].max())
        plo, phi = int(DF['price_usd'].min()), int(DF['price_usd'].max())

        # Candidate filters.
        with gr.Row():
            body = gr.Dropdown(['Any']+sorted(DF['body_type'].unique().tolist()), value='Any', label='Body')
            fuel = gr.Dropdown(['Any']+sorted(DF['fuel'].unique().tolist()), value='Any', label='Fuel')

        with gr.Row():
            y_min = gr.Slider(ylo, yhi, value=ylo, step=1, label='Year min')
            y_max = gr.Slider(ylo, yhi, value=yhi, step=1, label='Year max')

        with gr.Row():
            p_min = gr.Slider(plo, phi, value=plo, step=500, label='Price min (USD)')
            # Default upper price is capped at 60000 (or the dataset max if lower).
            p_max = gr.Slider(plo, phi, value=min(phi, 60000), step=500, label='Price max (USD)')

        with gr.Row():
            safety = gr.Slider(3,5,value=4,step=1,label='Min Safety ★')
            rel = gr.Slider(55,99,value=70,step=1,label='Min Reliability')

        # Result size and the text-vs-numeric similarity weight.
        with gr.Row():
            topk = gr.Slider(1,10,value=5,step=1,label='Recommendations')
            alpha = gr.Slider(0,1,value=0.7,step=0.05,label='α — Text vs Numeric')

        # Outputs: anchor summary card, results table, and a short note.
        go = gr.Button("Recommend")
        anchor_md = gr.Markdown()
        table = gr.Dataframe(interactive=False)
        note = gr.Markdown()

        # The input order here must match the parameter order of recommend().
        go.click(
            recommend,
            [mk,md,tr,yr,topk,alpha,body,fuel,y_min,y_max,p_min,p_max,safety,rel],
            [anchor_md, table, note]
        )

# Works locally and on Spaces:
if __name__ == "__main__":
    # Listens on all interfaces, port 7860, with request queuing enabled.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)