Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
-
|
| 2 |
import os, glob, numpy as np, pandas as pd
|
| 3 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 4 |
from sklearn.preprocessing import StandardScaler
|
| 5 |
-
import gradio as gr
|
| 6 |
|
|
|
|
| 7 |
def load_df():
|
| 8 |
if os.path.exists('RideSearch_dataset.csv'):
|
| 9 |
return pd.read_csv('RideSearch_dataset.csv')
|
|
@@ -12,102 +12,168 @@ def load_df():
|
|
| 12 |
df = pd.concat([pd.read_csv(p) for p in parts], ignore_index=True)
|
| 13 |
df.to_csv('RideSearch_dataset.csv', index=False)
|
| 14 |
return df
|
| 15 |
-
raise FileNotFoundError('Upload dataset (
|
| 16 |
|
| 17 |
DF = load_df()
|
| 18 |
-
NUM = ['horsepower','zero_to_100_kmh_s','seats','cargo_liters','price_usd','popularity_score','comfort_score','reliability_score','tech_score','ownership_cost_score','safety_rating']
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
def ensure_emb():
|
| 21 |
if not (os.path.exists('emb_text.npy') and os.path.exists('emb_num.npy')):
|
| 22 |
from sentence_transformers import SentenceTransformer
|
| 23 |
m = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
|
| 24 |
-
te = m.encode(
|
|
|
|
|
|
|
|
|
|
| 25 |
np.save('emb_text.npy', np.asarray(te, dtype='float32'))
|
| 26 |
-
X = DF[NUM].copy()
|
|
|
|
| 27 |
Xs = StandardScaler().fit_transform(X.values.astype('float32'))
|
| 28 |
np.save('emb_num.npy', Xs.astype('float32'))
|
| 29 |
return np.load('emb_text.npy'), np.load('emb_num.npy')
|
| 30 |
|
| 31 |
-
|
|
|
|
| 32 |
m = {}
|
| 33 |
for mk, g in DF.groupby('make'):
|
| 34 |
m[mk] = {}
|
| 35 |
for md, g2 in g.groupby('model'):
|
| 36 |
-
m[mk][md] = {
|
|
|
|
|
|
|
|
|
|
| 37 |
return m
|
| 38 |
-
MAP =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
-
def models_for(mk): return sorted(MAP.get(mk, {}).keys()) if mk else []
|
| 41 |
def trim_year(mk, md):
|
| 42 |
d = MAP.get(mk, {}).get(md, {})
|
| 43 |
return d.get('trims', []), d.get('years', [])
|
| 44 |
|
| 45 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
sub = DF.copy()
|
| 47 |
if mk: sub = sub[sub['make'] == mk]
|
| 48 |
if md: sub = sub[sub['model'] == md]
|
| 49 |
if tr: sub = sub[sub['trim'] == tr]
|
| 50 |
if yr: sub = sub[sub['year'] == yr]
|
| 51 |
-
|
|
|
|
| 52 |
|
| 53 |
-
def
|
| 54 |
out = df.copy()
|
| 55 |
if body != 'Any': out = out[out['body_type'] == body]
|
| 56 |
if fuel != 'Any': out = out[out['fuel'] == fuel]
|
| 57 |
-
|
| 58 |
-
out = out[(out['
|
| 59 |
out = out[(out['safety_rating'] >= safety) & (out['reliability_score'] >= rel)]
|
| 60 |
return out
|
| 61 |
|
| 62 |
-
def
|
| 63 |
-
eff = (f"{int(r['city_mpg'])}-{int(r['highway_mpg'])} mpg"
|
|
|
|
| 64 |
return (
|
| 65 |
f"**{r['name']}**\n"
|
| 66 |
f"- Brand: {r['make']} | Body: {r['body_type']} | Fuel: {r['fuel']}\n"
|
| 67 |
-
f"- HP: {int(r['horsepower'])} | 0–100: {r['zero_to_100_kmh_s']} s |
|
| 68 |
-
f"
|
|
|
|
|
|
|
| 69 |
)
|
| 70 |
|
| 71 |
-
def
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
import numpy as np
|
| 80 |
-
if idx in cand:
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
|
|
|
|
| 85 |
with gr.Blocks() as demo:
|
| 86 |
-
gr.Markdown(
|
| 87 |
|
| 88 |
-
with gr.Tab(
|
| 89 |
with gr.Row():
|
| 90 |
-
mk = gr.Dropdown(sorted(DF['make'].unique().tolist()), label=
|
| 91 |
-
md = gr.Dropdown([], label=
|
| 92 |
-
tr = gr.Dropdown([], label=
|
| 93 |
-
yr = gr.Dropdown([], label=
|
| 94 |
mk.change(models_for, mk, md)
|
| 95 |
-
|
| 96 |
-
|
|
|
|
|
|
|
| 97 |
|
| 98 |
with gr.Row():
|
| 99 |
body = gr.Dropdown(['Any']+sorted(DF['body_type'].unique().tolist()), value='Any', label='Body')
|
| 100 |
fuel = gr.Dropdown(['Any']+sorted(DF['fuel'].unique().tolist()), value='Any', label='Fuel')
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
with gr.Row():
|
| 103 |
-
price = gr.RangeSlider(int(DF['price_usd'].min()), int(DF['price_usd'].max()), value=[int(DF['price_usd'].min()), min(int(DF['price_usd'].max()),60000)], step=500, label='Price (USD)')
|
| 104 |
safety = gr.Slider(3,5,value=4,step=1,label='Min Safety ★')
|
| 105 |
rel = gr.Slider(55,99,value=70,step=1,label='Min Reliability')
|
|
|
|
| 106 |
with gr.Row():
|
| 107 |
topk = gr.Slider(1,10,value=5,step=1,label='Recommendations')
|
| 108 |
alpha = gr.Slider(0,1,value=0.7,step=0.05,label='α — Text vs Numeric')
|
| 109 |
-
btn = gr.Button('Recommend')
|
| 110 |
-
a = gr.Markdown(); recs = gr.Dataframe(interactive=False); note = gr.Markdown()
|
| 111 |
-
btn.click(rec, [mk,md,tr,yr,topk,alpha,body,fuel,yr_rng,price,safety,rel], [a,recs,note])
|
| 112 |
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os, glob, numpy as np, pandas as pd
|
| 2 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 3 |
from sklearn.preprocessing import StandardScaler
|
| 4 |
+
import gradio as gr
|
| 5 |
|
| 6 |
+
# ---------- data loading ----------
|
| 7 |
def load_df():
|
| 8 |
if os.path.exists('RideSearch_dataset.csv'):
|
| 9 |
return pd.read_csv('RideSearch_dataset.csv')
|
|
|
|
| 12 |
df = pd.concat([pd.read_csv(p) for p in parts], ignore_index=True)
|
| 13 |
df.to_csv('RideSearch_dataset.csv', index=False)
|
| 14 |
return df
|
| 15 |
+
raise FileNotFoundError('Upload dataset (RideSearch_part*_small.csv) or RideSearch_dataset.csv')
|
| 16 |
|
| 17 |
# The dataset is loaded once, at import time.
DF = load_df()

# Numeric columns used to build the standardized numeric embedding.
NUM = [
    'horsepower',
    'zero_to_100_kmh_s',
    'seats',
    'cargo_liters',
    'price_usd',
    'popularity_score',
    'comfort_score',
    'reliability_score',
    'tech_score',
    'ownership_cost_score',
    'safety_rating',
]
|
| 24 |
+
|
| 25 |
+
# ---------- embeddings (lazy build if missing) ----------
|
| 26 |
def ensure_emb():
    """Return ``(text_embeddings, numeric_embeddings)`` for the rows of ``DF``.

    The two matrices are cached on disk as ``emb_text.npy`` and
    ``emb_num.npy``. They are rebuilt when either file is missing or when a
    cached matrix does not have exactly one row per row of ``DF``.

    Returns:
        tuple: ``(text, numeric)`` float32 arrays. The text matrix comes from
        the ``text_record`` column encoded with a sentence-transformers
        model; the numeric matrix is the standardized ``NUM`` columns.
    """
    text_path, num_path = 'emb_text.npy', 'emb_num.npy'

    if os.path.exists(text_path) and os.path.exists(num_path):
        text_emb, num_emb = np.load(text_path), np.load(num_path)
        # Callers index these matrices by DataFrame row, so a cache built from
        # a different dataset would return wrong rows or go out of bounds.
        # Only trust the files when their row counts match the frame.
        if len(text_emb) == len(DF) and len(num_emb) == len(DF):
            return text_emb, num_emb

    # Imported lazily: the model library is only needed for a (re)build.
    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    text_emb = np.asarray(
        model.encode(
            DF['text_record'].astype(str).tolist(),
            batch_size=256, show_progress_bar=True, normalize_embeddings=True
        ),
        dtype='float32',
    )
    np.save(text_path, text_emb)

    X = DF[NUM].copy()
    X['zero_to_100_kmh_s'] = -X['zero_to_100_kmh_s']  # smaller time = better
    num_emb = StandardScaler().fit_transform(X.values.astype('float32')).astype('float32')
    np.save(num_path, num_emb)

    # Return what was just written instead of reading both files back.
    return text_emb, num_emb
|
| 40 |
|
| 41 |
+
# ---------- dependent dropdown maps ----------
|
| 42 |
+
def _map():
    """Build the nested ``make -> model -> {'trims', 'years'}`` lookup from ``DF``.

    Trims are stringified, sorted and capped at the first 20; years are cast
    to int and sorted.
    """
    lookup = {}
    for make, make_rows in DF.groupby('make'):
        lookup[make] = {
            model: {
                'trims': sorted(model_rows['trim'].astype(str).unique().tolist())[:20],
                'years': sorted(model_rows['year'].astype(int).unique().tolist()),
            }
            for model, model_rows in make_rows.groupby('model')
        }
    return lookup


MAP = _map()
|
| 53 |
+
|
| 54 |
+
def models_for(mk):
    """Return a Gradio update listing the models available for make ``mk``.

    An empty/None make yields an empty choice list. The selected value is
    always reset to None.
    """
    # Return a UI update so older Gradio repopulates the choices
    if mk:
        opts = sorted(MAP.get(mk, {}).keys())
    else:
        opts = []
    return gr.update(choices=opts, value=None)
|
| 58 |
|
|
|
|
| 59 |
def trim_year(mk, md):
    """Return ``(trims, years)`` for a make/model pair, or two empty lists if unknown."""
    entry = MAP.get(mk, {}).get(md, {})
    trims = entry.get('trims', [])
    years = entry.get('years', [])
    return trims, years
|
| 62 |
|
| 63 |
+
def _up(a, b):
    """Return Gradio updates for the trim and year dropdowns of make ``a`` / model ``b``.

    Both dropdowns get fresh choices and have their selection cleared.
    """
    trims, years = trim_year(a, b)
    trim_update = gr.update(choices=trims, value=None)
    year_update = gr.update(choices=years, value=None)
    return trim_update, year_update
|
| 66 |
+
|
| 67 |
+
# ---------- helpers ----------
|
| 68 |
+
def anchor_row(mk, md, tr, yr):
    """Pick the reference ("anchor") car for the given selection.

    Each argument narrows ``DF`` only when it is truthy, so unset dropdowns
    are ignored.

    Args:
        mk: Make to match, or None/empty to skip.
        md: Model to match, or None/empty to skip.
        tr: Trim to match (compared as a string), or None/empty to skip.
        yr: Year to match (coerced to int), or None/empty to skip.

    Returns:
        The matching row (a Series whose ``name`` is its index label in
        ``DF``) with the highest ``popularity_score``, or None when nothing
        matches.
    """
    # Build one boolean mask instead of copying the whole frame per call.
    mask = pd.Series(True, index=DF.index)
    if mk:
        mask &= DF['make'] == mk
    if md:
        mask &= DF['model'] == md
    if tr:
        # The dropdown choices are built from the stringified trim column,
        # so compare on the string form; otherwise non-string trims in the
        # data could never be matched.
        mask &= DF['trim'].astype(str) == str(tr)
    if yr:
        # Coerce so a year delivered as a string or float still matches.
        mask &= DF['year'] == int(yr)

    sub = DF[mask]
    if sub.empty:
        return None
    return sub.sort_values('popularity_score', ascending=False).iloc[0]
|
| 76 |
|
| 77 |
+
def apply_filters(df, body, fuel, y_min, y_max, p_min, p_max, safety, rel):
    """Return the rows of ``df`` that satisfy every filter, keeping the original index.

    Args:
        df: Frame with ``body_type``, ``fuel``, ``year``, ``price_usd``,
            ``safety_rating`` and ``reliability_score`` columns.
        body: Required body type, or ``'Any'`` to skip this filter.
        fuel: Required fuel, or ``'Any'`` to skip this filter.
        y_min, y_max: Inclusive year bounds.
        p_min, p_max: Inclusive price bounds.
        safety: Minimum ``safety_rating`` (inclusive).
        rel: Minimum ``reliability_score`` (inclusive).

    Returns:
        A new DataFrame; ``df`` itself is not modified.
    """
    # The min and max sliders are independent, so the user can cross them.
    # Treat crossed bounds as the same range instead of returning nothing.
    if y_min > y_max:
        y_min, y_max = y_max, y_min
    if p_min > p_max:
        p_min, p_max = p_max, p_min

    mask = df['year'].between(y_min, y_max) & df['price_usd'].between(p_min, p_max)
    mask &= (df['safety_rating'] >= safety) & (df['reliability_score'] >= rel)
    if body != 'Any':
        mask &= df['body_type'] == body
    if fuel != 'Any':
        mask &= df['fuel'] == fuel
    return df[mask].copy()
|
| 85 |
|
| 86 |
+
def fmt_card(r):
    """Format one car record as a Markdown summary card.

    Args:
        r: Mapping-like row (pandas Series or dict) with ``name``, ``make``,
            ``body_type``, ``fuel``, ``horsepower``, ``zero_to_100_kmh_s``,
            ``price_usd``, ``popularity_score``, ``comfort_score``,
            ``reliability_score`` and ``safety_rating``. ``city_mpg``,
            ``highway_mpg`` and ``range_km`` are optional.

    Returns:
        str: Multi-line Markdown. An efficiency segment ("A-B mpg" or
        "N km range") is inserted before the price only when the data for it
        is present.
    """
    # The efficiency text used to be computed unconditionally and then never
    # rendered, so a row with missing mpg/range raised for no benefit.
    # Build it defensively and show it only when available.
    city, hwy = r.get('city_mpg'), r.get('highway_mpg')
    eff = None
    if pd.notna(city) and pd.notna(hwy):
        eff = f"{int(city)}-{int(hwy)} mpg"
    else:
        rng = r.get('range_km')
        if pd.notna(rng):
            eff = f"{int(rng)} km range"
    eff_part = f"{eff} | " if eff else ""

    return (
        f"**{r['name']}**\n"
        f"- Brand: {r['make']} | Body: {r['body_type']} | Fuel: {r['fuel']}\n"
        f"- HP: {int(r['horsepower'])} | 0–100: {r['zero_to_100_kmh_s']} s | "
        f"{eff_part}Price: ${int(r['price_usd']):,}\n"
        f"- Popularity {int(r['popularity_score'])}/10 • Comfort {int(r['comfort_score'])}/10 • "
        f"Reliability {int(r['reliability_score'])}/100 • Safety {int(r['safety_rating'])}★"
    )
|
| 97 |
|
| 98 |
+
def recommend(mk, md, tr, yr, topk, alpha,
              body, fuel, y_min, y_max, p_min, p_max, safety, rel):
    """Recommend the cars most similar to the selected anchor car.

    Similarity is ``alpha * text_cosine + (1 - alpha) * numeric_cosine``
    between the anchor and every car that passes the filters.

    Args:
        mk, md, tr, yr: Make / model / trim / year selection for the anchor.
        topk: Maximum number of recommendations (coerced to int).
        alpha: Weight of the text similarity, between 0 and 1.
        body, fuel: Body-type and fuel filters (``'Any'`` disables them).
        y_min, y_max: Year bounds for candidates.
        p_min, p_max: Price bounds for candidates.
        safety, rel: Minimum safety rating and reliability score.

    Returns:
        tuple: ``(anchor_markdown, results_frame, note_markdown)``. When no
        anchor or no candidate exists, a message and two ``None`` values.
    """
    a = anchor_row(mk, md, tr, yr)
    if a is None:
        return "No match for that combo.", None, None

    sub = apply_filters(
        DF, body, fuel, int(y_min), int(y_max), int(p_min), int(p_max), int(safety), int(rel)
    )
    if sub.empty:
        return "No cars after filters.", None, None

    # Index labels double as row positions in the embedding matrices.
    # NOTE(review): this assumes DF has a default 0..n-1 index — confirm in load_df.
    idx = int(a.name)
    cand = sub.index.values
    # Remove the anchor outright. Masking its score to -1 still let it show
    # up in the results whenever fewer than `topk` other cars were left.
    cand = cand[cand != idx]
    if cand.size == 0:
        return "No cars after filters.", None, None

    Et, En = ensure_emb()
    w = float(alpha)
    st = cosine_similarity(Et[idx:idx+1], Et[cand])[0]
    sn = cosine_similarity(En[idx:idx+1], En[cand])[0]
    s = w * st + (1 - w) * sn

    # Slider values may be delivered as floats; a slice bound must be an int.
    order = np.argsort(-s)[:int(topk)]
    sel = DF.loc[cand[order]].copy()
    sel['similarity_%'] = (s[order]*100).round(1)

    cols = ['name','make','model','trim','year','body_type','fuel','engine_type',
            'price_usd','horsepower','zero_to_100_kmh_s',
            'popularity_score','comfort_score','reliability_score','tech_score',
            'ownership_cost_score','safety_rating','similarity_%']
    return fmt_card(a), sel[cols], f"α = {w:.2f} (text ↔ numeric)"
|
| 129 |
|
| 130 |
+
# ---------- UI (no RangeSlider; use min/max sliders) ----------
|
| 131 |
with gr.Blocks() as demo:
    # NOTE(review): the nesting of this layout was reconstructed from a
    # flattened view of the file — confirm which widgets belong to which row.
    gr.Markdown("# RideSearch — pick a car, get similar across brands")

    with gr.Tab("Pick & Recommend"):
        # Anchor selection: model depends on make; trim and year depend on model.
        with gr.Row():
            mk = gr.Dropdown(choices=sorted(DF['make'].unique().tolist()), label="Make", value=None)
            md = gr.Dropdown(choices=[], label="Model", value=None)
            tr = gr.Dropdown(choices=[], label="Trim (optional)", value=None)
            yr = gr.Dropdown(choices=[], label="Year (optional)", value=None)
        mk.change(fn=models_for, inputs=mk, outputs=md)
        md.change(fn=_up, inputs=[mk, md], outputs=[tr, yr])

        # Slider bounds come from the data.
        ylo, yhi = int(DF['year'].min()), int(DF['year'].max())
        plo, phi = int(DF['price_usd'].min()), int(DF['price_usd'].max())

        with gr.Row():
            body = gr.Dropdown(
                choices=['Any']+sorted(DF['body_type'].unique().tolist()),
                value='Any', label='Body',
            )
            fuel = gr.Dropdown(
                choices=['Any']+sorted(DF['fuel'].unique().tolist()),
                value='Any', label='Fuel',
            )

        with gr.Row():
            y_min = gr.Slider(minimum=ylo, maximum=yhi, value=ylo, step=1, label='Year min')
            y_max = gr.Slider(minimum=ylo, maximum=yhi, value=yhi, step=1, label='Year max')

        with gr.Row():
            p_min = gr.Slider(minimum=plo, maximum=phi, value=plo, step=500, label='Price min (USD)')
            p_max = gr.Slider(minimum=plo, maximum=phi, value=min(phi, 60000), step=500, label='Price max (USD)')

        with gr.Row():
            safety = gr.Slider(minimum=3, maximum=5, value=4, step=1, label='Min Safety ★')
            rel = gr.Slider(minimum=55, maximum=99, value=70, step=1, label='Min Reliability')

        with gr.Row():
            topk = gr.Slider(minimum=1, maximum=10, value=5, step=1, label='Recommendations')
            alpha = gr.Slider(minimum=0, maximum=1, value=0.7, step=0.05, label='α — Text vs Numeric')

        go = gr.Button("Recommend")
        anchor_md = gr.Markdown()
        table = gr.Dataframe(interactive=False)
        note = gr.Markdown()

        # Input order must match the positional parameters of recommend().
        go.click(
            fn=recommend,
            inputs=[mk, md, tr, yr, topk, alpha, body, fuel, y_min, y_max, p_min, p_max, safety, rel],
            outputs=[anchor_md, table, note],
        )
|
| 176 |
+
|
| 177 |
+
# Works locally and on Spaces:
|
| 178 |
+
if __name__ == "__main__":
    # Enable request queuing, then serve on all interfaces at port 7860.
    demo.queue().launch(
        server_name="0.0.0.0",
        server_port=7860,
    )
|