Levimichael4 committed on
Commit
02c394e
·
verified ·
1 Parent(s): 9a20d4c

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -173
app.py DELETED
@@ -1,173 +0,0 @@
1
- import os, glob, numpy as np, pandas as pd
2
- from sklearn.metrics.pairwise import cosine_similarity
3
- from sklearn.preprocessing import StandardScaler
4
- import gradio as gr
5
-
6
- # ---------- data loading ----------
7
def load_df():
    """Load the RideSearch dataset as a DataFrame.

    The merged file ``RideSearch_dataset.csv`` is used when present.
    Otherwise every ``RideSearch_part*_small.csv`` shard is read in sorted
    filename order, the shards are concatenated with a fresh index, the
    result is written to ``RideSearch_dataset.csv`` and returned.

    Raises:
        FileNotFoundError: neither the merged file nor any shard exists.
    """
    merged_path = 'RideSearch_dataset.csv'
    if os.path.exists(merged_path):
        return pd.read_csv(merged_path)

    shard_paths = sorted(glob.glob('RideSearch_part*_small.csv'))
    if not shard_paths:
        raise FileNotFoundError('Upload dataset (RideSearch_part*_small.csv) or RideSearch_dataset.csv')

    frames = [pd.read_csv(path) for path in shard_paths]
    merged = pd.concat(frames, ignore_index=True)
    # Persist the merged frame so later runs take the fast path above.
    merged.to_csv(merged_path, index=False)
    return merged
16
-
17
# Dataset shared by every function below; loaded once at import time.
DF = load_df()

# Columns of DF that feed the numeric embedding.
NUM = [
    'horsepower',
    'zero_to_100_kmh_s',
    'seats',
    'cargo_liters',
    'price_usd',
    'popularity_score',
    'comfort_score',
    'reliability_score',
    'tech_score',
    'ownership_cost_score',
    'safety_rating',
]
24
-
25
- # ---------- embeddings (lazy build if missing) ----------
26
# In-process cache so the .npy files are read (or built) at most once.
_EMB_CACHE = {}


def _load_emb(path, n_rows):
    """Return the array saved at `path`, or None if it is missing or stale.

    An array is stale when its row count differs from `n_rows`.
    """
    if not os.path.exists(path):
        return None
    arr = np.load(path)
    return arr if arr.shape[0] == n_rows else None


def ensure_emb():
    """Return ``(text_embeddings, numeric_embeddings)`` for the rows of DF.

    Each matrix is loaded from ``emb_text.npy`` / ``emb_num.npy`` when the
    file exists and has one row per DF row; otherwise it is rebuilt and
    saved. The pair is cached in memory, so repeated calls do no disk I/O.

    Returns:
        Tuple of two float32 arrays: the sentence-transformer encoding of
        ``DF['text_record']`` and the standard-scaled ``DF[NUM]`` features.
    """
    cached = _EMB_CACHE.get('pair')
    if cached is not None:
        return cached

    n_rows = len(DF)

    text_emb = _load_emb('emb_text.npy', n_rows)
    if text_emb is None:
        # Imported lazily: only needed when the text embedding must be built.
        from sentence_transformers import SentenceTransformer
        model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        encoded = model.encode(
            DF['text_record'].astype(str).tolist(),
            batch_size=256,
            show_progress_bar=True,
            normalize_embeddings=True,
        )
        text_emb = np.asarray(encoded, dtype='float32')
        np.save('emb_text.npy', text_emb)

    num_emb = _load_emb('emb_num.npy', n_rows)
    if num_emb is None:
        feats = DF[NUM].copy()
        # Negate acceleration time: smaller time = better.
        feats['zero_to_100_kmh_s'] = -feats['zero_to_100_kmh_s']
        scaled = StandardScaler().fit_transform(feats.values.astype('float32'))
        num_emb = scaled.astype('float32')
        np.save('emb_num.npy', num_emb)

    _EMB_CACHE['pair'] = (text_emb, num_emb)
    return _EMB_CACHE['pair']
38
-
39
- # ---------- dependent dropdown maps ----------
40
def _map():
    """Build the nested lookup that drives the dependent dropdowns.

    Returns:
        ``{make: {model: {'trims': [...], 'years': [...]}}}`` where trims
        are the sorted unique string-cast trims (first 20 only) and years
        are the sorted unique int-cast years for that make/model in DF.
    """
    lookup = {}
    for make, make_rows in DF.groupby('make'):
        per_model = {}
        for model, model_rows in make_rows.groupby('model'):
            trims = sorted(model_rows['trim'].astype(str).unique().tolist())
            years = sorted(model_rows['year'].astype(int).unique().tolist())
            per_model[model] = {'trims': trims[:20], 'years': years}
        lookup[make] = per_model
    return lookup


MAP = _map()
51
-
52
def models_for(mk):
    """Return the sorted model names for make `mk`; [] if `mk` is falsy or unknown."""
    if not mk:
        return []
    known_models = MAP.get(mk, {})
    return sorted(known_models.keys())
53
-
54
def trim_year(mk, md):
    """Return ``(trims, years)`` for make `mk` and model `md`.

    Both lists are empty when the make/model pair is not in MAP.
    """
    models = MAP.get(mk, {})
    entry = models.get(md, {})
    trims = entry.get('trims', [])
    years = entry.get('years', [])
    return trims, years
57
-
58
- # ---------- filtering & rec ----------
59
def anchor_row(mk, md, tr, yr):
    """Pick the DF row that represents the user's selection.

    Each falsy argument is treated as "not specified" and skipped. Among the
    rows matching every specified field, the one with the highest
    ``popularity_score`` is returned.

    Args:
        mk: make, compared against ``DF['make']``.
        md: model, compared against ``DF['model']``.
        tr: trim; compared as a string because the dropdown choices are
            built from ``trim.astype(str)``.
        yr: year; coerced to int to match the int-cast dropdown choices.

    Returns:
        The matching row as a Series (its ``name`` is the DF index label),
        or None when nothing matches or `yr` is not a valid integer.
    """
    # Boolean indexing already yields new frames, so no up-front copy of DF.
    sub = DF
    if mk:
        sub = sub[sub['make'] == mk]
    if md:
        sub = sub[sub['model'] == md]
    if tr:
        sub = sub[sub['trim'].astype(str) == str(tr)]
    if yr:
        try:
            year = int(yr)
        except (TypeError, ValueError):
            return None
        sub = sub[sub['year'] == year]
    if sub.empty:
        return None
    return sub.sort_values('popularity_score', ascending=False).iloc[0]
67
-
68
def apply_filters(df, body, fuel, y_min, y_max, p_min, p_max, safety, rel):
    """Return the rows of `df` that satisfy every filter.

    Args:
        df: frame with ``body_type``, ``fuel``, ``year``, ``price_usd``,
            ``safety_rating`` and ``reliability_score`` columns.
        body: required ``body_type``, or ``'Any'`` to skip this filter.
        fuel: required ``fuel``, or ``'Any'`` to skip this filter.
        y_min, y_max: inclusive bounds on ``year``.
        p_min, p_max: inclusive bounds on ``price_usd``.
        safety: minimum ``safety_rating`` (inclusive).
        rel: minimum ``reliability_score`` (inclusive).

    Returns:
        A new DataFrame keeping the original index labels; `df` itself is
        not modified. Empty when no row qualifies (e.g. min > max).
    """
    # One combined mask instead of copying df and re-slicing it per filter.
    mask = (
        (df['year'] >= y_min) & (df['year'] <= y_max)
        & (df['price_usd'] >= p_min) & (df['price_usd'] <= p_max)
        & (df['safety_rating'] >= safety)
        & (df['reliability_score'] >= rel)
    )
    if body != 'Any':
        mask &= df['body_type'] == body
    if fuel != 'Any':
        mask &= df['fuel'] == fuel
    return df[mask]
76
-
77
def fmt_card(r):
    """Render one car record as a Markdown summary card.

    Args:
        r: mapping-like record (a DataFrame row or a dict) providing
            ``name``, ``make``, ``body_type``, ``fuel``, ``horsepower``,
            ``zero_to_100_kmh_s``, ``price_usd``, ``popularity_score``,
            ``comfort_score``, ``reliability_score`` and ``safety_rating``.
            ``city_mpg``/``highway_mpg`` or ``range_km`` are optional.

    Returns:
        A Markdown string. An "Efficiency" line is included only when the
        mpg pair or the range is available; missing or NaN efficiency data
        is skipped instead of raising.
    """
    def _present(key):
        # .get works for both Series and dict; notna(None) is False.
        return pd.notna(r.get(key))

    if _present('city_mpg') and _present('highway_mpg'):
        eff = f"{int(r['city_mpg'])}-{int(r['highway_mpg'])} mpg"
    elif _present('range_km'):
        eff = f"{int(r['range_km'])} km range"
    else:
        eff = None
    eff_line = f"- Efficiency: {eff}\n" if eff else ""

    return (
        f"**{r['name']}**\n"
        f"- Brand: {r['make']} | Body: {r['body_type']} | Fuel: {r['fuel']}\n"
        f"- HP: {int(r['horsepower'])} | 0–100: {r['zero_to_100_kmh_s']} s | "
        f"Price: ${int(r['price_usd']):,}\n"
        f"{eff_line}"
        f"- Popularity {int(r['popularity_score'])}/10 • Comfort {int(r['comfort_score'])}/10 • "
        f"Reliability {int(r['reliability_score'])}/100 • Safety {int(r['safety_rating'])}★"
    )
88
-
89
def recommend(mk, md, tr, yr, topk, alpha,
              body, fuel, y_min, y_max, p_min, p_max, safety, rel):
    """Recommend the cars most similar to the selected anchor car.

    The anchor is resolved with ``anchor_row``; candidates are the rows of
    DF that pass ``apply_filters``, excluding the anchor itself. Each
    candidate is scored as ``alpha * text_cosine + (1 - alpha) *
    numeric_cosine`` against the anchor.

    Args:
        mk, md, tr, yr: make / model / trim / year identifying the anchor.
        topk: maximum number of recommendations (coerced to int).
        alpha: weight of text similarity in [0, 1]; the remainder goes to
            numeric similarity.
        body, fuel, y_min, y_max, p_min, p_max, safety, rel: candidate
            filters, forwarded to ``apply_filters`` (numeric ones as ints).

    Returns:
        ``(anchor_markdown, results_frame, note)``. When the anchor cannot
        be found or no candidate remains, the first item is a message and
        the other two are None.
    """
    anchor = anchor_row(mk, md, tr, yr)
    if anchor is None:
        return "No match for that combo.", None, None

    pool = apply_filters(DF, body, fuel, int(y_min), int(y_max),
                         int(p_min), int(p_max), int(safety), int(rel))

    # NOTE: index labels are used as embedding row positions, which assumes
    # DF carries a default 0..n-1 index.
    anchor_idx = int(anchor.name)
    cand = pool.index.values
    # Drop the anchor up front so it can never be returned as its own match,
    # even when fewer than `topk` other candidates exist.
    cand = cand[cand != anchor_idx]
    if cand.size == 0:
        return "No cars after filters.", None, None

    text_emb, num_emb = ensure_emb()
    weight = float(alpha)
    text_sim = cosine_similarity(text_emb[anchor_idx:anchor_idx + 1], text_emb[cand])[0]
    num_sim = cosine_similarity(num_emb[anchor_idx:anchor_idx + 1], num_emb[cand])[0]
    score = weight * text_sim + (1 - weight) * num_sim

    # Slider values may arrive as floats; slicing needs an int.
    order = np.argsort(-score)[:int(topk)]
    picks = DF.loc[cand[order]].copy()
    picks['similarity_%'] = (score[order] * 100).round(1)

    cols = ['name', 'make', 'model', 'trim', 'year', 'body_type', 'fuel', 'engine_type',
            'price_usd', 'horsepower', 'zero_to_100_kmh_s',
            'popularity_score', 'comfort_score', 'reliability_score', 'tech_score',
            'ownership_cost_score', 'safety_rating', 'similarity_%']
    return fmt_card(anchor), picks[cols], f"α = {weight:.2f} (text ↔ numeric)"
119
-
120
- # ---------- UI (no RangeSlider; use min/max sliders) ----------
121
with gr.Blocks() as demo:
    gr.Markdown("# RideSearch — pick a car, get similar across brands")

    with gr.Tab("Pick & Recommend"):
        with gr.Row():
            mk = gr.Dropdown(sorted(DF['make'].unique().tolist()), label="Make")
            md = gr.Dropdown([], label="Model")
            tr = gr.Dropdown([], label="Trim (optional)")
            yr = gr.Dropdown([], label="Year (optional)")

        # A value returned to a Dropdown output sets its *selection*, so the
        # handlers return gr.update(choices=...) to repopulate the options
        # and clear any selection left over from the previous make/model.
        def _models_update(make):
            return gr.update(choices=models_for(make), value=None)

        def _up(a, b):
            t, y = trim_year(a, b)
            return gr.update(choices=t, value=None), gr.update(choices=y, value=None)

        mk.change(_models_update, mk, md)
        md.change(_up, [mk, md], [tr, yr])

        # Slider bounds come from the data actually loaded.
        ylo, yhi = int(DF['year'].min()), int(DF['year'].max())
        plo, phi = int(DF['price_usd'].min()), int(DF['price_usd'].max())

        with gr.Row():
            body = gr.Dropdown(['Any'] + sorted(DF['body_type'].unique().tolist()), value='Any', label='Body')
            fuel = gr.Dropdown(['Any'] + sorted(DF['fuel'].unique().tolist()), value='Any', label='Fuel')

        with gr.Row():
            y_min = gr.Slider(ylo, yhi, value=ylo, step=1, label='Year min')
            y_max = gr.Slider(ylo, yhi, value=yhi, step=1, label='Year max')

        with gr.Row():
            p_min = gr.Slider(plo, phi, value=plo, step=500, label='Price min (USD)')
            # Default cap of 60000, clamped so it never falls outside [plo, phi].
            p_max = gr.Slider(plo, phi, value=max(plo, min(phi, 60000)), step=500, label='Price max (USD)')

        with gr.Row():
            safety = gr.Slider(3, 5, value=4, step=1, label='Min Safety ★')
            rel = gr.Slider(55, 99, value=70, step=1, label='Min Reliability')

        with gr.Row():
            topk = gr.Slider(1, 10, value=5, step=1, label='Recommendations')
            alpha = gr.Slider(0, 1, value=0.7, step=0.05, label='α — Text vs Numeric')

        go = gr.Button("Recommend")
        anchor_md = gr.Markdown()
        table = gr.Dataframe(interactive=False)
        note = gr.Markdown()

        # Input order must match the parameter order of recommend().
        go.click(
            recommend,
            [mk, md, tr, yr, topk, alpha, body, fuel, y_min, y_max, p_min, p_max, safety, rel],
            [anchor_md, table, note]
        )
168
-
169
# --- run locally AND in Spaces ---
# Launch only when this file is executed as a script; importing the module
# just builds `demo` without starting a server.
if __name__ == "__main__":
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)