Levimichael4 committed on
Commit
8c5fde5
·
verified ·
1 Parent(s): 02c394e

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -0
app.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os, glob, numpy as np, pandas as pd
3
+ from sklearn.metrics.pairwise import cosine_similarity
4
+ from sklearn.preprocessing import StandardScaler
5
+ import gradio as gr, plotly.express as px
6
+
7
def load_df(full_path='RideSearch_dataset.csv', parts_glob='RideSearch_part*_small.csv'):
    """Load the car catalogue as a single DataFrame.

    The combined CSV at ``full_path`` is preferred.  When it does not exist,
    every file matching ``parts_glob`` is read (in sorted filename order),
    concatenated with a fresh 0..n-1 index, written to ``full_path`` so later
    start-ups can skip the merge, and returned.

    Args:
        full_path: Location of the combined CSV (also the cache target).
        parts_glob: Glob pattern matching the partial CSV files.

    Returns:
        pandas.DataFrame with the full dataset.

    Raises:
        FileNotFoundError: neither the combined file nor any part exists.
    """
    if os.path.exists(full_path):
        return pd.read_csv(full_path)

    part_files = sorted(glob.glob(parts_glob))
    if not part_files:
        raise FileNotFoundError('Upload dataset (parts or full) first.')

    # ignore_index gives the merged frame a clean RangeIndex.
    merged = pd.concat((pd.read_csv(path) for path in part_files), ignore_index=True)
    merged.to_csv(full_path, index=False)
    return merged
16
+
17
# The catalogue is loaded once, at import time, and shared by every helper below.
DF = load_df()

# Numeric columns that are standardised into the numeric embedding.
NUM = [
    'horsepower',
    'zero_to_100_kmh_s',
    'seats',
    'cargo_liters',
    'price_usd',
    'popularity_score',
    'comfort_score',
    'reliability_score',
    'tech_score',
    'ownership_cost_score',
    'safety_rating',
]
19
+
20
def ensure_emb():
    """Return ``(text_embeddings, numeric_embeddings)`` for the rows of ``DF``.

    Both arrays are cached on disk as ``emb_text.npy`` and ``emb_num.npy``.
    The cache is reused only when both files exist AND each has exactly one
    row per row of ``DF``; otherwise both are rebuilt.  Without the row-count
    check a replaced dataset would be scored against stale vectors.

    Returns:
        Tuple of two float32 arrays: sentence embeddings of ``text_record``
        (L2-normalised by the encoder) and standardised ``NUM`` features.
    """
    text_path, num_path = 'emb_text.npy', 'emb_num.npy'
    n_rows = len(DF)

    def _load_if_current(path):
        # A cached array is usable only when it still lines up with DF.
        if not os.path.exists(path):
            return None
        cached = np.load(path)
        return cached if cached.shape[0] == n_rows else None

    text_emb = _load_if_current(text_path)
    num_emb = _load_if_current(num_path)
    if text_emb is not None and num_emb is not None:
        return text_emb, num_emb

    # Imported lazily: the model library is only needed when (re)building.
    from sentence_transformers import SentenceTransformer

    encoder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    encoded = encoder.encode(
        DF['text_record'].astype(str).tolist(),
        batch_size=256,
        show_progress_bar=True,
        normalize_embeddings=True,
    )
    text_emb = np.asarray(encoded, dtype='float32')
    np.save(text_path, text_emb)

    features = DF[NUM].copy()
    # Sign is flipped for the 0-100 km/h time — presumably so that quicker
    # cars rank "higher" like the other features; confirm with the author.
    features['zero_to_100_kmh_s'] = -features['zero_to_100_kmh_s']
    scaled = StandardScaler().fit_transform(features.values.astype('float32'))
    num_emb = scaled.astype('float32')
    np.save(num_path, num_emb)

    return text_emb, num_emb
30
+
31
def mapping():
    """Build the nested lookup ``{make: {model: {'trims': [...], 'years': [...]}}}``.

    Trims are converted to ``str``, de-duplicated, sorted and capped at the
    first 20; years are converted to ``int``, de-duplicated and sorted.
    """
    def _options(rows):
        trim_names = sorted(rows['trim'].astype(str).unique().tolist())
        model_years = sorted(rows['year'].astype(int).unique().tolist())
        return {'trims': trim_names[:20], 'years': model_years}

    return {
        make: {model: _options(rows) for model, rows in make_rows.groupby('model')}
        for make, make_rows in DF.groupby('make')
    }


MAP = mapping()
39
+
40
def models_for(mk):
    """Return the sorted model names known for make ``mk`` (empty list if none)."""
    if not mk:
        return []
    # Iterating a dict yields its keys, so this sorts the model names.
    return sorted(MAP.get(mk, {}))
41
def trim_year(mk, md):
    """Return ``(trims, years)`` for the given make/model, or two empty lists."""
    entry = MAP.get(mk, {}).get(md, {})
    trims = entry.get('trims', [])
    years = entry.get('years', [])
    return trims, years
44
+
45
def anchor(mk, md, tr, yr):
    """Pick the reference car for a make / model / trim / year selection.

    Every falsy argument is ignored, so callers can leave trim and year
    unset.  Trim is compared as ``str`` and year as ``int`` because that is
    how ``mapping()`` normalises the values offered to the user; comparing
    raw column values would miss trims that are not stored as strings.

    Args:
        mk: Make to match, or falsy for any.
        md: Model to match, or falsy for any.
        tr: Trim to match (string form), or falsy for any.
        yr: Model year to match, or falsy for any.

    Returns:
        The matching row (a ``pandas.Series``) with the highest
        ``popularity_score``, or ``None`` when nothing matches.
    """
    # One boolean mask instead of copying the whole frame on every call.
    keep = pd.Series(True, index=DF.index)
    if mk:
        keep &= DF['make'] == mk
    if md:
        keep &= DF['model'] == md
    if tr:
        keep &= DF['trim'].astype(str) == str(tr)
    if yr:
        keep &= DF['year'] == int(yr)

    matches = DF[keep]
    if matches.empty:
        return None
    return matches.sort_values('popularity_score', ascending=False).iloc[0]
52
+
53
def filt(df, body, fuel, yr_rng, price, safety, rel):
    """Return the rows of ``df`` that satisfy every user filter.

    Args:
        df: Catalogue frame with ``body_type``, ``fuel``, ``year``,
            ``price_usd``, ``safety_rating`` and ``reliability_score`` columns.
        body: Required body type; ``'Any'``, ``None`` or ``''`` disables it.
        fuel: Required fuel; ``'Any'``, ``None`` or ``''`` disables it.
        yr_rng: ``(first_year, last_year)``, both inclusive.
        price: ``(min_price, max_price)``, both inclusive.
        safety: Minimum ``safety_rating``.
        rel: Minimum ``reliability_score``.

    Returns:
        A new DataFrame (original index labels kept); ``df`` is not modified.
    """
    year_lo, year_hi = yr_rng
    price_lo, price_hi = price

    # Series.between is inclusive on both ends, matching >= / <=.
    keep = (
        df['year'].between(year_lo, year_hi)
        & df['price_usd'].between(price_lo, price_hi)
        & (df['safety_rating'] >= safety)
        & (df['reliability_score'] >= rel)
    )
    # A cleared dropdown arrives as None/'' — treat it like 'Any' rather than
    # as a literal value that would match no row.
    if body and body != 'Any':
        keep &= df['body_type'] == body
    if fuel and fuel != 'Any':
        keep &= df['fuel'] == fuel

    return df[keep]
61
+
62
def fmt(r):
    """Render one catalogue row as a short Markdown summary.

    Args:
        r: Mapping-like row (``pandas.Series`` or ``dict``) holding the
            columns referenced below.

    Returns:
        Markdown text: a bold title followed by bullet lines.  An
        ``Efficiency`` bullet is appended when the row has both mpg figures
        or, failing that, a ``range_km`` value; rows with neither simply omit
        it instead of raising on a NaN-to-int conversion.
    """
    def _present(key):
        # Missing keys and NaN/None values both count as "no data".
        value = r.get(key)
        return None if value is None or pd.isna(value) else value

    city = _present('city_mpg')
    highway = _present('highway_mpg')
    range_km = _present('range_km')
    if city is not None and highway is not None:
        eff = f"{int(city)}-{int(highway)} mpg"
    elif range_km is not None:
        eff = f"{int(range_km)} km range"
    else:
        eff = None

    lines = [
        f"**{r['name']}**",
        f"- Brand: {r['make']} | Body: {r['body_type']} | Fuel: {r['fuel']}",
        f"- HP: {int(r['horsepower'])} | 0–100: {r['zero_to_100_kmh_s']} s | Price: ${int(r['price_usd']):,}",
        f"- Popularity {int(r['popularity_score'])}/10 • Comfort {int(r['comfort_score'])}/10 • Reliability {int(r['reliability_score'])}/100 • Safety {int(r['safety_rating'])}★",
    ]
    if eff is not None:
        lines.append(f"- Efficiency: {eff}")
    return "\n".join(lines)
70
+
71
def rec(mk, md, tr, yr, topk, alpha, body, fuel, yr_rng, price, safety, rel):
    """Recommend the cars most similar to the selected anchor car.

    The score of each candidate is
    ``alpha * text_cosine + (1 - alpha) * numeric_cosine`` against the anchor.

    Args:
        mk, md, tr, yr: Make / model / trim / year selection passed to
            ``anchor``.
        topk: Maximum number of recommendations (cast to ``int``).
        alpha: Weight of the text similarity, cast to ``float``.
        body, fuel, yr_rng, price, safety, rel: Filters passed to ``filt``.

    Returns:
        ``(anchor_markdown, recommendations_frame, note)``.  When no anchor
        matches, or no candidate other than the anchor survives the filters,
        the first item is an explanatory message and the other two are
        ``None``.
    """
    a = anchor(mk, md, tr, yr)
    if a is None:
        return 'No match for that combo.', None, None

    sub = filt(DF, body, fuel, yr_rng, price, safety, rel)
    # Index labels double as row positions in the embedding arrays; this
    # relies on DF keeping the default 0..n-1 index it is loaded with.
    idx = a.name
    cand = sub.index.values
    # Drop the anchor itself up front.  Merely giving it a score of -1 let it
    # reappear whenever there were fewer candidates than requested results.
    cand = cand[cand != idx]
    if cand.size == 0:
        return 'No cars after filters.', None, None

    Et, En = ensure_emb()
    weight = float(alpha)
    text_sim = cosine_similarity(Et[idx:idx + 1], Et[cand])[0]
    num_sim = cosine_similarity(En[idx:idx + 1], En[cand])[0]
    score = weight * text_sim + (1 - weight) * num_sim

    # Sliders may deliver floats; slice bounds must be integers.
    order = np.argsort(-score)[:int(topk)]
    sel = DF.loc[cand[order]].copy()
    sel['similarity_%'] = (score[order] * 100).round(1)

    cols = [
        'name', 'make', 'model', 'trim', 'year', 'body_type', 'fuel',
        'engine_type', 'price_usd', 'horsepower', 'zero_to_100_kmh_s',
        'popularity_score', 'comfort_score', 'reliability_score',
        'tech_score', 'ownership_cost_score', 'safety_rating', 'similarity_%',
    ]
    return fmt(a), sel[cols], f'α={weight:.2f}'
84
+
85
# Gradio UI: choose an anchor car, set filters, and list the most similar cars.
with gr.Blocks() as demo:
    gr.Markdown('# RideSearch — Pick a car, get similar across brands')

    with gr.Tab('Pick & Recommend'):
        with gr.Row():
            mk = gr.Dropdown(sorted(DF['make'].unique().tolist()), label='Make')
            md = gr.Dropdown([], label='Model')
            tr = gr.Dropdown([], label='Trim (optional)')
            yr = gr.Dropdown([], label='Year (optional)')

        # A callback's return value sets a Dropdown's *value*; changing the
        # available *choices* needs an explicit update object.
        def _on_make(make):
            return (
                gr.update(choices=models_for(make), value=None),
                gr.update(choices=[], value=None),
                gr.update(choices=[], value=None),
            )

        def _u(a, b):
            t, y = trim_year(a, b)
            return gr.update(choices=t, value=None), gr.update(choices=y, value=None)

        mk.change(_on_make, mk, [md, tr, yr])
        md.change(_u, [mk, md], [tr, yr])

        year_lo, year_hi = int(DF['year'].min()), int(DF['year'].max())
        price_lo, price_hi = int(DF['price_usd'].min()), int(DF['price_usd'].max())

        # NOTE(review): core gradio does not appear to ship a RangeSlider
        # component, so each range is expressed as a pair of plain sliders.
        with gr.Row():
            body = gr.Dropdown(['Any'] + sorted(DF['body_type'].unique().tolist()), value='Any', label='Body')
            fuel = gr.Dropdown(['Any'] + sorted(DF['fuel'].unique().tolist()), value='Any', label='Fuel')
            yr_min = gr.Slider(year_lo, year_hi, value=year_lo, step=1, label='Year from')
            yr_max = gr.Slider(year_lo, year_hi, value=year_hi, step=1, label='Year to')
        with gr.Row():
            p_min = gr.Slider(price_lo, price_hi, value=price_lo, step=500, label='Min price (USD)')
            p_max = gr.Slider(price_lo, price_hi, value=min(price_hi, 60000), step=500, label='Max price (USD)')
            safety = gr.Slider(3, 5, value=4, step=1, label='Min Safety ★')
            rel = gr.Slider(55, 99, value=70, step=1, label='Min Reliability')
        with gr.Row():
            topk = gr.Slider(1, 10, value=5, step=1, label='Recommendations')
            alpha = gr.Slider(0, 1, value=0.7, step=0.05, label='α — Text vs Numeric')

        btn = gr.Button('Recommend')
        a = gr.Markdown()
        recs = gr.Dataframe(interactive=False)
        note = gr.Markdown()

        def _recommend(mk_v, md_v, tr_v, yr_v, k, w, body_v, fuel_v, y0, y1, p0, p1, saf, rel_v):
            # Re-pack the slider pairs into the (low, high) tuples rec() takes;
            # min/max keeps the range valid if the two sliders cross.
            yr_rng = (min(y0, y1), max(y0, y1))
            price = (min(p0, p1), max(p0, p1))
            return rec(mk_v, md_v, tr_v, yr_v, k, w, body_v, fuel_v, yr_rng, price, saf, rel_v)

        btn.click(
            _recommend,
            [mk, md, tr, yr, topk, alpha, body, fuel, yr_min, yr_max, p_min, p_max, safety, rel],
            [a, recs, note],
        )

demo.launch()