Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,113 +1,391 @@
|
|
|
|
|
| 1 |
|
| 2 |
-
import os, glob,
|
|
|
|
|
|
|
| 3 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 4 |
from sklearn.preprocessing import StandardScaler
|
| 5 |
-
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
def load_df():
|
| 8 |
if os.path.exists('RideSearch_dataset.csv'):
|
| 9 |
return pd.read_csv('RideSearch_dataset.csv')
|
| 10 |
parts = sorted(glob.glob('RideSearch_part*_small.csv'))
|
| 11 |
-
if parts:
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
|
| 17 |
DF = load_df()
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
def ensure_emb():
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
return
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
out = df.copy()
|
| 55 |
if body != 'Any': out = out[out['body_type'] == body]
|
| 56 |
if fuel != 'Any': out = out[out['fuel'] == fuel]
|
| 57 |
-
|
| 58 |
-
out = out[(out['
|
| 59 |
-
out = out[(out['safety_rating'] >= safety) & (out['reliability_score'] >=
|
| 60 |
return out
|
| 61 |
|
| 62 |
-
def
|
| 63 |
-
|
| 64 |
-
return (
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
s = float(alpha)*st + (1-float(alpha))*sn
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app_new.py — RideSearch (brand-correct trims, cross-brand, smart fallbacks, optional photos)
|
| 2 |
|
| 3 |
+
import os, glob, urllib.parse
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pandas as pd
|
| 6 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 7 |
from sklearn.preprocessing import StandardScaler
|
| 8 |
+
import gradio as gr
|
| 9 |
+
|
| 10 |
+
# =========================
|
| 11 |
+
# Data loading & embeddings
|
| 12 |
+
# =========================
|
| 13 |
|
| 14 |
def load_df(dataset_path='RideSearch_dataset.csv', parts_pattern='RideSearch_part*_small.csv'):
    """Load the RideSearch dataset as one DataFrame.

    If ``dataset_path`` exists it is read directly. Otherwise every file
    matching ``parts_pattern`` is read (in lexicographically sorted filename
    order), the parts are concatenated with a fresh 0..n-1 index, and the
    result is written to ``dataset_path`` so later start-ups can skip the
    merge.

    Args:
        dataset_path: Path of the merged CSV (also used as the cache target).
        parts_pattern: Glob pattern matching the split CSV parts.

    Returns:
        The dataset as a ``pandas.DataFrame``.

    Raises:
        FileNotFoundError: If neither the merged CSV nor any part exists.
    """
    if os.path.exists(dataset_path):
        return pd.read_csv(dataset_path)

    parts = sorted(glob.glob(parts_pattern))
    if not parts:
        raise FileNotFoundError("Upload RideSearch_dataset.csv OR the 10 parts RideSearch_part*_small.csv.")

    df = pd.concat([pd.read_csv(p) for p in parts], ignore_index=True)
    try:
        df.to_csv(dataset_path, index=False)
    except OSError as err:
        # The merged file is only a start-up cache; an unwritable working
        # directory must not prevent the app from serving the data in memory.
        import warnings
        warnings.warn(f"Could not cache merged dataset to {dataset_path!r}: {err}")
    return df
|
| 23 |
|
| 24 |
DF = load_df()
|
| 25 |
+
|
| 26 |
+
NUM_COLS = [
|
| 27 |
+
'horsepower','zero_to_100_kmh_s','seats','cargo_liters','price_usd',
|
| 28 |
+
'popularity_score','comfort_score','reliability_score','tech_score',
|
| 29 |
+
'ownership_cost_score','safety_rating'
|
| 30 |
+
]
|
| 31 |
|
| 32 |
def ensure_emb():
    """Return ``(Etext, Enum)``, the text and numeric embedding matrices for ``DF``.

    Both matrices have one row per row of ``DF`` and are stored as float32.

    * ``Etext`` - sentence-transformer encodings of ``DF['text_record']``
      (``all-MiniLM-L6-v2``, L2-normalised by the encoder).
    * ``Enum``  - ``StandardScaler``-standardised ``NUM_COLS`` features.

    The matrices are cached on disk as ``emb_text.npy`` / ``emb_num.npy`` and
    memoised in-process, so repeated calls neither re-encode nor re-read the
    files. On-disk caches are only trusted when their row count matches
    ``DF``; otherwise they are rebuilt, because callers index these arrays
    with ``DF`` row positions and a stale cache would return wrong rows or
    raise ``IndexError``.
    """
    memo = getattr(ensure_emb, '_memo', None)
    if memo is not None:
        return memo

    n_rows = len(DF)
    if os.path.exists('emb_text.npy') and os.path.exists('emb_num.npy'):
        Etext, Enum = np.load('emb_text.npy'), np.load('emb_num.npy')
        if len(Etext) == n_rows and len(Enum) == n_rows:
            ensure_emb._memo = (Etext, Enum)
            return ensure_emb._memo
        # Row counts disagree with DF: the cache belongs to another dataset
        # version, so fall through and rebuild it.

    # Imported lazily so the heavy dependency is only needed when encoding.
    from sentence_transformers import SentenceTransformer
    m = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    texts = DF['text_record'].astype(str).tolist()
    Etext = m.encode(texts, batch_size=256, show_progress_bar=True, normalize_embeddings=True)
    Etext = np.asarray(Etext, dtype='float32')
    np.save('emb_text.npy', Etext)

    X = DF[NUM_COLS].copy()
    if 'zero_to_100_kmh_s' in X.columns:
        # Sign is flipped before scaling - presumably so that a quicker sprint
        # counts as "higher", like the other columns (confirm with the author).
        X['zero_to_100_kmh_s'] = -X['zero_to_100_kmh_s'].astype('float32')
    Xs = StandardScaler().fit_transform(X.values.astype('float32'))
    Enum = Xs.astype('float32')
    np.save('emb_num.npy', Enum)

    ensure_emb._memo = (Etext, Enum)
    return ensure_emb._memo
|
| 50 |
+
|
| 51 |
+
# ==========================================
|
| 52 |
+
# Brand-correct trim display & alias mapping
|
| 53 |
+
# ==========================================
|
| 54 |
+
# Expand this list for the models you’ll demo.
|
| 55 |
+
# Display trims offered per (make, model). Extend this for the models you demo.
TRIM_CHOICES = {
    ("BMW", "3 Series"): ["320i", "330i", "330e", "340i", "M3"],
    ("Audi", "A3"): ["35 TFSI", "40 TFSI", "45 TFSI", "S3", "RS3"],
    ("Audi", "A4"): ["35 TFSI", "40 TFSI", "45 TFSI", "S4", "RS4"],
    ("Mercedes-Benz", "C-Class"): ["C200", "C220d", "C300", "AMG C43", "AMG C63"],
    ("Lexus", "IS"): ["IS 300", "IS 350", "IS 500 F SPORT"],
    ("Toyota", "Corolla"): ["L", "LE", "SE", "XSE", "GR"],
    ("Honda", "Civic"): ["LX", "Sport", "EX", "Touring", "Type R"],
    ("Volkswagen", "Golf"): ["Trendline", "Comfortline", "Highline", "GTI", "R"],
    ("Hyundai", "Elantra"): ["SE", "SEL", "Limited", "N Line", "N"],
    ("Kia", "Forte"): ["LX", "S", "EX", "GT-Line", "GT"],
    ("Jeep", "Wrangler"): ["Sport", "Willys", "Sahara", "Rubicon", "392"],
    ("Land Rover", "Defender"): ["90", "110", "130", "X-Dynamic", "V8"],
    ("Mazda", "Mazda3"): ["S", "Select", "Preferred", "Premium", "Turbo"],
    ("Mitsubishi", "Outlander"): ["ES", "SE", "SEL", "Black Edition", "PHEV"],
    ("Nissan", "Rogue"): ["S", "SV", "SL", "Platinum"],
    ("Peugeot", "3008"): ["Active", "Allure", "GT", "GT Pack"],
    ("Porsche", "911"): ["Carrera", "Carrera S", "GTS", "Turbo", "GT3"],
    ("Ram", "1500"): ["Tradesman", "Big Horn", "Laramie", "Rebel", "Limited"],
    ("Renault", "Clio"): ["Authentique", "Expression", "Dynamique", "RS Line"],
    ("Seat", "Leon"): ["Reference", "Style", "FR", "Cupra"],
    ("Skoda", "Octavia"): ["Active", "Ambition", "Style", "RS"],
    ("Subaru", "Outback"): ["Base", "Premium", "Limited", "Wilderness", "Touring"],
    ("Tesla", "Model 3"): ["RWD", "Long Range", "Performance"],
    ("Volkswagen", "Tiguan"): ["S", "SE", "SEL", "R-Line"],
    ("Volvo", "XC60"): ["Core", "Plus", "Ultimate", "Polestar Engineered"],
    ("Mini", "Cooper"): ["Classic", "Signature", "Iconic", "John Cooper Works"],
}

# Generic trim categories used in the dataset's ``trim`` column.
_GENERIC_TRIMS = ("Base", "Sport", "Premium", "Performance")

# Default display-trim -> generic-category map, keyed by trim name only.
# Several names are shared across brands with different meanings; each key
# appears exactly once here (a dict literal silently keeps only the last
# duplicate), and the models that disagree with the default are listed in
# _TRIM_ALIAS_OVERRIDES below.
TRIM_ALIAS_TO_GENERIC = {
    # BMW 3 Series
    "320i": "Base", "330i": "Sport", "330e": "Sport", "340i": "Premium", "M3": "Performance",
    # Audi A3 / A4
    "35 TFSI": "Base", "40 TFSI": "Sport", "45 TFSI": "Premium",
    "S3": "Performance", "RS3": "Performance", "S4": "Performance", "RS4": "Performance",
    # Mercedes-Benz C-Class
    "C200": "Base", "C220d": "Base", "C300": "Premium",
    "AMG C43": "Performance", "AMG C63": "Performance",
    # Lexus IS
    "IS 300": "Base", "IS 350": "Premium", "IS 500 F SPORT": "Performance",
    # Toyota Corolla
    "L": "Base", "LE": "Base", "XSE": "Premium", "GR": "Performance",
    # Honda Civic
    "Type R": "Performance",
    # Volkswagen Golf / Tiguan
    "Trendline": "Base", "Comfortline": "Base", "Highline": "Premium",
    "GTI": "Performance", "R": "Performance", "R-Line": "Performance",
    # Hyundai Elantra
    "N Line": "Sport", "N": "Performance",
    # Kia Forte
    "GT-Line": "Sport",
    # Jeep Wrangler
    "Willys": "Sport", "Sahara": "Premium", "Rubicon": "Performance", "392": "Performance",
    # Land Rover Defender
    "90": "Base", "110": "Base", "130": "Premium", "X-Dynamic": "Premium", "V8": "Performance",
    # Mazda3
    "Select": "Base", "Preferred": "Premium",
    # Mitsubishi Outlander
    "ES": "Base", "Black Edition": "Premium", "PHEV": "Premium",
    # Nissan Rogue
    "SV": "Sport", "SL": "Premium", "Platinum": "Premium",
    # Peugeot 3008
    "Allure": "Premium", "GT Pack": "Premium",
    # Porsche 911
    "Carrera": "Base", "Carrera S": "Premium", "GTS": "Premium", "GT3": "Performance",
    # Ram 1500
    "Tradesman": "Base", "Big Horn": "Sport", "Laramie": "Premium", "Rebel": "Sport",
    # Renault Clio
    "Authentique": "Base", "Expression": "Sport", "Dynamique": "Premium", "RS Line": "Performance",
    # Seat Leon
    "Reference": "Base", "FR": "Sport", "Cupra": "Performance",
    # Skoda Octavia
    "Ambition": "Sport", "RS": "Performance",
    # Subaru Outback
    "Base": "Base", "Wilderness": "Sport",
    # Tesla Model 3
    "RWD": "Base", "Long Range": "Premium", "Performance": "Performance",
    # Volvo XC60
    "Core": "Base", "Plus": "Premium", "Ultimate": "Premium", "Polestar Engineered": "Performance",
    # Mini Cooper
    "Classic": "Base", "Signature": "Premium", "Iconic": "Premium",
    "John Cooper Works": "Performance",
    # Names shared by several models (default meaning; see overrides below)
    "S": "Base", "SE": "Sport", "SEL": "Premium", "LX": "Base", "EX": "Premium",
    "Sport": "Base", "GT": "Premium", "Style": "Premium", "Limited": "Premium",
    "Active": "Base", "Premium": "Premium", "Touring": "Premium", "Turbo": "Performance",
}

# Per-model exceptions to the shared-name defaults above.
_TRIM_ALIAS_OVERRIDES = {
    ("Honda", "Civic"): {"Sport": "Sport"},
    ("Hyundai", "Elantra"): {"SE": "Base", "SEL": "Base"},
    ("Kia", "Forte"): {"S": "Sport", "GT": "Performance"},
    ("Seat", "Leon"): {"Style": "Sport"},
}


def _generic_for_alias(make, model, alias):
    """Resolve a display trim to its generic category for one (make, model).

    Returns ``None`` when the alias is unknown.
    """
    override = _TRIM_ALIAS_OVERRIDES.get((make, model), {})
    if alias in override:
        return override[alias]
    # A bare generic token that is not one of this model's own display trims
    # is already in dataset form; do not reinterpret it through another
    # brand's trim of the same name (e.g. Jeep's "Sport" meaning Base).
    if alias in _GENERIC_TRIMS and alias not in TRIM_CHOICES.get((make, model), ()):
        return alias
    return TRIM_ALIAS_TO_GENERIC.get(alias)


def normalize_trim_for_query(make, model, display_trim):
    """Translate a UI trim into the generic trim token stored in the dataset.

    Args:
        make: Selected make.
        model: Selected model.
        display_trim: Trim as shown in the UI; may be empty/None.

    Returns:
        ``None`` when no trim was given; the generic category when
        ``(make, model)`` is in ``TRIM_CHOICES`` and the trim is a known
        alias; otherwise ``display_trim`` unchanged (already generic, or a
        model we have no alias table for).
    """
    if not display_trim:
        return None
    if (make, model) in TRIM_CHOICES:
        generic = _generic_for_alias(make, model, display_trim)
        if generic is not None:
            return generic
    return display_trim


def generic_to_display(make, model, generic_trim):
    """Convert a dataset generic trim into a display trim for that make/model.

    The first entry of ``TRIM_CHOICES[(make, model)]`` that maps back to
    ``generic_trim`` is used. If the model is unknown, or none of its trims
    map to that category, the generic string is returned unchanged. A
    missing trim (empty, ``None`` or NaN) yields ``""``.
    """
    # ``x != x`` is true only for NaN, which is how pandas marks a missing cell.
    if not generic_trim or generic_trim != generic_trim:
        return ""
    for alias in TRIM_CHOICES.get((make, model), ()):
        if _generic_for_alias(make, model, alias) == generic_trim:
            return alias
    return str(generic_trim)
|
| 160 |
+
|
| 161 |
+
# ==============================
|
| 162 |
+
# Dropdowns, filters, formatting
|
| 163 |
+
# ==============================
|
| 164 |
+
|
| 165 |
+
def models_for(make):
    """Build the Gradio update for the Model dropdown after a make is picked.

    With no make selected the dropdown is emptied; otherwise it lists the
    distinct, non-null models of that make in sorted order. The current
    selection is always cleared.
    """
    if make:
        of_make = DF.loc[DF['make'].eq(make), 'model']
        model_names = sorted(of_make.dropna().unique().tolist())
    else:
        model_names = []
    return gr.update(choices=model_names, value=None)
|
| 170 |
+
|
| 171 |
+
def trim_year(make, model):
    """Return ``(trims, years)`` choices for the Trim and Year dropdowns.

    Trims:
        * For a ``(make, model)`` listed in ``TRIM_CHOICES``, a copy of its
          brand-correct display trims.
        * Otherwise up to 20 distinct trims from the dataset, narrowed by
          whichever of make/model is given, sorted alphabetically.

    Years:
        Sorted distinct integer years of that exact make and model, or an
        empty list unless both are given.
    """
    if make and model and (make, model) in TRIM_CHOICES:
        # Copy so callers can never mutate the shared module-level table.
        trims = list(TRIM_CHOICES[(make, model)])
    else:
        sub = DF
        if make:
            sub = sub[sub['make'] == make]
        if model:
            sub = sub[sub['model'] == model]
        # Drop missing values BEFORE stringifying; the reverse order turns
        # NaN into the literal choice "nan".
        trims = sorted(sub['trim'].dropna().astype(str).unique().tolist())[:20]

    if make and model:
        same_car = DF['make'].eq(make) & DF['model'].eq(model)
        years = sorted(DF.loc[same_car, 'year'].dropna().astype(int).unique().tolist())
    else:
        years = []
    return trims, years
|
| 187 |
|
| 188 |
+
def on_model_change(make, model):
    """Refresh the Trim and Year dropdowns when the model selection changes.

    Returns two Gradio updates (trim first, then year), each carrying the new
    choices from ``trim_year`` and clearing the current value.
    """
    trim_options, year_options = trim_year(make, model)
    trim_update = gr.update(choices=trim_options, value=None)
    year_update = gr.update(choices=year_options, value=None)
    return trim_update, year_update
|
| 191 |
+
|
| 192 |
+
def apply_filters(df, body, fuel, y_min, y_max, p_min, p_max, safety, reliab):
    """Return the rows of ``df`` that satisfy every user filter.

    Args:
        df: Candidate cars; needs the columns ``body_type``, ``fuel``,
            ``year``, ``price_usd``, ``safety_rating``, ``reliability_score``.
        body: Required body type, or ``'Any'`` / ``None`` / ``''`` for no
            restriction.
        fuel: Required fuel, or ``'Any'`` / ``None`` / ``''`` for no
            restriction.
        y_min, y_max: Inclusive model-year range.
        p_min, p_max: Inclusive price range.
        safety: Minimum ``safety_rating``.
        reliab: Minimum ``reliability_score``.

    Returns:
        A new DataFrame (original index labels kept); ``df`` is not modified.
    """
    keep = (
        df['year'].between(y_min, y_max)
        & df['price_usd'].between(p_min, p_max)
        & (df['safety_rating'] >= safety)
        & (df['reliability_score'] >= reliab)
    )
    # A cleared dropdown arrives as None/''; treat it like 'Any' instead of
    # matching it literally, which would discard every row.
    if body and body != 'Any':
        keep &= df['body_type'] == body
    if fuel and fuel != 'Any':
        keep &= df['fuel'] == fuel
    return df[keep].copy()
|
| 200 |
|
| 201 |
+
def fmt_anchor(r):
    """Render the anchor car ``r`` (one dataset row) as a Markdown summary.

    The trim is shown under its brand-specific display name; horsepower,
    price, year and the score columns are truncated to integers.
    """
    trim_label = generic_to_display(r['make'], r['model'], r['trim'])
    headline = f"**{r['make']} {r['model']} {trim_label} {int(r['year'])}** \n"
    drivetrain = f"Body: {r['body_type']} • Fuel: {r['fuel']} • Engine: {r['engine_type']} \n"
    performance = f"HP: {int(r['horsepower'])} • 0–100: {r['zero_to_100_kmh_s']}s • Price: ${int(r['price_usd']):,} \n"
    popularity = f"Popularity {int(r['popularity_score'])}/10 • Comfort {int(r['comfort_score'])}/10 • "
    quality = f"Reliability {int(r['reliability_score'])}/100 • Safety {int(r['safety_rating'])}★"
    return headline + drivetrain + performance + popularity + quality
|
| 208 |
+
|
| 209 |
+
# ==============
|
| 210 |
+
# Photo helpers
|
| 211 |
+
# ==============
|
| 212 |
+
def placeholder_svg_data_uri(title):
    """Return a ``data:`` URI holding a 480x320 SVG card that shows ``title``.

    The image is generated inline, so no network access or image hosting is
    needed. ``title`` is XML-escaped before being embedded: characters such
    as ``&`` or ``<`` (e.g. a make like "Rolls & Co") would otherwise produce
    a malformed SVG that browsers refuse to render. The finished SVG is then
    percent-encoded for use in an ``src`` attribute.
    """
    from html import escape  # local import keeps this block self-contained

    safe_title = escape(str(title))
    svg = f"""<svg xmlns='http://www.w3.org/2000/svg' width='480' height='320'>
  <rect width='100%' height='100%' fill='#e8eef7'/>
  <text x='50%' y='50%' dominant-baseline='middle' text-anchor='middle'
        font-family='Arial' font-size='26' fill='#223'>
    {safe_title}
  </text>
</svg>"""
    return "data:image/svg+xml;utf8," + urllib.parse.quote(svg)
|
| 223 |
+
|
| 224 |
+
def build_gallery_html(df_rows):
    """Render the recommended cars as a flex-wrapped gallery of HTML cards.

    Args:
        df_rows: DataFrame with one row per car to show. ``make``, ``model``
            and ``trim`` are required; a non-blank string in an optional
            ``image_url`` column is used as the picture, otherwise an inline
            SVG placeholder naming the make and model is generated.

    Returns:
        A single HTML string.

    All values taken from the dataset are HTML-escaped before interpolation,
    so a quote or angle bracket in a CSV cell cannot break out of the
    ``src`` attribute or inject markup into the page.
    """
    from html import escape  # local import keeps this block self-contained

    cards = []
    for _, r in df_rows.iterrows():
        label = f"{r['make']} {r['model']} {generic_to_display(r['make'], r['model'], r['trim'])}"
        if 'image_url' in r and isinstance(r['image_url'], str) and r['image_url'].strip():
            img_src = r['image_url'].strip()
        else:
            img_src = placeholder_svg_data_uri(f"{r['make']} {r['model']}")
        cards.append(f"""
<div style="width:240px;margin:6px;border:1px solid #ddd;border-radius:12px;overflow:hidden;background:#fff;">
<img src="{escape(img_src, quote=True)}" style="width:240px;height:160px;object-fit:cover;display:block" />
<div style="padding:8px 10px;font:14px/1.3 Arial,sans-serif;color:#111">{escape(label)}</div>
</div>
""")
    return f"<div style='display:flex;flex-wrap:wrap'>{''.join(cards)}</div>"
|
| 240 |
+
|
| 241 |
+
# ===========================
|
| 242 |
+
# Anchor selection & ranking
|
| 243 |
+
# ===========================
|
| 244 |
+
def anchor_row(make, model, trim_display, year):
    """Choose the dataset row that best represents the user's selected car.

    The search is limited to the given make/model (each only if provided) and
    relaxes in this order, returning the highest-``popularity_score`` row of
    the first non-empty set:

    1. requested trim and requested year (each only if provided),
    2. requested year, any trim,
    3. requested trim, any year,
    4. any row of the make/model.

    Returns ``None`` when nothing in the dataset matches the make/model.
    """
    wanted_trim = normalize_trim_for_query(make, model, trim_display)

    candidates = DF.copy()
    if make:
        candidates = candidates[candidates['make'] == make]
    if model:
        candidates = candidates[candidates['model'] == model]

    def most_popular(frame):
        if frame.empty:
            return None
        return frame.sort_values('popularity_score', ascending=False).iloc[0]

    # 1) trim and year together
    strict = candidates.copy()
    if wanted_trim:
        strict = strict[strict['trim'] == wanted_trim]
    if year:
        strict = strict[strict['year'] == year]
    if not strict.empty:
        return most_popular(strict)

    # 2) keep the year, drop the trim requirement
    if year:
        by_year = candidates[candidates['year'] == year]
        if not by_year.empty:
            return most_popular(by_year)

    # 3) keep the trim, drop the year requirement
    if wanted_trim:
        by_trim = candidates[candidates['trim'] == wanted_trim]
        if not by_trim.empty:
            return most_popular(by_trim)

    # 4) anything for this make/model
    return most_popular(candidates)
|
| 270 |
+
|
| 271 |
+
def recommend(make, model, trim_display, year, topk, alpha,
              body, fuel, y_min, y_max, p_min, p_max, safety, reliab,
              cross_brand_only=True, exclude_same_model=True):
    """Recommend cars similar to the selected anchor car.

    Similarity is ``alpha * text_cosine + (1 - alpha) * numeric_cosine``
    between the anchor row and each candidate, using the matrices from
    ``ensure_emb``. At most one row per (make, model) is returned, best
    first.

    Args:
        make, model, trim_display, year: Anchor selection (see ``anchor_row``).
        topk: Number of recommendations wanted.
        alpha: Weight of text similarity, in [0, 1].
        body, fuel, y_min, y_max, p_min, p_max, safety, reliab: Candidate
            filters (see ``apply_filters``).
        cross_brand_only: Exclude every car of the anchor's make.
        exclude_same_model: Exclude other rows of the anchor's make+model.

    Returns:
        ``(anchor_markdown, table, note, gallery_html)``. When nothing can
        be recommended the first item is an explanatory message and the
        others are ``None`` / ``""`` / ``None``.
    """
    a = anchor_row(make, model, trim_display, year)
    if a is None:
        return "No match for that combo.", None, "", None

    # candidate pool
    pool = DF
    if cross_brand_only:
        pool = pool[pool['make'] != a['make']]
    if exclude_same_model:
        pool = pool[~((pool['make'] == a['make']) & (pool['model'] == a['model']))]

    pool = apply_filters(pool, body, fuel, int(y_min), int(y_max), int(p_min), int(p_max), int(safety), int(reliab))
    if pool.empty:
        return "No cars after filters. Try widening year/price/safety.", None, "", None

    Etext, Enum = ensure_emb()
    # Embedding rows are addressed by DF index label, i.e. this relies on DF
    # having the default 0..n-1 index produced by load_df.
    idx_anchor = int(a.name)
    cand_idx = pool.index.values
    weight = float(alpha)
    st = cosine_similarity(Etext[idx_anchor:idx_anchor+1], Etext[cand_idx])[0]
    sn = cosine_similarity(Enum[idx_anchor:idx_anchor+1], Enum[cand_idx])[0]
    s = weight*st + (1-weight)*sn

    # Stable sort keeps dataset order among equal scores, so results are
    # reproducible between runs.
    order = np.argsort(-s, kind='stable')
    # Keep only the best-scoring row of each (make, model), vectorised rather
    # than looking rows up one at a time in a Python loop.
    ranked = pool.iloc[order]
    is_first_of_model = ~ranked.duplicated(subset=['make', 'model']).to_numpy()
    top_pos = order[is_first_of_model][:max(int(topk), 1)]

    if len(top_pos) == 0:
        return "No recommendations found after constraints.", None, "", None

    sel = pool.iloc[top_pos].copy()
    # map generic -> display for table
    sel['trim_display'] = sel.apply(lambda r: generic_to_display(r['make'], r['model'], r['trim']), axis=1)
    sel['similarity_%'] = [round(float(v)*100, 1) for v in s[top_pos]]

    cols = [
        'name','make','model','trim_display','year','body_type','fuel','engine_type',
        'price_usd','horsepower','zero_to_100_kmh_s',
        'popularity_score','comfort_score','reliability_score',
        'tech_score','ownership_cost_score','safety_rating','similarity_%'
    ]
    note = (f"α = {float(alpha):.2f} (text ↔ numeric) • Cross-brand only = {cross_brand_only} "
            f"• Exclude same model = {exclude_same_model}")
    gallery = build_gallery_html(sel)

    return fmt_anchor(a), sel[cols], note, gallery
|
| 325 |
+
|
| 326 |
+
# ============
|
| 327 |
+
# Gradio UI
|
| 328 |
+
# ============
|
| 329 |
+
def build_ui():
    """Assemble the Gradio Blocks app and return it (not yet launched).

    Layout: a single "Pick & Recommend" tab with cascading Make -> Model ->
    Trim/Year dropdowns, filter controls whose year and price ranges are
    taken from ``DF``, ranking controls, and four outputs (anchor summary,
    results table, note, photo gallery) filled by ``recommend``.
    """
    y_lo, y_hi = int(DF['year'].min()), int(DF['year'].max())
    p_lo, p_hi = int(DF['price_usd'].min()), int(DF['price_usd'].max())
    # Default price cap is 80000, clamped into [p_lo, p_hi] so the slider's
    # initial value is always valid even if every car costs more than that.
    p_cap = max(p_lo, min(p_hi, 80000))

    with gr.Blocks() as demo:
        gr.Markdown("# RideSearch — cross-brand recommendations with real trims")

        with gr.Tab("Pick & Recommend"):
            with gr.Row():
                mk = gr.Dropdown(sorted(DF['make'].dropna().unique().tolist()), label="Make", value=None)
                md = gr.Dropdown([], label="Model", value=None)
                tr = gr.Dropdown([], label="Trim (optional)", value=None)
                yr = gr.Dropdown([], label="Year (optional)", value=None)
            # Cascade: make -> model list, model -> trim and year lists.
            mk.change(models_for, mk, md)
            md.change(on_model_change, [mk, md], [tr, yr])

            with gr.Row():
                body = gr.Dropdown(['Any'] + sorted(DF['body_type'].dropna().unique().tolist()),
                                   value='Any', label='Body')
                fuel = gr.Dropdown(['Any'] + sorted(DF['fuel'].dropna().unique().tolist()),
                                   value='Any', label='Fuel')

            with gr.Row():
                y_min = gr.Slider(y_lo, y_hi, value=y_lo, step=1, label='Year min')
                y_max = gr.Slider(y_lo, y_hi, value=y_hi, step=1, label='Year max')

            with gr.Row():
                p_min = gr.Slider(p_lo, p_hi, value=p_lo, step=500, label='Price min (USD)')
                p_max = gr.Slider(p_lo, p_hi, value=p_cap, step=500, label='Price max (USD)')

            with gr.Row():
                safety = gr.Slider(3, 5, value=4, step=1, label='Min Safety ★')
                reliab = gr.Slider(55, 99, value=70, step=1, label='Min Reliability')

            with gr.Row():
                topk = gr.Slider(1, 10, value=5, step=1, label='Recommendations')
                alpha = gr.Slider(0, 1, value=0.7, step=0.05, label='α — Text vs Numeric')

            with gr.Row():
                cross = gr.Checkbox(label="Cross-brand only", value=True)
                xmodel = gr.Checkbox(label="Exclude same model family", value=True)

            go = gr.Button("Recommend")
            anchor_md = gr.Markdown()
            table = gr.Dataframe(interactive=False)
            note = gr.Markdown()
            gallery = gr.HTML()

            # Input order must match the positional parameters of recommend().
            go.click(
                recommend,
                [mk, md, tr, yr, topk, alpha, body, fuel, y_min, y_max, p_min, p_max, safety, reliab, cross, xmodel],
                [anchor_md, table, note, gallery]
            )

            gr.Markdown("Tip: If you don’t add images, you still get clean placeholders. "
                        "Add an 'image_url' column to your CSV to show real photos.")

    return demo
|
| 387 |
+
|
| 388 |
+
demo = build_ui()
|
| 389 |
+
|
| 390 |
+
if __name__ == "__main__":
|
| 391 |
+
demo.queue().launch(server_name="0.0.0.0", server_port=7860)
|