|
|
|
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
import gradio as gr |
|
|
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder |
|
|
from sklearn.metrics.pairwise import cosine_similarity |
|
|
from sklearn.neighbors import NearestNeighbors |
|
|
|
|
|
# Path of the vehicle dataset CSV that is loaded once at import time.
CSV_PATH = "all-vehicles-model@public.csv"
|
|
|
|
|
|
|
|
def read_csv_safely(path):
    """Read a CSV that may be semicolon- or comma-delimited.

    The file is parsed with ``;`` first. Parsing a comma-delimited file that
    way normally does not raise -- it silently produces a single fused
    column -- so a one-column result is treated as a delimiter mismatch and
    the file is re-read with pandas' default (comma) delimiter.

    Args:
        path: Location of the CSV file (anything ``pd.read_csv`` accepts).

    Returns:
        The parsed ``DataFrame``.

    Raises:
        Whatever ``pd.read_csv`` raises when the semicolon attempt fails and
        the comma attempt fails too (e.g. missing file, undecodable bytes).
    """
    try:
        frame = pd.read_csv(path, sep=";", encoding="utf-8", engine="python")
    except Exception:
        # Best-effort by design: any failure of the first attempt falls back
        # to the default parser, whose own error is allowed to propagate.
        return pd.read_csv(path, encoding="utf-8")

    if frame.shape[1] > 1:
        return frame

    # Zero or one column: probably the wrong delimiter. Prefer the comma
    # parse only when it actually yields more columns.
    try:
        comma_frame = pd.read_csv(path, encoding="utf-8")
    except Exception:
        return frame
    return comma_frame if comma_frame.shape[1] > frame.shape[1] else frame
|
|
|
|
|
# Load the dataset once; every option list, filter and feature below reads it.
df = read_csv_safely(CSV_PATH)

# Resolve the column names this app relies on. Each constant is the column
# label when present in the file and None otherwise, so later code can test
# it for truthiness before use.
FUEL_COL = next(
    (name for name in ("Fuel Type1", "Fuel Type") if name in df.columns),
    None,
)
TRANS_COL, DRIVE_COL, YEAR_COL, MAKE_COL, MODEL_COL = (
    name if name in df.columns else None
    for name in ("Transmission", "Drive", "Year", "Make", "Model")
)

# Coerce the numeric columns in place; unparsable cells become NaN.
_numeric_candidates = (
    "City Mpg For Fuel Type1",
    "Highway Mpg For Fuel Type1",
    "Combined Mpg For Fuel Type1",
    "Annual Fuel Cost For Fuel Type1",
    "Epa Range For Fuel Type2",
    "Engine displacement",
    "Cylinders",
    YEAR_COL,
)
for _name in _numeric_candidates:
    if not _name or _name not in df.columns:
        continue
    df[_name] = pd.to_numeric(df[_name], errors="coerce")
|
|
|
|
|
|
|
|
def unique_opts(col, default=None, limit=120):
    """Return the sorted distinct non-blank values of ``df[col]`` as strings.

    Args:
        col: Column label, or a falsy value when the column is unavailable.
        default: Optional entry placed in front of the values.
        limit: Maximum number of data values kept (after sorting).

    Returns:
        A list of strings. When ``col`` is missing the list holds only
        ``default`` (if truthy) or is empty.
    """
    if not col or col not in df.columns:
        return [default] if default else []

    # Normalise to stripped text and discard cells that end up blank.
    cleaned = df[col].dropna().astype(str).str.strip()
    cleaned = cleaned.replace("", pd.NA).dropna()

    choices = sorted(cleaned.unique().tolist())[:limit]
    return choices if default is None else [default] + choices
|
|
|
|
|
# Dropdown choices. Transmission offers two catch-all groups ahead of the
# exact values found in the data.
TRANS_OPTS = [
    "No preference",
    "Automatic (all)",
    "Manual (all)",
    *unique_opts(TRANS_COL, None),
]
FUEL_OPTS = unique_opts(FUEL_COL, "No preference")

# Use generic drive labels when the dataset provides none.
_drive_from_data = unique_opts(DRIVE_COL, None) or [
    "2-Wheel Drive",
    "4-Wheel Drive",
    "All Wheel Drive",
]
DRIVE_OPTS = ["No preference", *_drive_from_data]
|
|
|
|
|
|
|
|
def filter_by_fuel(df_in, sel, column=None):
    """Keep the rows whose fuel column contains ``sel``.

    Matching is a case-insensitive *literal* substring test, so selections
    containing regex metacharacters such as ``(``, ``)`` or ``+`` match
    themselves instead of being interpreted as a pattern.

    Args:
        df_in: Frame to filter.
        sel: Selected fuel value. ``"No preference"``, ``None`` and ``""``
            all mean "do not filter".
        column: Column to match against; defaults to the module-level
            ``FUEL_COL``.

    Returns:
        ``df_in`` itself when no filtering applies, otherwise the matching
        subset of rows.
    """
    target = FUEL_COL if column is None else column
    if not target or not sel or sel == "No preference":
        return df_in
    values = df_in[target].astype(str)
    mask = values.str.contains(str(sel), case=False, na=False, regex=False)
    return df_in[mask]
|
|
|
|
|
def filter_by_transmission(df_in, sel):
    """Keep the rows matching the selected transmission.

    ``"Automatic (all)"`` and ``"Manual (all)"`` select every row whose
    transmission text contains ``auto`` / ``man`` (case-insensitive). Any
    other selection must equal the transmission text after stripping and
    lower-casing both sides.

    Args:
        df_in: Frame to filter.
        sel: Selected option; ``"No preference"`` disables the filter.

    Returns:
        ``df_in`` itself when no filtering applies, otherwise the matching
        subset of rows.
    """
    if not TRANS_COL or sel == "No preference":
        return df_in

    gearbox = df_in[TRANS_COL].astype(str)

    if sel == "Automatic (all)":
        keep = gearbox.str.contains("auto", case=False, na=False)
    elif sel == "Manual (all)":
        keep = gearbox.str.contains("man", case=False, na=False)
    else:
        wanted = sel.strip().lower()
        keep = gearbox.str.strip().str.lower() == wanted
    return df_in[keep]
|
|
|
|
|
def filter_by_drive(df_in, sel, column=None):
    """Keep the rows whose drive column contains ``sel``.

    Matching is a case-insensitive *literal* substring test, so selections
    containing regex metacharacters match themselves instead of being
    interpreted as a pattern (or raising on an invalid one).

    Args:
        df_in: Frame to filter.
        sel: Selected drive value. ``"No preference"``, ``None`` and ``""``
            all mean "do not filter".
        column: Column to match against; defaults to the module-level
            ``DRIVE_COL``.

    Returns:
        ``df_in`` itself when no filtering applies, otherwise the matching
        subset of rows.
    """
    target = DRIVE_COL if column is None else column
    if not target or not sel or sel == "No preference":
        return df_in
    values = df_in[target].astype(str)
    mask = values.str.contains(str(sel), case=False, na=False, regex=False)
    return df_in[mask]
|
|
|
|
|
def filter_by_vintage(df_in, kind):
    """Split vehicles around a cutoff of 30 years before the current year.

    Args:
        df_in: Frame to filter.
        kind: ``"Vintage"`` keeps model years at or before the cutoff,
            ``"Regular"`` keeps years after it; any other value leaves the
            frame unfiltered.

    Returns:
        ``df_in`` itself when no filtering applies, otherwise the matching
        subset. Rows whose year cannot be parsed are in neither subset.
    """
    if not YEAR_COL or kind == "No preference":
        return df_in

    threshold = pd.Timestamp.now().year - 30
    years = pd.to_numeric(df_in[YEAR_COL], errors="coerce")

    if kind == "Vintage":
        keep = years <= threshold
    elif kind == "Regular":
        keep = years > threshold
    else:
        return df_in
    return df_in[keep]
|
|
|
|
|
|
|
|
# --- Feature matrix used for similarity search -----------------------------
# FEATURES has one row per row of ``df``: min-max scaled numeric columns
# followed by one-hot encoded categorical columns.

_num_candidates = [c for c in [
    "City Mpg For Fuel Type1", "Highway Mpg For Fuel Type1", "Combined Mpg For Fuel Type1",
    "Annual Fuel Cost For Fuel Type1", "Epa Range For Fuel Type2", "Engine displacement", "Cylinders",
    YEAR_COL,
] if c and c in df.columns]

_num_frame = (
    df[_num_candidates].apply(pd.to_numeric, errors="coerce")
    if _num_candidates
    else pd.DataFrame(index=df.index)
)

# A column with no parsable value has a NaN median, so the fill below would
# leave NaN in FEATURES and the similarity search would reject the matrix.
# Such columns carry no information; leave them out of the feature set.
NUM_COLS = [c for c in _num_candidates if _num_frame[c].notna().any()]

# With an empty dataset there is nothing to fit an encoder on.
_has_rows = len(df) > 0
CAT_COLS = (
    [c for c in [FUEL_COL, DRIVE_COL] if c and c in df.columns]
    if _has_rows
    else []
)

if NUM_COLS:
    _num_frame = _num_frame[NUM_COLS]
    _scaler = MinMaxScaler()
    # Missing cells take the column median before scaling to [0, 1].
    num_mat = _scaler.fit_transform(_num_frame.fillna(_num_frame.median()))
else:
    _scaler = None
    num_mat = np.zeros((len(df), 0))

if CAT_COLS:
    # Unknown categories at query time encode as an all-zero group.
    _enc = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
    cat_mat = _enc.fit_transform(df[CAT_COLS].astype(str))
else:
    _enc = None
    cat_mat = np.zeros((len(df), 0))

FEATURES = np.concatenate([num_mat, cat_mat], axis=1) if len(df) else np.zeros((0, 0))
|
|
|
|
|
|
|
|
def user_vector(params):
    """Build the single-row query vector laid out like a row of ``FEATURES``.

    Numeric targets are dataset statistics (best-case for combined MPG, fuel
    cost and range; median for the rest), scaled with the fitted scaler.
    Categorical targets are the user's fuel/drive selections, one-hot
    encoded; an unset preference is encoded from an empty string.

    Args:
        params: Mapping read for the keys ``"vintage"``, ``"fuel_pref"`` and
            ``"drive_pref"``.

    Returns:
        A ``(1, n_features)`` array: numeric part followed by categorical.
    """
    if NUM_COLS:
        # Which statistic of each column serves as the "ideal" value.
        reducers = {
            "Combined Mpg For Fuel Type1": "max",
            "City Mpg For Fuel Type1": "median",
            "Highway Mpg For Fuel Type1": "median",
            "Annual Fuel Cost For Fuel Type1": "min",
            "Epa Range For Fuel Type2": "max",
            "Engine displacement": "median",
            "Cylinders": "median",
        }
        targets = {
            name: getattr(df[name], how)()
            for name, how in reducers.items()
            if name in NUM_COLS
        }

        if YEAR_COL in NUM_COLS:
            cutoff = pd.Timestamp.now().year - 30
            # NOTE(review): every choice other than "Regular" -- including
            # "No preference" -- targets the cutoff year; confirm intended.
            if params.get("vintage") == "Regular":
                targets[YEAR_COL] = df[YEAR_COL].max()
            else:
                targets[YEAR_COL] = cutoff

        # Any numeric column without an explicit target uses its median.
        for name in NUM_COLS:
            if name not in targets:
                targets[name] = df[name].median()

        if _scaler:
            user_num = _scaler.transform(pd.DataFrame([targets])[NUM_COLS])
        else:
            user_num = np.zeros((1, 0))
    else:
        user_num = np.zeros((1, 0))

    if CAT_COLS:
        chosen = dict.fromkeys(CAT_COLS, "")
        for column, key in ((FUEL_COL, "fuel_pref"), (DRIVE_COL, "drive_pref")):
            picked = params.get(key)
            if column in CAT_COLS and picked and picked != "No preference":
                chosen[column] = picked
        user_cat = _enc.transform(pd.DataFrame([chosen])[CAT_COLS].astype(str))
    else:
        user_cat = np.zeros((1, 0))

    return np.concatenate([user_num, user_cat], axis=1)
|
|
|
|
|
|
|
|
def knn_scores_for_filtered(filtered_idx, user_vec, k=50):
    """Find the rows of ``FEATURES[filtered_idx]`` closest to ``user_vec``.

    Args:
        filtered_idx: Row indices into ``FEATURES`` to search within.
        user_vec: Query array passed to ``kneighbors``.
        k: Upper bound on the number of neighbours returned.

    Returns:
        ``(positions, similarities)``: positions are relative to
        ``filtered_idx`` (not to ``FEATURES``), and similarities are
        ``1 - cosine distance`` in the same order. Both are empty arrays
        when the selected feature block has no elements.
    """
    subset = FEATURES[filtered_idx, :]
    if subset.size == 0:
        return np.array([], dtype=int), np.array([])

    # Never request more neighbours than there are candidate rows.
    neighbour_count = int(min(k, max(1, subset.shape[0])))
    searcher = NearestNeighbors(n_neighbors=neighbour_count, metric="cosine").fit(subset)
    distances, positions = searcher.kneighbors(user_vec, return_distance=True)
    return positions[0], 1.0 - distances[0]
|
|
|
|
|
|
|
|
def _filter_candidates(fuel_pref, trans_pref, drive_pref, vintage_sel):
    """Apply every filter, relaxing one preference at a time if nothing is left.

    Relaxation order: transmission, then drive, then fuel. Each retry drops
    only that single preference; the vintage filter is always applied.
    """
    neutral = "No preference"
    attempts = [(fuel_pref, trans_pref, drive_pref)]
    if trans_pref != neutral:
        attempts.append((fuel_pref, neutral, drive_pref))
    if drive_pref != neutral:
        attempts.append((fuel_pref, trans_pref, neutral))
    if fuel_pref != neutral:
        attempts.append((neutral, trans_pref, drive_pref))

    data = df.iloc[0:0]
    for fuel, trans, drive in attempts:
        # The filters never mutate their input, so ``df`` needs no copy here.
        data = filter_by_fuel(df, fuel)
        data = filter_by_transmission(data, trans)
        data = filter_by_drive(data, drive)
        data = filter_by_vintage(data, vintage_sel)
        if len(data):
            break
    return data


def recommend(usage, daily_km, budget_fuel, fuel_pref, drive_pref, trans_pref,
              cargo_need, perf_pref, seats_min, vintage_sel):
    """Filter the dataset by the user's choices and rank the survivors.

    Args:
        usage, daily_km, budget_fuel, cargo_need, perf_pref, seats_min:
            Part of the UI callback signature; not used by the ranking.
        fuel_pref, drive_pref, trans_pref: Selected dropdown values
            (``"No preference"`` disables the corresponding filter).
        vintage_sel: ``"No preference"``, ``"Regular"`` or ``"Vintage"``.

    Returns:
        ``(table, status)``: ``table`` holds up to 10 best matches with a
        ``"Match Score"`` column (cosine similarity * 100), ``status`` is the
        message shown under the table.
    """
    # NOTE(review): the user-facing strings below look mis-encoded
    # (mojibake); they are kept as-is -- confirm the file's source encoding.
    data = _filter_candidates(fuel_pref, trans_pref, drive_pref, vintage_sel)

    if len(data) == 0:
        # Skip column constants that resolved to None so the empty table
        # does not get a None column label.
        empty_cols = [c for c in [MAKE_COL, MODEL_COL, YEAR_COL, FUEL_COL, TRANS_COL, DRIVE_COL] if c]
        empty_cols.append("Match Score")
        return pd.DataFrame(columns=empty_cols), "诇讗 谞诪爪讗讜 专讻讘讬诐. 谞住讛 诇讛专讞讬讘 讘讞讬专讜转."

    # Remember the filtered count now: the k-NN step below keeps at most 50.
    matched_total = len(data)

    # Index labels are used as row positions of FEATURES; this assumes ``df``
    # still has its default 0..n-1 index.
    idx_arr = data.index.to_numpy()
    u = user_vector(dict(fuel_pref=fuel_pref, drive_pref=drive_pref, vintage=vintage_sel))

    try:
        order, sims = knn_scores_for_filtered(idx_arr, u, k=50)
    except Exception:
        # Best-effort by design: fall back to a plain cosine ranking.
        order, sims = np.array([], dtype=int), np.array([])

    if sims.size == 0:
        feats = FEATURES[idx_arr, :]
        raw = cosine_similarity(u, feats)[0] if feats.size else np.zeros(len(idx_arr))
        order = np.argsort(-raw)
        # Reorder the scores together with the rows so each row keeps its
        # own similarity.
        sims = raw[order]

    data = data.iloc[order].copy()
    data["Match Score"] = (sims * 100).round(2)

    optional_cols = [
        "City Mpg For Fuel Type1",
        "Highway Mpg For Fuel Type1",
        "Combined Mpg For Fuel Type1",
        "Annual Fuel Cost For Fuel Type1",
        "Epa Range For Fuel Type2",
    ]
    cols_show = [c for c in [MAKE_COL, MODEL_COL, YEAR_COL, FUEL_COL, TRANS_COL, DRIVE_COL] if c]
    cols_show += [c for c in optional_cols if c in data.columns]
    cols_show.append("Match Score")

    top = data.head(10)[cols_show]
    return top, f"谞诪爪讗讜 {matched_total} 讚讙诪讬诐 讗讞专讬 住讬谞讜谉. 诪讜爪讙讬诐 讟讜驻 {len(top)}."
|
|
|
|
|
|
|
|
# --- Gradio UI ---------------------------------------------------------------
# NOTE(review): the UI strings look mis-encoded (mojibake); they are kept
# byte-for-byte -- confirm the file's source encoding.
with gr.Blocks(title="诪诪诇讬抓 专讻讘讬诐 讞讻诐") as demo:
    gr.Markdown("# 诪诪诇讬抓 专讻讘讬诐 讞讻诐\n讘讞专 讛注讚驻讜转 讜拽讘诇 讛转讗诪讜转 注诐 爪讬讜谉 AI. 讗驻砖专 诇讘讞讜专 讙讬专 讻诇诇讬 讗讜 讚讙诐 住驻爪讬驻讬.")

    # Row 1: one radio group and two sliders.
    with gr.Row():
        usage = gr.Radio(["注讬专","讘讬谞注讬专讜谞讬","诪注讜专讘"], value="诪注讜专讘", label="讗讜驻讬 砖讬诪讜砖")
        daily_km = gr.Slider(0, 1000, value=20, step=5, label="谞住讜注讛 讬讜诪讬转 诪诪讜爪注转 讘拽讬诇讜诪讟专讬诐")
        budget_fuel = gr.Slider(0, 10000, value=5000, step=100, label="转拽爪讬讘 讚诇拽 讗讜 讞砖诪诇 砖谞转讬")

    # Row 2: dropdowns fed from the option lists built from the dataset.
    with gr.Row():
        fuel_pref = gr.Dropdown(FUEL_OPTS, value="No preference", label="注讚讬驻讜转 诇住讜讙 讚诇拽")
        drive_pref = gr.Dropdown(DRIVE_OPTS, value="No preference", label="讛谞注讛")
        trans_pref = gr.Dropdown(TRANS_OPTS, value="No preference", label="转讬讘转 讛讬诇讜讻讬诐")

    # Row 3: three more sliders.
    with gr.Row():
        cargo_need = gr.Slider(0, 800, value=360, step=20, label="谞驻讞 诪讟注谉 诪讬谞讬诪诇讬 专爪讜讬")
        perf_pref = gr.Slider(0, 1, value=0.85, step=0.05, label="注讚讬驻讜转 1 讘讬爪讜注讬诐 0 讞住讻讜谉")
        seats_min = gr.Slider(2, 8, value=2, step=1, label="诪讜砖讘讬诐 诪讬谞讬诪讜诐")

    vintage = gr.Radio(["No preference","Regular","Vintage"], value="No preference", label="住讜讙 专讻讘")

    btn = gr.Button("诪爪讗 专讻讘讬诐")
    out_tbl = gr.Dataframe(interactive=False, wrap=True, label="讛转讗诪讜转 诪讜诪诇爪讜转 . 讟讜驻 10")
    status = gr.Markdown("")

    # Component order must match the positional parameters of ``recommend``.
    recommend_inputs = [
        usage,
        daily_km,
        budget_fuel,
        fuel_pref,
        drive_pref,
        trans_pref,
        cargo_need,
        perf_pref,
        seats_min,
        vintage,
    ]
    btn.click(fn=recommend, inputs=recommend_inputs, outputs=[out_tbl, status])

# Runs at module level: enables the request queue and starts the app.
demo.queue().launch()
|
|
|