Spaces:

Levimichael4
/

RideSearch

Sleeping

App Files Files Community

Levimichael4 committed on Aug 10, 2025

Commit

9bf7679

verified ·

1 Parent(s): af839dc

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -327

app.py DELETED Viewed

@@ -1,327 +0,0 @@
-# app.py — RideSearch (cross-brand, brand-correct trims, smart fallbacks)
-import os, glob
-import numpy as np
-import pandas as pd
-from sklearn.metrics.pairwise import cosine_similarity
-from sklearn.preprocessing import StandardScaler
-import gradio as gr
-# =========================
-# Data loading & embeddings
-# =========================
-def load_df():
-    """
-    Return the RideSearch dataset as a DataFrame.
-    The merged 'RideSearch_dataset.csv' is preferred. When it is absent, the
-    'RideSearch_part*_small.csv' shards are concatenated in sorted filename order,
-    written back to disk as the merged file, and returned.
-    Raises FileNotFoundError when neither the merged file nor any shard exists.
-    """
-    merged_path = 'RideSearch_dataset.csv'
-    if not os.path.exists(merged_path):
-        shard_paths = sorted(glob.glob('RideSearch_part*_small.csv'))
-        if len(shard_paths) == 0:
-            raise FileNotFoundError(
-                "Upload RideSearch_dataset.csv OR the 10 parts RideSearch_part*_small.csv."
-            )
-        frames = [pd.read_csv(path) for path in shard_paths]
-        merged = pd.concat(frames, ignore_index=True)
-        # persist the merge so later start-ups take the single-file path
-        merged.to_csv(merged_path, index=False)
-        return merged
-    return pd.read_csv(merged_path)
-# The dataset is loaded once at import time and shared by every helper in this module.
-DF = load_df()
-# Numeric feature columns that make up the numeric embedding
-# (adjust if your CSV differs).
-NUM_COLS = [
-    'horsepower',
-    'zero_to_100_kmh_s',
-    'seats',
-    'cargo_liters',
-    'price_usd',
-    'popularity_score',
-    'comfort_score',
-    'reliability_score',
-    'tech_score',
-    'ownership_cost_score',
-    'safety_rating',
-]
-# In-process cache: the arrays are read from disk (or built) once per process,
-# not once per recommendation request.
-_EMB_CACHE = {}
-def ensure_emb():
-    """
-    Load or create text + numeric embeddings, one row per row of DF.
-    Text uses all-MiniLM-L6-v2 on DF['text_record'] (normalize_embeddings=True).
-    Numeric is StandardScaler on NUM_COLS (with 0-100 reversed for acceleration);
-    missing numeric values are filled with the column median before scaling.
-    Returns a tuple (Etext, Enum) of float32 arrays.
-    The saved 'emb_text.npy' / 'emb_num.npy' files are reused only when their shape
-    matches the current DF and NUM_COLS; otherwise both are rebuilt and re-saved.
-    """
-    if 'pair' in _EMB_CACHE:
-        return _EMB_CACHE['pair']
-    n_rows = len(DF)
-    if os.path.exists('emb_text.npy') and os.path.exists('emb_num.npy'):
-        Etext, Enum = np.load('emb_text.npy'), np.load('emb_num.npy')
-        # Rows are addressed by DF position, so files built from a different
-        # dataset (or a different NUM_COLS) must not be reused.
-        fresh = (
-            Etext.shape[0] == n_rows
-            and Enum.shape == (n_rows, len(NUM_COLS))
-        )
-        if fresh:
-            _EMB_CACHE['pair'] = (Etext, Enum)
-            return _EMB_CACHE['pair']
-    # --- build on first run, or when the saved files are stale ---
-    from sentence_transformers import SentenceTransformer
-    m = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
-    texts = DF['text_record'].astype(str).tolist()
-    Etext = m.encode(texts, batch_size=256, show_progress_bar=True, normalize_embeddings=True)
-    Etext = np.asarray(Etext, dtype='float32')
-    np.save('emb_text.npy', Etext)
-    X = DF[NUM_COLS].astype('float32')
-    # StandardScaler passes NaN through and cosine_similarity rejects NaN input,
-    # so impute first: column median, then 0 for a column with no values at all.
-    X = X.fillna(X.median()).fillna(0.0)
-    # faster 0–100 → lower-better; invert accel so larger is better for similarity
-    if 'zero_to_100_kmh_s' in X.columns:
-        X['zero_to_100_kmh_s'] = -X['zero_to_100_kmh_s']
-    Xs = StandardScaler().fit_transform(X.values)
-    Enum = Xs.astype('float32')
-    np.save('emb_num.npy', Enum)
-    _EMB_CACHE['pair'] = (Etext, Enum)
-    return _EMB_CACHE['pair']
-# ==========================================
-# Brand-correct trim display & alias mapping
-# ==========================================
-# Brand-correct trim names offered in the Trim dropdown, keyed by (make, model).
-TRIM_CHOICES = {
-    ("BMW","3 Series"): ["320i","330i","330e","340i","M3"],
-    ("Audi","A3"): ["35 TFSI","40 TFSI","45 TFSI","S3","RS3"],
-    ("Audi","A4"): ["35 TFSI","40 TFSI","45 TFSI","S4","RS4"],
-    ("Mercedes-Benz","C-Class"): ["C200","C220d","C300","AMG C43","AMG C63"],
-    ("Lexus","IS"): ["IS 300","IS 350","IS 500 F SPORT"],
-    ("Toyota","Corolla"): ["L","LE","SE","XSE","GR"],
-    ("Honda","Civic"): ["LX","Sport","EX","Touring","Type R"],
-    ("Volkswagen","Golf"): ["Trendline","Comfortline","Highline","GTI","R"],
-    ("Hyundai","Elantra"): ["SE","SEL","Limited","N Line","N"],
-    ("Kia","Forte"): ["LX","S","EX","GT-Line","GT"],
-    # add more pairs you plan to demo
-}
-# Map those display trims to your dataset’s generic trim tokens.
-# The table is keyed by trim name alone, so every name may appear only once:
-# a repeated key in a dict literal silently keeps just the last value.
-TRIM_ALIAS_TO_GENERIC = {
-    # BMW 3
-    "320i":"Base","330i":"Sport","330e":"Sport","340i":"Premium","M3":"Performance",
-    # Audi A3/A4
-    "35 TFSI":"Base","40 TFSI":"Sport","45 TFSI":"Premium","S3":"Performance","RS3":"Performance",
-    "S4":"Performance","RS4":"Performance",
-    # Mercedes C
-    "C200":"Base","C220d":"Base","C300":"Premium","AMG C43":"Performance","AMG C63":"Performance",
-    # Lexus IS
-    "IS 300":"Base","IS 350":"Premium","IS 500 F SPORT":"Performance",
-    # Toyota Corolla ("SE" is listed once, under Hyundai Elantra)
-    # NOTE: Corolla SE was meant to be "Sport", but the name collides with Elantra SE
-    # ("Base"); this flat table can hold only one value and "Base" is the one in effect.
-    "L":"Base","LE":"Base","XSE":"Premium","GR":"Performance",
-    # Honda Civic
-    "LX":"Base","Sport":"Sport","EX":"Premium","Touring":"Premium","Type R":"Performance",
-    # VW Golf
-    "Trendline":"Base","Comfortline":"Base","Highline":"Premium","GTI":"Performance","R":"Performance",
-    # Hyundai Elantra
-    "SE":"Base","SEL":"Base","Limited":"Premium","N Line":"Sport","N":"Performance",
-    # Kia Forte ("LX" and "EX" are listed once, under Honda Civic, with the same values)
-    "S":"Sport","GT-Line":"Sport","GT":"Performance",
-}
-# ==============================
-# Helpers: dropdowns & filtering
-# ==============================
-def models_for(make):
-    """Gradio callback: list the chosen make's models in the Model dropdown, selection cleared."""
-    choices = []
-    if make:
-        same_make = DF['make'].eq(make)
-        choices = sorted(DF.loc[same_make, 'model'].dropna().unique().tolist())
-    return gr.update(choices=choices, value=None)
-def trim_year(make, model):
-    """
-    Return (trims, years) for the Trim and Year dropdowns.
-    trims: the brand-correct list from TRIM_CHOICES when (make, model) has one;
-           otherwise up to 20 distinct dataset trims (sorted) for whichever of
-           make/model is given. Rows with a missing trim are skipped.
-    years: sorted distinct integer years for that make+model, or [] unless both are given.
-    """
-    # Trims (brand-correct if we have them; otherwise from DF)
-    if make and model and (make, model) in TRIM_CHOICES:
-        trims = list(TRIM_CHOICES[(make, model)])
-    else:
-        sub = DF
-        if make:
-            sub = sub[sub['make'] == make]
-        if model:
-            sub = sub[sub['model'] == model]
-        # drop missing values BEFORE casting: astype(str) would turn NaN into the
-        # literal string "nan", which dropna() can no longer remove
-        trims = sorted(sub['trim'].dropna().astype(str).unique().tolist())[:20]
-    # Years
-    if make and model:
-        same_car = DF['make'].eq(make) & DF['model'].eq(model)
-        years = sorted(DF.loc[same_car, 'year'].dropna().astype(int).unique().tolist())
-    else:
-        years = []
-    return trims, years
-def on_model_change(make, model):
-    """Gradio callback: refresh the Trim and Year dropdown choices and clear both selections."""
-    trim_opts, year_opts = trim_year(make, model)
-    trim_update = gr.update(choices=trim_opts, value=None)
-    year_update = gr.update(choices=year_opts, value=None)
-    return trim_update, year_update
-# Display trims whose generic level depends on the model. TRIM_ALIAS_TO_GENERIC is keyed
-# by trim name alone, so a name shared by two models can carry only one value there;
-# the exceptions are resolved here, keyed by (make, model, display trim).
-_MODEL_TRIM_OVERRIDES = {
-    ("Toyota", "Corolla", "SE"): "Sport",
-}
-def normalize_trim_for_query(make, model, display_trim):
-    """
-    Map pretty display trims back to dataset generic tokens (Base/Sport/...).
-    Returns None when no trim was chosen. For a (make, model) listed in TRIM_CHOICES
-    the display trim is translated: a model-specific override first, then the shared
-    alias table. Anything else is returned unchanged, since it is already a dataset trim.
-    """
-    if not display_trim:
-        return None
-    if (make, model) not in TRIM_CHOICES:
-        return display_trim
-    override = _MODEL_TRIM_OVERRIDES.get((make, model, display_trim))
-    if override is not None:
-        return override
-    return TRIM_ALIAS_TO_GENERIC.get(display_trim, display_trim)
-def apply_filters(df, body, fuel, y_min, y_max, p_min, p_max, safety, reliab):
-    """
-    Return a new frame with the rows of df that pass every filter.
-    body / fuel: exact match on 'body_type' / 'fuel', or 'Any' to skip that filter.
-    y_min..y_max and p_min..p_max: inclusive bounds on 'year' and 'price_usd'.
-    safety / reliab: minimum 'safety_rating' and 'reliability_score'.
-    """
-    keep = df['year'].between(y_min, y_max)
-    keep &= df['price_usd'].between(p_min, p_max)
-    keep &= df['safety_rating'].ge(safety) & df['reliability_score'].ge(reliab)
-    if body != 'Any':
-        keep &= df['body_type'].eq(body)
-    if fuel != 'Any':
-        keep &= df['fuel'].eq(fuel)
-    return df[keep].copy()
-def fmt_anchor(r):
-    """Render the anchor car (a row or mapping of dataset fields) as a five-line Markdown summary."""
-    bullet = "  •  "
-    identity = [f"Brand: {r['make']}", f"Model: {r['model']}", f"Trim: {r['trim']}", f"Year: {r['year']}"]
-    drivetrain = [f"Body: {r['body_type']}", f"Fuel: {r['fuel']}", f"Engine: {r['engine_type']}"]
-    performance = [
-        f"HP: {int(r['horsepower'])}",
-        f"0–100: {r['zero_to_100_kmh_s']}s",
-        f"Price: ${int(r['price_usd']):,}",
-    ]
-    scores = [
-        f"Popularity {int(r['popularity_score'])}/10",
-        f"Comfort {int(r['comfort_score'])}/10",
-        f"Reliability {int(r['reliability_score'])}/100",
-        f"Safety {int(r['safety_rating'])}★",
-    ]
-    rows = [f"**{r['name']}**"]
-    rows.extend(bullet.join(group) for group in (identity, drivetrain, performance, scores))
-    # two trailing spaces before the newline are a Markdown hard line break
-    return "  \n".join(rows)
-# ===========================
-# Anchor selection & ranking
-# ===========================
-def anchor_row(make, model, trim_display, year):
-    """
-    Choose the dataset row that stands for the user's pick, relaxing the match step by step.
-    Tiers, most specific first: trim+year, year only, trim only, then any row of the
-    make/model scope. Within a tier the row with the highest popularity_score wins.
-    Returns that row as a Series, or None when the make/model scope has no rows.
-    """
-    wanted_trim = normalize_trim_for_query(make, model, trim_display)
-    scope = DF.copy()
-    if make:
-        scope = scope[scope['make'] == make]
-    if model:
-        scope = scope[scope['model'] == model]
-    def most_popular(frame):
-        if frame.empty:
-            return None
-        return frame.sort_values('popularity_score', ascending=False).iloc[0]
-    # 1) exact: every criterion the user actually supplied
-    strict = scope.copy()
-    if wanted_trim:
-        strict = strict[strict['trim'] == wanted_trim]
-    if year:
-        strict = strict[strict['year'] == year]
-    tiers = [strict]
-    # 2) same year (ignore trim)
-    if year:
-        tiers.append(scope[scope['year'] == year])
-    # 3) same trim (ignore year)
-    if wanted_trim:
-        tiers.append(scope[scope['trim'] == wanted_trim])
-    for tier in tiers:
-        if not tier.empty:
-            return most_popular(tier)
-    # 4) fallback: best for that make+model
-    return most_popular(scope)
-def recommend(make, model, trim_display, year, topk, alpha,
-              body, fuel, y_min, y_max, p_min, p_max, safety, reliab,
-              cross_brand_only=True, exclude_same_model=True):
-    """
-    Recommend the cars most similar to the selected anchor car.
-    make, model, trim_display, year: the user's pick; resolved to one row by anchor_row.
-    topk: maximum number of recommendations; at most one per (make, model).
-    alpha: weight of text similarity; (1 - alpha) weights numeric similarity.
-    body, fuel, y_min, y_max, p_min, p_max, safety, reliab: passed to apply_filters.
-    cross_brand_only: drop every car of the anchor's make.
-    exclude_same_model: drop every car of the anchor's make+model.
-    Returns (anchor_markdown, table, note). On failure the first item is a message
-    and the other two are None.
-    """
-    a = anchor_row(make, model, trim_display, year)
-    if a is None:
-        return "No match for that combo.", None, None
-    # candidate pool
-    pool = DF
-    if cross_brand_only:
-        pool = pool[pool['make'] != a['make']]
-    if exclude_same_model:
-        pool = pool[~((pool['make'] == a['make']) & (pool['model'] == a['model']))]
-    # With both toggles off the anchor itself is still in the pool and would come
-    # back as the top "recommendation"; never suggest the car being compared.
-    pool = pool.drop(index=a.name, errors='ignore')
-    pool = apply_filters(pool, body, fuel, int(y_min), int(y_max), int(p_min), int(p_max), int(safety), int(reliab))
-    if pool.empty:
-        return "No cars after filters. Try widening year/price/safety.", None, None
-    Etext, Enum = ensure_emb()
-    # DF index labels double as embedding row positions (DF keeps its default index)
-    idx_anchor = int(a.name)
-    cand_idx = pool.index.values
-    weight = float(alpha)
-    st = cosine_similarity(Etext[idx_anchor:idx_anchor+1], Etext[cand_idx])[0]
-    sn = cosine_similarity(Enum[idx_anchor:idx_anchor+1],  Enum[cand_idx])[0]
-    s = weight*st + (1-weight)*sn
-    # rank, enforce unique (brand, model) combos
-    makes = pool['make'].to_numpy()
-    models = pool['model'].to_numpy()
-    limit = int(topk)
-    seen = set()
-    chosen = []
-    scores = []
-    for j in np.argsort(-s):
-        key = (makes[j], models[j])
-        if key in seen:
-            continue
-        seen.add(key)
-        chosen.append(cand_idx[j])
-        scores.append(round(float(s[j])*100, 1))
-        if len(chosen) >= limit:
-            break
-    if not chosen:
-        return "No recommendations found after constraints.", None, None
-    sel = DF.loc[chosen].copy()
-    sel['similarity_%'] = scores
-    cols = [
-        'name','make','model','trim','year','body_type','fuel','engine_type',
-        'price_usd','horsepower','zero_to_100_kmh_s',
-        'popularity_score','comfort_score','reliability_score',
-        'tech_score','ownership_cost_score','safety_rating','similarity_%'
-    ]
-    note = (f"α = {float(alpha):.2f} (text ↔ numeric) • Cross-brand only = {cross_brand_only} "
-            f"• Exclude same model = {exclude_same_model}")
-    return fmt_anchor(a), sel[cols], note
-# ============
-# Gradio UI
-# ============
-def build_ui():
-    """
-    Assemble the Gradio Blocks app and return it.
-    One tab holds the make/model/trim/year pickers, the filter and ranking controls,
-    and the Recommend button, which calls recommend and fills the anchor summary,
-    the results table and the settings note.
-    """
-    def distinct(column):
-        # sorted unique non-missing values of one DF column, for dropdown choices
-        return sorted(DF[column].dropna().unique().tolist())
-    year_lo = int(DF['year'].min())
-    year_hi = int(DF['year'].max())
-    price_lo = int(DF['price_usd'].min())
-    price_hi = int(DF['price_usd'].max())
-    with gr.Blocks() as app:
-        gr.Markdown("# RideSearch — pick a car, get **cross-brand** similar options")
-        with gr.Tab("Pick & Recommend"):
-            with gr.Row():
-                make_dd = gr.Dropdown(distinct('make'), label="Make")
-                model_dd = gr.Dropdown([], label="Model")
-                trim_dd = gr.Dropdown([], label="Trim (optional)")
-                year_dd = gr.Dropdown([], label="Year (optional)")
-            # cascading dropdowns: make -> model -> trim/year
-            make_dd.change(models_for, make_dd, model_dd)
-            model_dd.change(on_model_change, [make_dd, model_dd], [trim_dd, year_dd])
-            with gr.Row():
-                body_dd = gr.Dropdown(['Any'] + distinct('body_type'), value='Any', label='Body')
-                fuel_dd = gr.Dropdown(['Any'] + distinct('fuel'), value='Any', label='Fuel')
-            with gr.Row():
-                year_min = gr.Slider(year_lo, year_hi, value=year_lo, step=1, label='Year min')
-                year_max = gr.Slider(year_lo, year_hi, value=year_hi, step=1, label='Year max')
-            with gr.Row():
-                price_min = gr.Slider(price_lo, price_hi, value=price_lo, step=500, label='Price min (USD)')
-                price_max = gr.Slider(price_lo, price_hi, value=min(price_hi, 80000), step=500,
-                                      label='Price max (USD)')
-            with gr.Row():
-                min_safety = gr.Slider(3, 5, value=4, step=1, label='Min Safety ★')
-                min_reliab = gr.Slider(55, 99, value=70, step=1, label='Min Reliability')
-            with gr.Row():
-                top_n = gr.Slider(1, 10, value=5, step=1, label='Recommendations')
-                mix = gr.Slider(0, 1, value=0.7, step=0.05, label='α — Text vs Numeric')
-            with gr.Row():
-                cross_only = gr.Checkbox(label="Cross-brand only", value=True)
-                skip_same_model = gr.Checkbox(label="Exclude same model family", value=True)
-            run_btn = gr.Button("Recommend")
-            anchor_out = gr.Markdown()
-            table_out = gr.Dataframe(interactive=False)
-            note_out = gr.Markdown()
-            # input order must match recommend's positional parameters
-            inputs = [
-                make_dd, model_dd, trim_dd, year_dd, top_n, mix,
-                body_dd, fuel_dd, year_min, year_max, price_min, price_max,
-                min_safety, min_reliab, cross_only, skip_same_model,
-            ]
-            outputs = [anchor_out, table_out, note_out]
-            run_btn.click(recommend, inputs, outputs)
-        gr.Markdown("Tip: Leave Trim/Year empty if you’re not sure — the app will fall back smartly.")
-    return app
-# Built at import time so the module exposes a ready-made `demo` object.
-demo = build_ui()
-if __name__ == "__main__":
-    # Works locally and on Hugging Face Spaces
-    queued_app = demo.queue()
-    queued_app.launch(server_port=7860, server_name="0.0.0.0")