Spaces:

Levimichael4
/

RideSearch

Sleeping

App Files Files Community

Levimichael4 committed on Aug 10, 2025

Commit

af839dc

verified ·

1 Parent(s): 8c88b24

Update app.py

Browse files

Files changed (1) hide show

app.py +293 -145

app.py CHANGED Viewed

@@ -1,179 +1,327 @@
-import os, glob, numpy as np, pandas as pd
 from sklearn.metrics.pairwise import cosine_similarity
 from sklearn.preprocessing import StandardScaler
 import gradio as gr
-# ---------- data loading ----------
 def load_df():
     if os.path.exists('RideSearch_dataset.csv'):
         return pd.read_csv('RideSearch_dataset.csv')
     parts = sorted(glob.glob('RideSearch_part*_small.csv'))
-    if parts:
-        df = pd.concat([pd.read_csv(p) for p in parts], ignore_index=True)
-        df.to_csv('RideSearch_dataset.csv', index=False)
-        return df
-    raise FileNotFoundError('Upload dataset (RideSearch_part*_small.csv) or RideSearch_dataset.csv')
 DF = load_df()
-NUM = [
     'horsepower','zero_to_100_kmh_s','seats','cargo_liters','price_usd',
     'popularity_score','comfort_score','reliability_score','tech_score',
     'ownership_cost_score','safety_rating'
 ]
-# ---------- embeddings (lazy build if missing) ----------
 def ensure_emb():
-    if not (os.path.exists('emb_text.npy') and os.path.exists('emb_num.npy')):
-        from sentence_transformers import SentenceTransformer
-        m = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
-        te = m.encode(
-            DF['text_record'].astype(str).tolist(),
-            batch_size=256, show_progress_bar=True, normalize_embeddings=True
-        )
-        np.save('emb_text.npy', np.asarray(te, dtype='float32'))
-        X = DF[NUM].copy()
-        X['zero_to_100_kmh_s'] = -X['zero_to_100_kmh_s']  # smaller time = better
-        Xs = StandardScaler().fit_transform(X.values.astype('float32'))
-        np.save('emb_num.npy', Xs.astype('float32'))
-    return np.load('emb_text.npy'), np.load('emb_num.npy')
-# ---------- dependent dropdown maps ----------
-def _map():
-    m = {}
-    for mk, g in DF.groupby('make'):
-        m[mk] = {}
-        for md, g2 in g.groupby('model'):
-            m[mk][md] = {
-                'trims': sorted(g2['trim'].astype(str).unique().tolist())[:20],
-                'years': sorted(g2['year'].astype(int).unique().tolist())
-            }
-    return m
-MAP = _map()
-def models_for(mk):
-    # Return a UI update so older Gradio repopulates the choices
-    opts = sorted(MAP.get(mk, {}).keys()) if mk else []
     return gr.update(choices=opts, value=None)
-def trim_year(mk, md):
-    d = MAP.get(mk, {}).get(md, {})
-    return d.get('trims', []), d.get('years', [])
-def _up(a, b):
-    trims, years = trim_year(a, b)
     return gr.update(choices=trims, value=None), gr.update(choices=years, value=None)
-# ---------- helpers ----------
-def anchor_row(mk, md, tr, yr):
-    sub = DF.copy()
-    if mk: sub = sub[sub['make'] == mk]
-    if md: sub = sub[sub['model'] == md]
-    if tr: sub = sub[sub['trim'] == tr]
-    if yr: sub = sub[sub['year'] == yr]
-    if sub.empty: return None
-    return sub.sort_values('popularity_score', ascending=False).iloc[0]
-def apply_filters(df, body, fuel, y_min, y_max, p_min, p_max, safety, rel):
     out = df.copy()
     if body != 'Any': out = out[out['body_type'] == body]
     if fuel != 'Any': out = out[out['fuel'] == fuel]
     out = out[(out['year'] >= y_min) & (out['year'] <= y_max)]
     out = out[(out['price_usd'] >= p_min) & (out['price_usd'] <= p_max)]
-    out = out[(out['safety_rating'] >= safety) & (out['reliability_score'] >= rel)]
     return out
-def fmt_card(r):
-    eff = (f"{int(r['city_mpg'])}-{int(r['highway_mpg'])} mpg"
-           if pd.notna(r['city_mpg']) else f"{int(r['range_km'])} km range")
-    return (
-        f"**{r['name']}**\n"
-        f"- Brand: {r['make']} | Body: {r['body_type']} | Fuel: {r['fuel']}\n"
-        f"- HP: {int(r['horsepower'])} | 0–100: {r['zero_to_100_kmh_s']} s | "
-        f"Price: ${int(r['price_usd']):,}\n"
-        f"- Popularity {int(r['popularity_score'])}/10 • Comfort {int(r['comfort_score'])}/10 • "
-        f"Reliability {int(r['reliability_score'])}/100 • Safety {int(r['safety_rating'])}★"
-    )
-def recommend(mk, md, tr, yr, topk, alpha,
-              body, fuel, y_min, y_max, p_min, p_max, safety, rel):
-    a = anchor_row(mk, md, tr, yr)
     if a is None:
         return "No match for that combo.", None, None
-    sub = apply_filters(
-        DF, body, fuel, int(y_min), int(y_max), int(p_min), int(p_max), int(safety), int(rel)
-    )
-    if sub.empty:
-        return "No cars after filters.", None, None
-    Et, En = ensure_emb()
-    idx = int(a.name)
-    cand = sub.index.values
-    st = cosine_similarity(Et[idx:idx+1], Et[cand])[0]
-    sn = cosine_similarity(En[idx:idx+1], En[cand])[0]
-    s = float(alpha) * st + (1 - float(alpha)) * sn
-    import numpy as np
-    if idx in cand:
-        s[np.where(cand == idx)[0][0]] = -1
-    order = np.argsort(-s)[:topk]
-    sel = DF.loc[cand[order]].copy()
-    sel['similarity_%'] = (s[order]*100).round(1)
-    cols = ['name','make','model','trim','year','body_type','fuel','engine_type',
-            'price_usd','horsepower','zero_to_100_kmh_s',
-            'popularity_score','comfort_score','reliability_score','tech_score',
-            'ownership_cost_score','safety_rating','similarity_%']
-    return fmt_card(a), sel[cols], f"α = {alpha:.2f} (text ↔ numeric)"
-# ---------- UI (no RangeSlider; use min/max sliders) ----------
-with gr.Blocks() as demo:
-    gr.Markdown("# RideSearch — pick a car, get similar across brands")
-    with gr.Tab("Pick & Recommend"):
-        with gr.Row():
-            mk = gr.Dropdown(sorted(DF['make'].unique().tolist()), label="Make", value=None)
-            md = gr.Dropdown([], label="Model", value=None)
-            tr = gr.Dropdown([], label="Trim (optional)", value=None)
-            yr = gr.Dropdown([], label="Year (optional)", value=None)
-        mk.change(models_for, mk, md)
-        md.change(_up, [mk, md], [tr, yr])
-        ylo, yhi = int(DF['year'].min()), int(DF['year'].max())
-        plo, phi = int(DF['price_usd'].min()), int(DF['price_usd'].max())
-        with gr.Row():
-            body = gr.Dropdown(['Any']+sorted(DF['body_type'].unique().tolist()), value='Any', label='Body')
-            fuel = gr.Dropdown(['Any']+sorted(DF['fuel'].unique().tolist()), value='Any', label='Fuel')
-        with gr.Row():
-            y_min = gr.Slider(ylo, yhi, value=ylo, step=1, label='Year min')
-            y_max = gr.Slider(ylo, yhi, value=yhi, step=1, label='Year max')
-        with gr.Row():
-            p_min = gr.Slider(plo, phi, value=plo, step=500, label='Price min (USD)')
-            p_max = gr.Slider(plo, phi, value=min(phi, 60000), step=500, label='Price max (USD)')
-        with gr.Row():
-            safety = gr.Slider(3,5,value=4,step=1,label='Min Safety ★')
-            rel = gr.Slider(55,99,value=70,step=1,label='Min Reliability')
-        with gr.Row():
-            topk = gr.Slider(1,10,value=5,step=1,label='Recommendations')
-            alpha = gr.Slider(0,1,value=0.7,step=0.05,label='α — Text vs Numeric')
-        go = gr.Button("Recommend")
-        anchor_md = gr.Markdown()
-        table = gr.Dataframe(interactive=False)
-        note = gr.Markdown()
-        go.click(
-            recommend,
-            [mk,md,tr,yr,topk,alpha,body,fuel,y_min,y_max,p_min,p_max,safety,rel],
-            [anchor_md, table, note]
-        )
-# Works locally and on Spaces:
 if __name__ == "__main__":
     demo.queue().launch(server_name="0.0.0.0", server_port=7860)

+# app_new.py  — RideSearch (cross-brand, brand-correct trims, smart fallbacks)
+import os, glob
+import numpy as np
+import pandas as pd
 from sklearn.metrics.pairwise import cosine_similarity
 from sklearn.preprocessing import StandardScaler
 import gradio as gr
+# =========================
+# Data loading & embeddings
+# =========================
 def load_df():
+    """
+    Load the RideSearch dataset as a pandas DataFrame.
+
+    Reads 'RideSearch_dataset.csv' from the working directory when it exists.
+    Otherwise every file matching 'RideSearch_part*_small.csv' is read in sorted
+    name order, concatenated with a fresh index, written back to
+    'RideSearch_dataset.csv' (so later runs take the first branch), and returned.
+
+    Raises:
+        FileNotFoundError: neither the merged CSV nor any part file is present.
+    """
     if os.path.exists('RideSearch_dataset.csv'):
         return pd.read_csv('RideSearch_dataset.csv')
     parts = sorted(glob.glob('RideSearch_part*_small.csv'))
+    if not parts:
+        raise FileNotFoundError(
+            "Upload RideSearch_dataset.csv OR the 10 parts RideSearch_part*_small.csv."
+        )
+    # ignore_index=True gives the merged frame a fresh 0..n-1 index.
+    df = pd.concat([pd.read_csv(p) for p in parts], ignore_index=True)
+    df.to_csv('RideSearch_dataset.csv', index=False)
+    return df
 DF = load_df()
+# Numeric columns used for the numeric embedding (adjust if your CSV differs).
+# ensure_emb() standardises exactly these columns, in this order, after negating
+# 'zero_to_100_kmh_s'.
+NUM_COLS = [
     'horsepower','zero_to_100_kmh_s','seats','cargo_liters','price_usd',
     'popularity_score','comfort_score','reliability_score','tech_score',
     'ownership_cost_score','safety_rating'
 ]
 def ensure_emb():
+    """
+    Load or create text + numeric embeddings.
+    Text uses all-MiniLM-L6-v2 on DF['text_record'].
+    Numeric is StandardScaler on NUM_COLS (with 0-100 reversed for acceleration).
+    """
+    txt_ok = os.path.exists('emb_text.npy')
+    num_ok = os.path.exists('emb_num.npy')
+    if txt_ok and num_ok:
+        return np.load('emb_text.npy'), np.load('emb_num.npy')
+    # --- build on first run ---
+    from sentence_transformers import SentenceTransformer
+    m = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+    texts = DF['text_record'].astype(str).tolist()
+    Etext = m.encode(texts, batch_size=256, show_progress_bar=True, normalize_embeddings=True)
+    Etext = np.asarray(Etext, dtype='float32')
+    np.save('emb_text.npy', Etext)
+    X = DF[NUM_COLS].copy()
+    # faster 0–100 → lower-better; invert accel so larger is better for similarity
+    if 'zero_to_100_kmh_s' in X.columns:
+        X['zero_to_100_kmh_s'] = -X['zero_to_100_kmh_s'].astype('float32')
+    Xs = StandardScaler().fit_transform(X.values.astype('float32'))
+    Enum = Xs.astype('float32')
+    np.save('emb_num.npy', Enum)
+    return Etext, Enum
+# ==========================================
+# Brand-correct trim display & alias mapping
+# ==========================================
+# (make, model) -> display trims offered in the Trim dropdown for that pair.
+# trim_year() uses this list when the pair is present and otherwise falls back
+# to the trim values found in DF.
+TRIM_CHOICES = {
+    ("BMW","3 Series"): ["320i","330i","330e","340i","M3"],
+    ("Audi","A3"): ["35 TFSI","40 TFSI","45 TFSI","S3","RS3"],
+    ("Audi","A4"): ["35 TFSI","40 TFSI","45 TFSI","S4","RS4"],
+    ("Mercedes-Benz","C-Class"): ["C200","C220d","C300","AMG C43","AMG C63"],
+    ("Lexus","IS"): ["IS 300","IS 350","IS 500 F SPORT"],
+    ("Toyota","Corolla"): ["L","LE","SE","XSE","GR"],
+    ("Honda","Civic"): ["LX","Sport","EX","Touring","Type R"],
+    ("Volkswagen","Golf"): ["Trendline","Comfortline","Highline","GTI","R"],
+    ("Hyundai","Elantra"): ["SE","SEL","Limited","N Line","N"],
+    ("Kia","Forte"): ["LX","S","EX","GT-Line","GT"],
+    # add more pairs you plan to demo
+}
+# Map those display trims to your dataset’s generic trim tokens
+# (the values used here are Base / Sport / Premium / Performance).
+# NOTE(review): this is a flat dict keyed by display trim only, so a trim name
+# shared by several models can hold just one value. "SE", "LX" and "EX" each
+# appear twice below; in a dict literal the LAST entry wins, so "SE" resolves to
+# "Base" (the Hyundai Elantra entry) and the Toyota Corolla "SE":"Sport" entry is
+# silently discarded. "LX" and "EX" happen to carry the same value both times.
+TRIM_ALIAS_TO_GENERIC = {
+    # BMW 3
+    "320i":"Base","330i":"Sport","330e":"Sport","340i":"Premium","M3":"Performance",
+    # Audi A3/A4
+    "35 TFSI":"Base","40 TFSI":"Sport","45 TFSI":"Premium","S3":"Performance","RS3":"Performance",
+    "S4":"Performance","RS4":"Performance",
+    # Mercedes C
+    "C200":"Base","C220d":"Base","C300":"Premium","AMG C43":"Performance","AMG C63":"Performance",
+    # Lexus IS
+    "IS 300":"Base","IS 350":"Premium","IS 500 F SPORT":"Performance",
+    # Toyota Corolla
+    "L":"Base","LE":"Base","SE":"Sport","XSE":"Premium","GR":"Performance",
+    # Honda Civic
+    "LX":"Base","Sport":"Sport","EX":"Premium","Touring":"Premium","Type R":"Performance",
+    # VW Golf
+    "Trendline":"Base","Comfortline":"Base","Highline":"Premium","GTI":"Performance","R":"Performance",
+    # Hyundai Elantra
+    "SE":"Base","SEL":"Base","Limited":"Premium","N Line":"Sport","N":"Performance",
+    # Kia Forte
+    "LX":"Base","S":"Sport","EX":"Premium","GT-Line":"Sport","GT":"Performance",
+}
+# ==============================
+# Helpers: dropdowns & filtering
+# ==============================
+def models_for(make):
+    """
+    Build the Gradio update for the Model dropdown after the Make changes.
+
+    Returns a gr.update whose choices are the sorted, non-null, distinct 'model'
+    values of the DF rows with that make, and whose value is cleared. With no
+    make selected the choices are emptied.
+    """
+    if not make:
+        return gr.update(choices=[], value=None)
+    opts = sorted(DF.loc[DF['make'].eq(make), 'model'].dropna().unique().tolist())
     return gr.update(choices=opts, value=None)
+def trim_year(make, model):
+    # Trims (brand-correct if we have them; otherwise from DF)
+    if make and model and (make, model) in TRIM_CHOICES:
+        trims = TRIM_CHOICES[(make, model)]
+    else:
+        sub = DF
+        if make:  sub = sub[sub['make'] == make]
+        if model: sub = sub[sub['model'] == model]
+        trims = sorted(sub['trim'].astype(str).dropna().unique().tolist())[:20]
+    # Years
+    if make and model:
+        years = sorted(
+            DF.loc[(DF['make'].eq(make)) & (DF['model'].eq(model)), 'year']
+              .dropna().astype(int).unique().tolist()
+        )
+    else:
+        years = []
+    return trims, years
+def on_model_change(make, model):
+    """
+    Build the Gradio updates for the Trim and Year dropdowns after the Model changes.
+
+    Returns two gr.update objects (trim, year), each carrying the choices from
+    trim_year(make, model) and a cleared value.
+    """
+    trims, years = trim_year(make, model)
     return gr.update(choices=trims, value=None), gr.update(choices=years, value=None)
+def normalize_trim_for_query(make, model, display_trim):
+    """Map pretty display trims back to dataset generic tokens (Base/Sport/...)."""
+    if not display_trim:
+        return None
+    if (make, model) in TRIM_CHOICES and display_trim in TRIM_ALIAS_TO_GENERIC:
+        return TRIM_ALIAS_TO_GENERIC[display_trim]
+    return display_trim
+def apply_filters(df, body, fuel, y_min, y_max, p_min, p_max, safety, reliab):
+    """
+    Return a filtered copy of df; the input frame is not modified.
+
+    body / fuel: exact match on 'body_type' / 'fuel'; the value 'Any' skips that filter.
+    y_min..y_max, p_min..p_max: inclusive bounds on 'year' and 'price_usd'.
+    safety, reliab: inclusive lower bounds on 'safety_rating' and 'reliability_score'.
+    """
     out = df.copy()
     if body != 'Any': out = out[out['body_type'] == body]
     if fuel != 'Any': out = out[out['fuel'] == fuel]
     out = out[(out['year'] >= y_min) & (out['year'] <= y_max)]
     out = out[(out['price_usd'] >= p_min) & (out['price_usd'] <= p_max)]
+    out = out[(out['safety_rating'] >= safety) & (out['reliability_score'] >= reliab)]
     return out
+def fmt_anchor(r):
+    """
+    Render one car record as a Markdown summary string.
+
+    r is indexed by column name (the caller passes a DataFrame row). Each text
+    line ends with two spaces before the newline, which is a Markdown hard line
+    break. horsepower, price and the score fields are cast with int(), so those
+    values must be convertible (a missing value would raise).
+    """
+    return (f"**{r['name']}**  \n"
+            f"Brand: {r['make']}  •  Model: {r['model']}  •  Trim: {r['trim']}  •  Year: {r['year']}  \n"
+            f"Body: {r['body_type']}  •  Fuel: {r['fuel']}  •  Engine: {r['engine_type']}  \n"
+            f"HP: {int(r['horsepower'])}  •  0–100: {r['zero_to_100_kmh_s']}s  •  Price: ${int(r['price_usd']):,}  \n"
+            f"Popularity {int(r['popularity_score'])}/10  •  Comfort {int(r['comfort_score'])}/10  •  "
+            f"Reliability {int(r['reliability_score'])}/100  •  Safety {int(r['safety_rating'])}★")
+# ===========================
+# Anchor selection & ranking
+# ===========================
+def anchor_row(make, model, trim_display, year):
+    """
+    Pick the anchor row for the user's selection, relaxing constraints in stages.
+
+    The display trim is first mapped to the dataset's generic token. Candidates
+    are the DF rows matching whichever of make/model is set; at each stage the
+    row with the highest 'popularity_score' is chosen:
+      1) trim and year both applied (each only if given),
+      2) year only,
+      3) trim only,
+      4) any row for that make/model.
+
+    Returns the chosen row (a pandas Series whose .name is its DF index label),
+    or None when no row matches the make/model at all.
+    """
+    trim_generic = normalize_trim_for_query(make, model, trim_display)
+    sub = DF.copy()
+    if make:  sub = sub[sub['make'] == make]
+    if model: sub = sub[sub['model'] == model]
+    def pick(df_):
+        # Most popular row of df_, or None when df_ has no rows.
+        return None if df_.empty else df_.sort_values('popularity_score', ascending=False).iloc[0]
+    # 1) exact
+    exact = sub.copy()
+    if trim_generic: exact = exact[exact['trim'] == trim_generic]
+    if year:         exact = exact[exact['year'] == year]
+    if not exact.empty: return pick(exact)
+    # 2) same year (ignore trim)
+    if year:
+        y_only = sub[sub['year'] == year]
+        if not y_only.empty: return pick(y_only)
+    # 3) same trim (ignore year)
+    if trim_generic:
+        t_only = sub[sub['trim'] == trim_generic]
+        if not t_only.empty: return pick(t_only)
+    # 4) fallback: best for that make+model
+    return pick(sub)
+def recommend(make, model, trim_display, year, topk, alpha,
+              body, fuel, y_min, y_max, p_min, p_max, safety, reliab,
+              cross_brand_only=True, exclude_same_model=True):
+    """
+    Recommend cars similar to the selected anchor car.
+
+    The anchor comes from anchor_row(). Candidates are all DF rows, optionally
+    minus the anchor's make (cross_brand_only) and minus the anchor's exact
+    make+model (exclude_same_model), then narrowed by apply_filters(). Each
+    candidate is scored as
+        alpha * cosine(text embeddings) + (1 - alpha) * cosine(numeric embeddings)
+    and the best-scoring rows are kept, at most one per (make, model), until
+    topk rows are collected.
+
+    Returns a 3-tuple for the UI:
+      (anchor Markdown, DataFrame of recommendations with a 'similarity_%'
+       column, note string describing the settings),
+    or (message, None, None) when there is no anchor, no candidate survives the
+    filters, or nothing is selected.
+    """
+    a = anchor_row(make, model, trim_display, year)
     if a is None:
         return "No match for that combo.", None, None
+    # candidate pool
+    pool = DF.copy()
+    if cross_brand_only:
+        pool = pool[pool['make'] != a['make']]
+    if exclude_same_model:
+        pool = pool[~((pool['make'] == a['make']) & (pool['model'] == a['model']))]
+    pool = apply_filters(pool, body, fuel, int(y_min), int(y_max), int(p_min), int(p_max), int(safety), int(reliab))
+    if pool.empty:
+        return "No cars after filters. Try widening year/price/safety.", None, None
+    Etext, Enum = ensure_emb()
+    # DF index labels are used directly as row positions in the embedding arrays.
+    idx_anchor = int(a.name)
+    cand_idx = pool.index.values
+    # Slicing idx:idx+1 keeps the anchor 2-D; [0] takes the single result row.
+    st = cosine_similarity(Etext[idx_anchor:idx_anchor+1], Etext[cand_idx])[0]
+    sn = cosine_similarity(Enum[idx_anchor:idx_anchor+1],  Enum[cand_idx])[0]
+    s = float(alpha)*st + (1-float(alpha))*sn
+    # rank, enforce unique (brand, model) combos
+    order = np.argsort(-s)  # positions into cand_idx, best score first
+    seen = set()
+    chosen = []
+    for j in order:
+        r = DF.loc[cand_idx[j]]
+        key = (r['make'], r['model'])
+        if key in seen:
+            continue
+        seen.add(key)
+        chosen.append(cand_idx[j])
+        if len(chosen) >= int(topk):
+            break
+    if not chosen:
+        return "No recommendations found after constraints.", None, None
+    sel = DF.loc[chosen].copy()
+    # DF index label -> blended score as a percentage, rounded to one decimal.
+    sim_lookup = {cand_idx[j]: round(float(s[j])*100, 1) for j in order}
+    sel['similarity_%'] = sel.index.map(lambda k: sim_lookup.get(k, 0.0))
+    cols = [
+        'name','make','model','trim','year','body_type','fuel','engine_type',
+        'price_usd','horsepower','zero_to_100_kmh_s',
+        'popularity_score','comfort_score','reliability_score',
+        'tech_score','ownership_cost_score','safety_rating','similarity_%'
+    ]
+    note = (f"α = {float(alpha):.2f} (text ↔ numeric) • Cross-brand only = {cross_brand_only} "
+            f"• Exclude same model = {exclude_same_model}")
+    return fmt_anchor(a), sel[cols], note
+# ============
+# Gradio UI
+# ============
+def build_ui():
+    """
+    Assemble the Gradio Blocks interface and return it (it is not launched here).
+
+    Year and price slider bounds are taken from the min/max of DF['year'] and
+    DF['price_usd']. Changing Make refreshes the Model dropdown via models_for;
+    changing Model refreshes Trim and Year via on_model_change; the Recommend
+    button calls recommend() with all controls, in the order of its parameters,
+    and fills the anchor Markdown, the results table and the note.
+    """
+    y_lo, y_hi = int(DF['year'].min()), int(DF['year'].max())
+    p_lo, p_hi = int(DF['price_usd'].min()), int(DF['price_usd'].max())
+    with gr.Blocks() as demo:
+        gr.Markdown("# RideSearch — pick a car, get **cross-brand** similar options")
+        with gr.Tab("Pick & Recommend"):
+            with gr.Row():
+                mk = gr.Dropdown(sorted(DF['make'].dropna().unique().tolist()), label="Make")
+                md = gr.Dropdown([], label="Model")
+                tr = gr.Dropdown([], label="Trim (optional)")
+                yr = gr.Dropdown([], label="Year (optional)")
+            # Dependent dropdowns: Make -> Model -> (Trim, Year).
+            mk.change(models_for, mk, md)
+            md.change(on_model_change, [mk, md], [tr, yr])
+            with gr.Row():
+                body = gr.Dropdown(['Any'] + sorted(DF['body_type'].dropna().unique().tolist()),
+                                   value='Any', label='Body')
+                fuel = gr.Dropdown(['Any'] + sorted(DF['fuel'].dropna().unique().tolist()),
+                                   value='Any', label='Fuel')
+            with gr.Row():
+                y_min = gr.Slider(y_lo, y_hi, value=y_lo, step=1, label='Year min')
+                y_max = gr.Slider(y_lo, y_hi, value=y_hi, step=1, label='Year max')
+            with gr.Row():
+                p_min = gr.Slider(p_lo, p_hi, value=p_lo, step=500, label='Price min (USD)')
+                # Default price ceiling is capped at 80000 even if the data goes higher.
+                p_max = gr.Slider(p_lo, p_hi, value=min(p_hi, 80000), step=500, label='Price max (USD)')
+            with gr.Row():
+                safety = gr.Slider(3, 5, value=4, step=1, label='Min Safety ★')
+                reliab = gr.Slider(55, 99, value=70, step=1, label='Min Reliability')
+            with gr.Row():
+                topk = gr.Slider(1, 10, value=5, step=1, label='Recommendations')
+                alpha = gr.Slider(0, 1, value=0.7, step=0.05, label='α — Text vs Numeric')
+            with gr.Row():
+                cross = gr.Checkbox(label="Cross-brand only", value=True)
+                xmodel = gr.Checkbox(label="Exclude same model family", value=True)
+            go = gr.Button("Recommend")
+            anchor_md = gr.Markdown()
+            table = gr.Dataframe(interactive=False)
+            note = gr.Markdown()
+            # Input order must match recommend()'s positional parameters.
+            go.click(
+                recommend,
+                [mk, md, tr, yr, topk, alpha, body, fuel, y_min, y_max, p_min, p_max, safety, reliab, cross, xmodel],
+                [anchor_md, table, note]
+            )
+        gr.Markdown("Tip: Leave Trim/Year empty if you’re not sure — the app will fall back smartly.")
+    return demo
+demo = build_ui()
 if __name__ == "__main__":
+    # Works locally and on Hugging Face Spaces
     demo.queue().launch(server_name="0.0.0.0", server_port=7860)