"""Gradio app that recommends vehicles from a fuel-economy CSV.

The dataset is loaded once at import time, turned into a numeric feature
matrix (min-max scaled numeric columns + one-hot encoded categorical columns),
and each request is answered by filtering the rows on the user's hard
preferences and ranking the survivors by cosine similarity to an "ideal"
target vector.
"""

import pandas as pd
import numpy as np
import gradio as gr
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors

CSV_PATH = "all-vehicles-model@public.csv"

# Sentinel dropdown value meaning "do not filter on this attribute".
NO_PREFERENCE = "No preference"

# Vehicles at least this many years old are treated as "Vintage".
VINTAGE_AGE_YEARS = 30


# ===== Safe data loading =====
def read_csv_safely(path):
    """Read the dataset, trying a ';'-separated layout first.

    Falls back to pandas' default (comma) parsing when the first attempt
    raises. The broad ``except`` is a deliberate best-effort: any failure of
    the first attempt simply triggers the second, whose errors propagate.
    """
    try:
        return pd.read_csv(path, sep=";", encoding="utf-8", engine="python")
    except Exception:
        return pd.read_csv(path, encoding="utf-8")


df = read_csv_safely(CSV_PATH)

# Main columns; each is None when the dataset does not provide it.
FUEL_COL = "Fuel Type1" if "Fuel Type1" in df.columns else (
    "Fuel Type" if "Fuel Type" in df.columns else None
)
TRANS_COL = "Transmission" if "Transmission" in df.columns else None
DRIVE_COL = "Drive" if "Drive" in df.columns else None
YEAR_COL = "Year" if "Year" in df.columns else None
MAKE_COL = "Make" if "Make" in df.columns else None
MODEL_COL = "Model" if "Model" in df.columns else None

# Candidate numeric columns, used both for type coercion and as features.
_NUMERIC_CANDIDATES = [
    "City Mpg For Fuel Type1",
    "Highway Mpg For Fuel Type1",
    "Combined Mpg For Fuel Type1",
    "Annual Fuel Cost For Fuel Type1",
    "Epa Range For Fuel Type2",
    "Engine displacement",
    "Cylinders",
    YEAR_COL,
]

# Common numeric conversions (unparseable values become NaN).
for _col in _NUMERIC_CANDIDATES:
    if _col and _col in df.columns:
        df[_col] = pd.to_numeric(df[_col], errors="coerce")


# ===== Dropdown options =====
def unique_opts(col, default=None, limit=120):
    """Return the sorted, stripped, non-empty unique values of ``df[col]``.

    Args:
        col: Column name, or None when the column is unavailable.
        default: Optional entry placed in front of the values.
        limit: Maximum number of data values returned (after sorting).

    Returns:
        A list of strings. When the column is missing, ``[default]`` if
        ``default`` is truthy, otherwise an empty list.
    """
    if col and col in df.columns:
        cleaned = df[col].dropna().astype(str).str.strip()
        vals = cleaned.replace("", pd.NA).dropna().unique().tolist()
        vals = sorted(vals)[:limit]
        if default is None:
            return vals
        return [default] + vals
    return [default] if default else []


TRANS_OPTS = [NO_PREFERENCE, "Automatic (all)", "Manual (all)"] + unique_opts(TRANS_COL, None)
FUEL_OPTS = unique_opts(FUEL_COL, NO_PREFERENCE)
_drive_from_data = unique_opts(DRIVE_COL, None)
if not _drive_from_data:
    _drive_from_data = ["2-Wheel Drive", "4-Wheel Drive", "All Wheel Drive"]
DRIVE_OPTS = [NO_PREFERENCE] + _drive_from_data


# ===== Filters =====
def _is_no_preference(sel):
    """True when a dropdown value means "do not filter" (sentinel or cleared)."""
    return not sel or sel == NO_PREFERENCE


def _vintage_cutoff_year():
    """Latest model year that still counts as "Vintage"."""
    return pd.Timestamp.now().year - VINTAGE_AGE_YEARS


def filter_by_fuel(df_in, sel):
    """Keep rows whose fuel column contains ``sel`` (case-insensitive)."""
    if not FUEL_COL or _is_no_preference(sel):
        return df_in
    s = df_in[FUEL_COL].astype(str)
    # regex=False: the selection is literal text, not a pattern.
    return df_in[s.str.contains(sel, case=False, na=False, regex=False)]


def filter_by_transmission(df_in, sel):
    """Filter on transmission.

    "Automatic (all)" / "Manual (all)" match by substring ("auto" / "man");
    any other selection must equal the stripped, lower-cased cell value.
    """
    if not TRANS_COL or _is_no_preference(sel):
        return df_in
    s = df_in[TRANS_COL].astype(str)
    if sel == "Automatic (all)":
        return df_in[s.str.contains("auto", case=False, na=False, regex=False)]
    if sel == "Manual (all)":
        return df_in[s.str.contains("man", case=False, na=False, regex=False)]
    return df_in[s.str.strip().str.lower() == sel.strip().lower()]


def filter_by_drive(df_in, sel):
    """Keep rows whose drive column contains ``sel`` (case-insensitive)."""
    if not DRIVE_COL or _is_no_preference(sel):
        return df_in
    s = df_in[DRIVE_COL].astype(str)
    return df_in[s.str.contains(sel, case=False, na=False, regex=False)]


def filter_by_vintage(df_in, kind):
    """Split on model year: "Vintage" is <= cutoff, "Regular" is > cutoff.

    Any other value (including "No preference") leaves the frame unchanged.
    Rows whose year cannot be parsed are dropped by both selections.
    """
    if not YEAR_COL or _is_no_preference(kind):
        return df_in
    cutoff = _vintage_cutoff_year()
    y = pd.to_numeric(df_in[YEAR_COL], errors="coerce")
    if kind == "Vintage":
        return df_in[y <= cutoff]
    if kind == "Regular":
        return df_in[y > cutoff]
    return df_in


def _apply_filters(df_in, fuel, trans, drive, vintage):
    """Apply all four row filters (they commute, so the order is irrelevant)."""
    out = filter_by_fuel(df_in, fuel)
    out = filter_by_transmission(out, trans)
    out = filter_by_drive(out, drive)
    return filter_by_vintage(out, vintage)


# ===== Features for the whole dataset =====
NUM_COLS = [c for c in _NUMERIC_CANDIDATES if c and c in df.columns]
CAT_COLS = [c for c in [FUEL_COL, DRIVE_COL] if c and c in df.columns]

if NUM_COLS:
    _scaler = MinMaxScaler()
    _num_frame = df[NUM_COLS].apply(pd.to_numeric, errors="coerce")
    # Impute with the column median; a column that is entirely NaN has a NaN
    # median, so fall back to 0 to keep NaN out of the feature matrix.
    _num_frame = _num_frame.fillna(_num_frame.median()).fillna(0.0)
    num_mat = _scaler.fit_transform(_num_frame)
else:
    _scaler = None
    num_mat = np.zeros((len(df), 0))

if CAT_COLS:
    _enc = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
    cat_mat = _enc.fit_transform(df[CAT_COLS].astype(str))
else:
    _enc = None
    cat_mat = np.zeros((len(df), 0))

# Row i of FEATURES describes row i of df.
FEATURES = np.concatenate([num_mat, cat_mat], axis=1) if len(df) else np.zeros((0, 0))

# Which dataset statistic represents the "ideal" value of a numeric column;
# columns not listed here use the median.
_TARGET_STAT = {
    "Combined Mpg For Fuel Type1": "max",
    "Annual Fuel Cost For Fuel Type1": "min",
    "Epa Range For Fuel Type2": "max",
}


# ===== Target vector for the user =====
def user_vector(params):
    """Build the 1 x n_features target vector the candidates are compared to.

    Args:
        params: Mapping with optional keys ``fuel_pref``, ``drive_pref`` and
            ``vintage``.

    Returns:
        A 2-D array with one row, laid out like a row of ``FEATURES``.
    """
    if NUM_COLS:
        base = {c: getattr(df[c], _TARGET_STAT.get(c, "median"))() for c in NUM_COLS}
        if YEAR_COL in NUM_COLS:
            # NOTE(review): anything other than "Regular" (including
            # "No preference") targets the vintage cutoff year - confirm
            # this bias towards older cars is intended.
            if params.get("vintage") == "Regular":
                base[YEAR_COL] = df[YEAR_COL].max()
            else:
                base[YEAR_COL] = _vintage_cutoff_year()
        if _scaler:
            # fillna mirrors the all-NaN-column guard used when fitting.
            user_num = _scaler.transform(pd.DataFrame([base])[NUM_COLS].fillna(0.0))
        else:
            user_num = np.zeros((1, 0))
    else:
        user_num = np.zeros((1, 0))

    if CAT_COLS:
        # "" is not a known category, so with handle_unknown="ignore" an
        # unset preference encodes to all zeros for that column.
        crow = {c: "" for c in CAT_COLS}
        fuel = params.get("fuel_pref")
        if FUEL_COL in CAT_COLS and fuel and fuel != NO_PREFERENCE:
            crow[FUEL_COL] = fuel
        drive = params.get("drive_pref")
        if DRIVE_COL in CAT_COLS and drive and drive != NO_PREFERENCE:
            crow[DRIVE_COL] = drive
        user_cat = _enc.transform(pd.DataFrame([crow])[CAT_COLS].astype(str))
    else:
        user_cat = np.zeros((1, 0))

    return np.concatenate([user_num, user_cat], axis=1)


# ===== KNN on the filtered subset, with a cosine fallback =====
def knn_scores_for_filtered(filtered_idx, user_vec, k=50):
    """Find the (up to) ``k`` rows of the subset closest to ``user_vec``.

    Args:
        filtered_idx: Row positions into ``FEATURES`` of the candidate rows.
        user_vec: 1 x n_features target vector.
        k: Maximum number of neighbours.

    Returns:
        ``(order, sims)``: ``order`` holds positions *within the subset*,
        nearest first, and ``sims`` the matching similarities
        (1 - cosine distance). Both are empty when the subset has no features.
    """
    feats = FEATURES[filtered_idx, :]
    if feats.size == 0:
        return np.array([], dtype=int), np.array([])
    n = int(min(k, max(1, feats.shape[0])))
    nn = NearestNeighbors(n_neighbors=n, metric="cosine")
    nn.fit(feats)
    dists, inds = nn.kneighbors(user_vec, return_distance=True)
    sims = 1.0 - dists[0]
    order = inds[0]
    return order, sims


def _rank_candidates(idx_arr, user_vec, k=50):
    """Return ``(order, sims)`` with ``sims[i]`` belonging to row ``order[i]``.

    Uses KNN first; if that fails or yields nothing, scores every candidate
    with plain cosine similarity and sorts best-first.
    """
    try:
        order, sims = knn_scores_for_filtered(idx_arr, user_vec, k=k)
    except Exception:
        # Best-effort: any KNN failure falls through to the cosine ranking.
        order, sims = np.array([], dtype=int), np.array([])
    if sims.size == 0:
        feats = FEATURES[idx_arr, :]
        raw = cosine_similarity(user_vec, feats)[0] if feats.size else np.zeros(len(idx_arr))
        order = np.argsort(-raw)
        # Reorder the scores together with the rows so they stay aligned.
        sims = raw[order]
    return order, sims


# ===== Recommendation logic =====
def recommend(usage, daily_km, budget_fuel, fuel_pref, drive_pref, trans_pref,
              cargo_need, perf_pref, seats_min, vintage_sel):
    """Filter the dataset by the user's preferences and rank the matches.

    ``usage``, ``daily_km``, ``budget_fuel``, ``cargo_need``, ``perf_pref``
    and ``seats_min`` are accepted from the UI but are not used by the
    current ranking.

    If the combined filters match nothing, one preference at a time is
    dropped (transmission, then drive, then fuel) until rows are found.

    Returns:
        ``(table, status)``: a DataFrame with up to 10 rows and a
        "Match Score" column, and a status message for the UI.
    """
    prefs = {"fuel": fuel_pref, "trans": trans_pref, "drive": drive_pref}
    # No defensive copy of df: the filters never mutate their input, and the
    # ranked frame below is an explicit copy.
    data = _apply_filters(df, vintage=vintage_sel, **prefs)

    for relaxed in ("trans", "drive", "fuel"):
        if not data.empty:
            break
        if prefs[relaxed] != NO_PREFERENCE:
            loosened = dict(prefs, **{relaxed: NO_PREFERENCE})
            data = _apply_filters(df, vintage=vintage_sel, **loosened)

    id_cols = [c for c in (MAKE_COL, MODEL_COL, YEAR_COL, FUEL_COL, TRANS_COL, DRIVE_COL) if c]

    if data.empty:
        return pd.DataFrame(columns=id_cols + ["Match Score"]), "לא נמצאו רכבים. נסה להרחיב בחירות."

    # Counted before ranking: the KNN step keeps at most k rows.
    n_filtered = len(data)

    # Index labels double as row positions into FEATURES; this relies on df
    # keeping the default RangeIndex it got from read_csv.
    idx_arr = data.index.to_numpy()
    u = user_vector(dict(fuel_pref=fuel_pref, drive_pref=drive_pref, vintage=vintage_sel))
    order, sims = _rank_candidates(idx_arr, u, k=50)

    ranked = data.iloc[order].copy()
    ranked["Match Score"] = (sims * 100).round(2)

    metric_cols = [
        c for c in (
            "City Mpg For Fuel Type1",
            "Highway Mpg For Fuel Type1",
            "Combined Mpg For Fuel Type1",
            "Annual Fuel Cost For Fuel Type1",
            "Epa Range For Fuel Type2",
        ) if c in ranked.columns
    ]
    cols_show = id_cols + metric_cols + ["Match Score"]

    top = ranked.head(10)[cols_show]
    return top, f"נמצאו {n_filtered} דגמים אחרי סינון. מוצגים טופ {len(top)}."


# ===== UI =====
with gr.Blocks(title="ממליץ רכבים חכם") as demo:
    gr.Markdown(
        "# ממליץ רכבים חכם\nבחר העדפות וקבל התאמות עם ציון AI. "
        "אפשר לבחור גיר כללי או דגם ספציפי."
    )
    with gr.Row():
        usage = gr.Radio(["עיר","בינעירוני","מעורב"], value="מעורב", label="אופי שימוש")
        daily_km = gr.Slider(0, 1000, value=20, step=5, label="נסועה יומית ממוצעת בקילומטרים")
        budget_fuel = gr.Slider(0, 10000, value=5000, step=100, label="תקציב דלק או חשמל שנתי")
    with gr.Row():
        fuel_pref = gr.Dropdown(FUEL_OPTS, value="No preference", label="עדיפות לסוג דלק")
        drive_pref = gr.Dropdown(DRIVE_OPTS, value="No preference", label="הנעה")
        trans_pref = gr.Dropdown(TRANS_OPTS, value="No preference", label="תיבת הילוכים")
    with gr.Row():
        cargo_need = gr.Slider(0, 800, value=360, step=20, label="נפח מטען מינימלי רצוי")
        perf_pref = gr.Slider(0, 1, value=0.85, step=0.05, label="עדיפות 1 ביצועים 0 חסכון")
        seats_min = gr.Slider(2, 8, value=2, step=1, label="מושבים מינימום")
    # NOTE(review): the original layout was lost; placed below the last row -
    # move into the row above if that was the intended position.
    vintage = gr.Radio(["No preference","Regular","Vintage"], value="No preference", label="סוג רכב")
    btn = gr.Button("מצא רכבים")
    out_tbl = gr.Dataframe(interactive=False, wrap=True, label="התאמות מומלצות . טופ 10")
    status = gr.Markdown("")
    btn.click(
        fn=recommend,
        inputs=[usage, daily_km, budget_fuel, fuel_pref, drive_pref, trans_pref,
                cargo_need, perf_pref, seats_min, vintage],
        outputs=[out_tbl, status],
    )

demo.queue().launch()