Spaces:

Levimichael4
/

DataDrive

Sleeping

App Files Files Community

DataDrive / app.py

Levimichael4

Rename app-2.py to app.py

3c36d6b verified 5 months ago

raw

history blame contribute delete

4.52 kB


	import os
	import numpy as np
	import pandas as pd
	from sklearn.metrics.pairwise import cosine_similarity
	import gradio as gr

	# File locations; each can be overridden through an environment variable of
	# the same name, otherwise the literal default next to it is used.
	DATASET_CSV = os.environ.get("DATASET_CSV", "DataDrive_dataset.csv")
	EMB_PATH = os.environ.get("EMB_PATH", "embeddings.npy")
	ID_PATH = os.environ.get("ID_PATH", "ids.csv")

	# The dataset is read once at import time and shared by every handler below.
	DF = pd.read_csv(filepath_or_buffer=DATASET_CSV)

	def ensure_embeddings():
	    """Return the embedding matrix, building it on disk first if it is missing.

	    When either ``EMB_PATH`` or ``ID_PATH`` does not exist, every
	    ``text_record`` in ``DF`` is encoded with the
	    ``sentence-transformers/all-MiniLM-L6-v2`` model (normalized, float32),
	    the matrix is saved to ``EMB_PATH`` and the identifying columns
	    (name, make, model, trim, year) are written to ``ID_PATH``.

	    Returns:
	        The array loaded from ``EMB_PATH``.

	    Raises:
	        RuntimeError: If the files are missing and rebuilding them fails.
	            The underlying exception is chained as ``__cause__``.
	    """
	    # Fast path: both artifacts are already on disk.
	    if os.path.exists(EMB_PATH) and os.path.exists(ID_PATH):
	        return np.load(EMB_PATH)

	    try:
	        # Imported here, so the package is only required on the rebuild path.
	        from sentence_transformers import SentenceTransformer

	        model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
	        texts = DF["text_record"].astype(str).tolist()
	        embs = model.encode(
	            texts,
	            batch_size=256,
	            show_progress_bar=True,
	            normalize_embeddings=True,
	        )
	        embs = np.asarray(embs, dtype="float32")
	        np.save(EMB_PATH, embs)
	        DF[["name", "make", "model", "trim", "year"]].to_csv(ID_PATH, index=False)
	    except Exception as e:
	        # Chain the cause so the real failure (missing package, missing
	        # column, unwritable path, ...) stays visible in the traceback.
	        raise RuntimeError(f"Embeddings not found and auto-build failed: {e}") from e
	    return np.load(EMB_PATH)

	def _format_row(row):
	    """Render one car record as a multi-line Markdown summary.

	    Args:
	        row: Mapping-like record (e.g. a DataFrame row) providing the keys
	            ``name``, ``origin_country``, ``body_type``, ``fuel``,
	            ``engine_type``, ``drivetrain``, ``transmission``, ``horsepower``,
	            ``seats``, ``efficiency``, ``msrp_usd``, ``popularity_score`` and
	            ``comfort_score``.

	    Returns:
	        A string with the name on the first line followed by one ``- ``
	        bullet line per group of attributes. Horsepower, seats, MSRP and the
	        two scores are truncated to integers; MSRP is shown with thousands
	        separators.

	    Raises:
	        KeyError: If a required key is missing.
	        ValueError: If a numeric field cannot be converted with ``int()``
	            (for example NaN).
	    """
	    # Plain "|" separators: the previous "\|" was an invalid escape sequence
	    # that left a literal backslash in the generated text.
	    return (
	        f"{row['name']} \n"
	        f"- Origin: {row['origin_country']} \n"
	        f"- Body: {row['body_type']} | Fuel: {row['fuel']} | Engine: {row['engine_type']} \n"
	        f"- Drivetrain: {row['drivetrain']} | Transmission: {row['transmission']} \n"
	        f"- HP: {int(row['horsepower'])} | Seats: {int(row['seats'])} | Efficiency: {row['efficiency']} \n"
	        f"- MSRP (USD): ${int(row['msrp_usd']):,} \n"
	        f"- Popularity: {int(row['popularity_score'])}/10 | Comfort: {int(row['comfort_score'])}/10 \n"
	    )

	def search_and_recommend(query, k=3):
	    """Find the first car matching *query* and recommend the most similar ones.

	    Args:
	        query: Free-text search. It is stripped, lower-cased and matched as a
	            literal substring (not a regular expression) against the
	            ``name``, ``model`` and ``make`` columns of ``DF``.
	        k: Number of recommendations to return. Coerced with ``int()``
	            because a UI slider may hand the value over as a float
	            (NOTE(review): confirm against the Gradio version in use).

	    Returns:
	        A 3-tuple ``(anchor_markdown, recommendations, note)``.
	        For an empty query or when nothing matches, the first element is a
	        user-facing message and the other two are ``None``. Otherwise
	        ``recommendations`` is a DataFrame of the top-*k* rows by cosine
	        similarity to the matched row, with a ``similarity`` column expressed
	        as a percentage rounded to one decimal.
	    """
	    if not query or not query.strip():
	        return "Type a car name, brand, or model.", None, None

	    k = int(k)
	    q = query.strip().lower()

	    def _contains(column):
	        # regex=False: user input such as "c++" or "(" must not be parsed as
	        # a pattern. na=False: missing cells count as "no match".
	        return DF[column].str.lower().str.contains(q, regex=False, na=False)

	    mask = _contains("name") | _contains("model") | _contains("make")
	    if not mask.any():
	        return f"No match found for: {query}", None, None

	    # Work with the row *position*, not the index label: the embedding matrix
	    # is addressed positionally, so this stays correct for any DF index.
	    pos = int(np.flatnonzero(mask.to_numpy())[0])
	    anchor = DF.iloc[pos]

	    embs = ensure_embeddings()
	    sims = cosine_similarity(embs[pos:pos + 1], embs)[0]

	    # Rank by descending similarity and drop the anchor itself explicitly.
	    ranked = sims.argsort()[::-1]
	    top_idx = ranked[ranked != pos][:k]

	    anchor_md = _format_row(anchor)
	    recs_df = DF.iloc[top_idx][[
	        "name", "make", "model", "trim", "year", "origin_country", "body_type",
	        "fuel", "engine_type", "horsepower", "popularity_score", "comfort_score",
	    ]].copy()
	    recs_df["similarity"] = (sims[top_idx] * 100).round(1)
	    return anchor_md, recs_df, f"Top {k} similar results shown (by cosine similarity on text embeddings)."

	def brand_compare(brands):
	    """Summarise the selected brands, one row per (make, origin_country) pair.

	    Args:
	        brands: Collection of ``make`` values to include. A falsy value
	            (``None`` or empty) yields ``None``.

	    Returns:
	        ``None`` when *brands* is falsy; otherwise a DataFrame with the
	        columns ``make``, ``origin_country``, ``samples`` (row count),
	        ``avg_popularity`` and ``avg_comfort`` (rounded to one decimal),
	        and ``avg_hp`` and ``avg_msrp`` (rounded and cast to ``int``).
	    """
	    if not brands:
	        return None

	    chosen = DF[DF["make"].isin(brands)].copy()
	    grouped = chosen.groupby(["make", "origin_country"])
	    summary = grouped.agg(
	        samples=("name", "count"),
	        avg_popularity=("popularity_score", "mean"),
	        avg_comfort=("comfort_score", "mean"),
	        avg_hp=("horsepower", "mean"),
	        avg_msrp=("msrp_usd", "mean"),
	    ).reset_index()

	    # Scores keep one decimal; horsepower and price become whole numbers.
	    for score_col in ("avg_popularity", "avg_comfort"):
	        summary[score_col] = summary[score_col].round(1)
	    for whole_col in ("avg_hp", "avg_msrp"):
	        summary[whole_col] = summary[whole_col].round(0).astype(int)
	    return summary

	# Gradio UI: one tab for search + recommendations, one for brand comparison.
	with gr.Blocks() as demo:
	    gr.Markdown("# DataDrive — Cars Recommender (Text, 1,200 records)")

	    with gr.Tab("Search + Recommend"):
	        gr.Markdown("Enter a car (brand/model/name). We show the match and the Top-3 similar cars.")
	        inp = gr.Textbox(label="Search", placeholder="e.g., Toyota Corolla, Model 3, Golf, BMW 3 Series")
	        topk = gr.Slider(1, 5, value=3, step=1, label="Number of recommendations")
	        btn = gr.Button("Find & Recommend")

	        anchor_md = gr.Markdown()
	        recs_df = gr.Dataframe(interactive=False)
	        note = gr.Markdown()

	        # The button and pressing Enter in the textbox run the same handler.
	        btn.click(search_and_recommend, [inp, topk], [anchor_md, recs_df, note])
	        inp.submit(search_and_recommend, [inp, topk], [anchor_md, recs_df, note])

	    with gr.Tab("Brand Compare"):
	        # dropna() keeps sorted() from comparing str with float NaN.
	        all_brands = sorted(DF["make"].dropna().unique().tolist())
	        # Preselect only brands that actually exist in the dataset so the
	        # initial value is always a subset of the offered choices.
	        default_brands = [b for b in ("Toyota", "Honda", "BMW") if b in all_brands]
	        brands_inp = gr.CheckboxGroup(choices=all_brands, label="Pick brands", value=default_brands)
	        # Seed the table through the constructor instead of assigning
	        # `table.value` afterwards, so the component processes the initial
	        # DataFrame the same way it processes handler results.
	        table = gr.Dataframe(value=brand_compare(default_brands), interactive=False)
	        brands_inp.change(brand_compare, brands_inp, table)

	demo.launch()