Spaces:

miazaitman
/

cheat-clean

Sleeping

App Files Files Community

cheat-clean / app.py

miazaitman

Update app.py

e860f71 verified 5 months ago

raw

history blame contribute delete

5.02 kB

	# Trigger rebuild

	import os, pathlib, numpy as np, pandas as pd, gradio as gr
	from huggingface_hub import hf_hub_download
	from sentence_transformers import SentenceTransformer

	# --- CONFIG ---
	# Hugging Face dataset repository and the CSV file stored inside it.
	HF_DATASET_REPO = "miazaitman/CheatClean"
	HF_DATASET_FILE = "CheatClean Data set.csv"  # the file name contains spaces; keep them

	# Local directory that holds the downloaded CSV; created at import time.
	DATA_DIR = pathlib.Path("./data")
	DATA_DIR.mkdir(exist_ok=True)
	DATA_LOCAL = DATA_DIR / HF_DATASET_FILE

	# Model name handed to SentenceTransformer when embeddings are built.
	EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

	# --- Load dataset ---
	def load_dataset():
	    """Return the CheatClean table, downloading the CSV when it is not cached.

	    The CSV is fetched from the Hugging Face dataset repo into ``DATA_DIR``
	    only if ``DATA_LOCAL`` does not exist; otherwise the local file is read.

	    Returns:
	        A DataFrame with rows lacking ``Unhealthy_Food`` dropped and the
	        index renumbered from 0.

	    Raises:
	        ValueError: If any required column is absent from the CSV.
	    """
	    already_cached = DATA_LOCAL.exists()
	    if not already_cached:
	        hf_hub_download(
	            repo_id=HF_DATASET_REPO,
	            filename=HF_DATASET_FILE,
	            repo_type="dataset",
	            local_dir=str(DATA_DIR),
	            local_dir_use_symlinks=False,
	        )

	    frame = pd.read_csv(DATA_LOCAL)

	    # One query column plus five fields for each of the three alternatives.
	    required = ["Unhealthy_Food"]
	    required += [
	        "Alt1_Name", "Alt1_Description", "Alt1_Estimated_Calorie_Delta_kcal",
	        "Alt1_Macro_Delta", "Alt1_Tip",
	    ]
	    required += [
	        "Alt2_Name", "Alt2_Description", "Alt2_Estimated_Calorie_Delta_kcal",
	        "Alt2_Macro_Delta", "Alt2_Tip",
	    ]
	    required += [
	        "Alt3_Name", "Alt3_Description", "Alt3_Estimated_Calorie_Delta_kcal",
	        "Alt3_Macro_Delta", "Alt3_Tip",
	    ]

	    absent = [col for col in required if col not in frame.columns]
	    if absent:
	        raise ValueError(f"Missing columns: {absent}")

	    with_query = frame.dropna(subset=["Unhealthy_Food"])
	    return with_query.reset_index(drop=True)

	# --- Embeddings (no FAISS) ---
	def build_embeddings(texts):
	    """Encode *texts* and return the encoder together with unit-length vectors.

	    Args:
	        texts: Iterable of strings to embed; it is materialized into a list.

	    Returns:
	        A ``(model, embs)`` pair: the loaded SentenceTransformer and a NumPy
	        array whose rows are L2-normalized, so a dot product between two rows
	        is their cosine similarity.
	    """
	    encoder = SentenceTransformer(EMBED_MODEL_NAME)
	    raw_vectors = encoder.encode(
	        list(texts),
	        convert_to_numpy=True,
	        show_progress_bar=True,
	    )
	    # The small epsilon keeps the division finite for an all-zero row.
	    row_lengths = np.linalg.norm(raw_vectors, axis=1, keepdims=True) + 1e-12
	    unit_vectors = raw_vectors / row_lengths
	    return encoder, unit_vectors

	def cosine_top_row(query, model, embs):
	    """Return the index of the row in *embs* most similar to *query*.

	    Args:
	        query: Free-text query; surrounding whitespace is stripped.
	        model: Object with ``encode(list_of_str, convert_to_numpy=True)``.
	        embs: 2-D array of row embeddings. They are expected to be
	            L2-normalized (as ``build_embeddings`` produces) so the dot
	            product below is a cosine similarity.

	    Returns:
	        The winning row index as a plain ``int``, or ``None`` when *query*
	        is empty, ``None``, or whitespace only.
	    """
	    cleaned = query.strip() if query else ""
	    if not cleaned:
	        return None

	    query_vec = model.encode([cleaned], convert_to_numpy=True)
	    query_len = np.linalg.norm(query_vec, axis=1, keepdims=True) + 1e-12
	    query_vec = query_vec / query_len

	    similarities = embs @ query_vec.T  # shape (N, 1)
	    best = np.argmax(similarities[:, 0])
	    return int(best)

	def to_three_alternatives(row):
	    """Turn one dataset row into three display records, ranked 1 to 3.

	    Args:
	        row: Mapping-like object indexable by the ``Alt{1,2,3}_*`` column names.

	    Returns:
	        A list of three dicts with the keys ``Rank``, ``Healthier Alternative``,
	        ``Description``, ``Calorie/Nutrient Difference`` and ``Tip``, in that
	        order. The difference field joins the calorie delta and the macro
	        delta as ``"<kcal> kcal; <macro>"``.
	    """
	    return [
	        {
	            "Rank": rank,
	            "Healthier Alternative": row[f"Alt{rank}_Name"],
	            "Description": row[f"Alt{rank}_Description"],
	            "Calorie/Nutrient Difference": (
	                f'{row[f"Alt{rank}_Estimated_Calorie_Delta_kcal"]} kcal; '
	                f'{row[f"Alt{rank}_Macro_Delta"]}'
	            ),
	            "Tip": row[f"Alt{rank}_Tip"],
	        }
	        for rank in (1, 2, 3)
	    ]

	# --- UI ---
	def search_ui(user_food):
	    """Gradio callback: look up *user_food* and return what the UI displays.

	    Uses the module-level ``model``, ``embs`` and ``df``.

	    Returns:
	        A ``(markdown, table)`` pair. ``markdown`` echoes the input; ``table``
	        is a three-row DataFrame of alternatives, or ``None`` (with a
	        "No matches found." note in the markdown) when the lookup yields no row.
	    """
	    match_idx = cosine_top_row(user_food, model, embs)
	    if match_idx is None:
	        return f"You entered: _{user_food}_\n\nNo matches found.", None

	    column_order = [
	        "Rank",
	        "Healthier Alternative",
	        "Description",
	        "Calorie/Nutrient Difference",
	        "Tip",
	    ]
	    alternatives = to_three_alternatives(df.iloc[match_idx])
	    message = f"You entered: _{user_food}_"
	    return message, pd.DataFrame(alternatives, columns=column_order)

	def build_interface():
	    """Assemble and return the Gradio Blocks app.

	    Layout: a narrow left column with the text input, the search button and
	    clickable examples; a wider right column with the echoed query and the
	    three-row results table. Both the button click and pressing Enter in the
	    textbox call ``search_ui``.
	    """
	    sample_queries = [
	        ["Hamburger"],
	        ["Cheeseburger"],
	        ["Pepperoni Pizza"],
	        ["Fried Chicken Sandwich"],
	        ["Nachos"],
	        ["Mac and Cheese"],
	    ]
	    table_headers = [
	        "Rank",
	        "Healthier Alternative",
	        "Description",
	        "Calorie/Nutrient Difference",
	        "Tip",
	    ]

	    with gr.Blocks(title="Healthy Food Alternatives") as demo:
	        gr.Markdown("# 🥗 Healthy Food Alternatives\nType a food you like to see healthier options.")
	        with gr.Row():
	            with gr.Column(scale=1):
	                inp = gr.Textbox(
	                    label="Enter a food you like",
	                    placeholder="e.g., Hamburger",
	                )
	                btn = gr.Button("Find Healthier Alternatives", variant="primary")
	                gr.Examples(examples=sample_queries, inputs=inp, label="Try one")
	            with gr.Column(scale=2):
	                echoed = gr.Markdown()
	                table = gr.Dataframe(
	                    headers=table_headers,
	                    row_count=(3, "fixed"),
	                    wrap=True,
	                )
	        # Event listeners have to be registered while the Blocks context is open.
	        for register in (btn.click, inp.submit):
	            register(search_ui, inputs=inp, outputs=[echoed, table])
	    return demo

	# --- Boot ---
	# Module-level state read by search_ui and quick_eval: the dataset frame, the
	# sentence encoder, and one normalized embedding per "Unhealthy_Food" value.
	df = load_dataset()
	model, embs = build_embeddings(
	    df["Unhealthy_Food"].astype(str).tolist()
	)
	def quick_eval(samples=("Hamburger", "Nachos", "Pepperoni Pizza")):
	    """Print the top-1 dataset match and its three alternatives per sample.

	    Uses the module-level ``model``, ``embs`` and ``df``. Output goes to
	    stdout only; nothing is returned.

	    Args:
	        samples: Iterable of query strings to look up.
	    """
	    print("=== Quick Eval (cosine top-1 row -> 3 alts) ===")
	    for sample in samples:
	        idx = cosine_top_row(sample, model, embs)
	        if idx is None:
	            # cosine_top_row returns None for blank queries, and
	            # df.iloc[None] would raise instead of reporting the miss.
	            print(f"\nQuery: {sample!r} -> no match (empty query)")
	            continue
	        matched = df.iloc[idx]
	        print(f"\nQuery: {sample} -> Row match: {matched['Unhealthy_Food']}")
	        for alt in to_three_alternatives(matched):
	            # A plain "|" is used as the separator: a backslash before it is
	            # an invalid escape sequence (SyntaxWarning on Python 3.12+) and
	            # would also print a stray backslash.
	            print(f" {alt['Rank']}. {alt['Healthier Alternative']} | {alt['Calorie/Nutrient Difference']}")

	# Run the sample lookups once at import so their matches are printed to stdout.
	quick_eval()

	# Build the UI eagerly so `app` exists as a module attribute.
	app = build_interface()

	if __name__ == "__main__":
	    app.launch()