Spaces:
Sleeping
Sleeping
| # Trigger rebuild | |
| import os, pathlib, numpy as np, pandas as pd, gradio as gr | |
| from huggingface_hub import hf_hub_download | |
| from sentence_transformers import SentenceTransformer | |
# --- CONFIG ---
# Hugging Face dataset repository and the CSV file stored inside it.
HF_DATASET_REPO = "miazaitman/CheatClean"
HF_DATASET_FILE = "CheatClean Data set.csv"  # the file name contains spaces; keep them

# Local directory the CSV is downloaded into; created on import if absent.
DATA_DIR = pathlib.Path("./data")
DATA_DIR.mkdir(exist_ok=True)
DATA_LOCAL = DATA_DIR.joinpath(HF_DATASET_FILE)

# Name of the sentence-transformers model used to embed food names.
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
# --- Load dataset ---
def load_dataset():
    """Load the food-alternatives CSV into a DataFrame, downloading it if needed.

    The file is fetched from the Hugging Face dataset repo into ``DATA_DIR``
    only when ``DATA_LOCAL`` does not exist yet. Rows whose
    ``Unhealthy_Food`` value is missing are dropped and the index is reset.

    Returns:
        A ``pandas.DataFrame`` with a fresh 0..N-1 index.

    Raises:
        ValueError: if any expected column is absent from the CSV.
    """
    already_cached = DATA_LOCAL.exists()
    if not already_cached:
        hf_hub_download(
            repo_id=HF_DATASET_REPO,
            filename=HF_DATASET_FILE,
            repo_type="dataset",
            local_dir=str(DATA_DIR),
            local_dir_use_symlinks=False,
        )
    frame = pd.read_csv(DATA_LOCAL)

    # One query column plus the same five fields for each of the three alternatives.
    alt_fields = (
        "Name",
        "Description",
        "Estimated_Calorie_Delta_kcal",
        "Macro_Delta",
        "Tip",
    )
    needed = ["Unhealthy_Food"]
    for slot in (1, 2, 3):
        needed.extend(f"Alt{slot}_{field}" for field in alt_fields)

    missing = [column for column in needed if column not in frame.columns]
    if missing:
        raise ValueError(f"Missing columns: {missing}")

    with_food = frame.dropna(subset=["Unhealthy_Food"])
    return with_food.reset_index(drop=True)
# --- Embeddings (no FAISS) ---
def build_embeddings(texts):
    """Embed *texts* and return the encoder together with unit-length vectors.

    Args:
        texts: Iterable of strings to encode; it is materialised into a list.

    Returns:
        ``(model, embs)`` where ``model`` is the loaded ``SentenceTransformer``
        and ``embs`` is the array of embeddings, each row divided by its
        L2 norm so that a dot product gives cosine similarity.
    """
    encoder = SentenceTransformer(EMBED_MODEL_NAME)
    raw_vectors = encoder.encode(
        list(texts),
        convert_to_numpy=True,
        show_progress_bar=True,
    )
    # The small epsilon keeps the division finite for an all-zero vector.
    row_lengths = np.linalg.norm(raw_vectors, axis=1, keepdims=True) + 1e-12
    return encoder, raw_vectors / row_lengths
def cosine_top_row(query, model, embs):
    """Return the index of the row whose embedding best matches *query*.

    Args:
        query: Free-text search string; may be ``None`` or blank.
        model: Object exposing ``encode(list_of_str, convert_to_numpy=True)``
            that returns a 2-D array with one row per input string.
        embs: 2-D array of row embeddings. Presumably already L2-normalised
            (as ``build_embeddings`` produces), so the dot product below is
            the cosine similarity.

    Returns:
        The integer position of the highest-scoring row, or ``None`` when
        the query is empty/blank or there are no rows to search.
    """
    if not query or not query.strip():
        return None
    # With zero rows there is no best match, and np.argmax would raise
    # ValueError on the empty score vector.
    if len(embs) == 0:
        return None

    q_emb = model.encode([query.strip()], convert_to_numpy=True)
    # Normalise the query the same way as the rows; epsilon avoids 0/0.
    q_emb = q_emb / (np.linalg.norm(q_emb, axis=1, keepdims=True) + 1e-12)
    scores = embs @ q_emb.T  # shape (N, 1)
    return int(np.argmax(scores[:, 0]))
def to_three_alternatives(row):
    """Turn one dataset row into a list of three display-ready dicts.

    Args:
        row: Mapping-like object indexable by the ``Alt1_*``, ``Alt2_*`` and
            ``Alt3_*`` column names (e.g. a DataFrame row or a dict).

    Returns:
        A list of three dicts, ranked 1 to 3, each with the keys ``Rank``,
        ``Healthier Alternative``, ``Description``,
        ``Calorie/Nutrient Difference`` and ``Tip``.
    """
    alternatives = []
    for rank in (1, 2, 3):
        prefix = f"Alt{rank}_"
        alternatives.append(
            {
                "Rank": rank,
                "Healthier Alternative": row[prefix + "Name"],
                "Description": row[prefix + "Description"],
                # Calorie delta and macro delta are merged into one text cell.
                "Calorie/Nutrient Difference": (
                    f'{row[prefix + "Estimated_Calorie_Delta_kcal"]} kcal; '
                    f'{row[prefix + "Macro_Delta"]}'
                ),
                "Tip": row[prefix + "Tip"],
            }
        )
    return alternatives
# --- UI ---
def search_ui(user_food):
    """Gradio callback: look up *user_food* and return what to display.

    Uses the module-level ``model``, ``embs`` and ``df``.

    Returns:
        ``(markdown_text, table)``. ``table`` is a three-row DataFrame of
        alternatives for the best-matching row, or ``None`` (with a
        "No matches found." message) when no row index is available.
    """
    match_idx = cosine_top_row(user_food, model, embs)
    header = f"**You entered:** _{user_food}_"
    if match_idx is None:
        return f"{header}\n\nNo matches found.", None

    column_order = [
        "Rank",
        "Healthier Alternative",
        "Description",
        "Calorie/Nutrient Difference",
        "Tip",
    ]
    alternatives = to_three_alternatives(df.iloc[match_idx])
    return header, pd.DataFrame(alternatives, columns=column_order)
def build_interface():
    """Assemble the Gradio Blocks UI and return it without launching.

    Layout: a title, then a row with an input column (textbox, button,
    clickable examples) and a wider output column (echo text and a fixed
    three-row results table). Clicking the button and submitting the
    textbox both call ``search_ui``.
    """
    # NOTE(review): the original indentation was lost; the nesting below is
    # reconstructed from the component order - confirm against the live app.
    column_headers = [
        "Rank",
        "Healthier Alternative",
        "Description",
        "Calorie/Nutrient Difference",
        "Tip",
    ]
    sample_foods = (
        "Hamburger",
        "Cheeseburger",
        "Pepperoni Pizza",
        "Fried Chicken Sandwich",
        "Nachos",
        "Mac and Cheese",
    )
    examples = [[food] for food in sample_foods]

    with gr.Blocks(title="Healthy Food Alternatives") as demo:
        gr.Markdown("# 🥗 Healthy Food Alternatives\nType a food you like to see healthier options.")
        with gr.Row():
            with gr.Column(scale=1):
                inp = gr.Textbox(
                    label="Enter a food you like",
                    placeholder="e.g., Hamburger",
                )
                btn = gr.Button("Find Healthier Alternatives", variant="primary")
                gr.Examples(examples=examples, inputs=inp, label="Try one")
            with gr.Column(scale=2):
                echoed = gr.Markdown()
                table = gr.Dataframe(
                    headers=column_headers,
                    row_count=(3, "fixed"),
                    wrap=True,
                )
        # Button click first, then textbox submit - same handler and outputs.
        for register in (btn.click, inp.submit):
            register(search_ui, inputs=inp, outputs=[echoed, table])
    return demo
# --- Boot ---
# Module-level state read by search_ui and quick_eval: the dataset, the
# encoder, and one normalised embedding per "Unhealthy_Food" value.
df = load_dataset()
model, embs = build_embeddings(
    texts=df["Unhealthy_Food"].astype(str).tolist(),
)
def quick_eval(samples=("Hamburger", "Nachos", "Pepperoni Pizza")):
    """Print the best-matching row and its three alternatives per sample.

    A smoke check of the retrieval path that uses the module-level
    ``model``, ``embs`` and ``df``. Output goes to stdout; nothing is
    returned.

    Args:
        samples: Iterable of query strings to look up.
    """
    print("=== Quick Eval (cosine top-1 row -> 3 alts) ===")
    for s in samples:
        idx = cosine_top_row(s, model, embs)
        if idx is None:
            # cosine_top_row returns None for blank queries; indexing
            # df.iloc with None would raise, so report and move on.
            print(f"\nQuery: {s} -> No match")
            continue
        r = df.iloc[idx]
        print(f"\nQuery: {s} -> Row match: {r['Unhealthy_Food']}")
        for x in to_three_alternatives(r):
            print(f" {x['Rank']}. {x['Healthier Alternative']} | {x['Calorie/Nutrient Difference']}")
# Run the retrieval smoke check a single time when the module is loaded.
quick_eval()

# The interface is built on import; it is only served when run as a script.
app = build_interface()

if __name__ == "__main__":
    app.launch()