Spaces:
Sleeping
Sleeping
File size: 5,024 Bytes
bbe06bd 50ccb88 4f52e4a 50ccb88 4f52e4a 50ccb88 4f52e4a 50ccb88 4f52e4a 1e7d23b 4f52e4a 50ccb88 4f52e4a 50ccb88 4f52e4a 50ccb88 4f52e4a 50ccb88 4f52e4a 50ccb88 4f52e4a 50ccb88 4f52e4a 50ccb88 4f52e4a 50ccb88 4f52e4a 50ccb88 4f52e4a 50ccb88 4f52e4a 50ccb88 4f52e4a 50ccb88 4f52e4a 50ccb88 4f52e4a 50ccb88 4f52e4a 50ccb88 e860f71 4f52e4a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
# Trigger rebuild
import os, pathlib, numpy as np, pandas as pd, gradio as gr
from huggingface_hub import hf_hub_download
from sentence_transformers import SentenceTransformer
# --- CONFIG ---
# Hugging Face dataset repository that hosts the CSV.
HF_DATASET_REPO = "miazaitman/CheatClean"
# File name inside that repository; the embedded spaces are intentional.
HF_DATASET_FILE = "CheatClean Data set.csv"

# Local directory for the CSV copy; created at import time if it is missing.
DATA_DIR = pathlib.Path("./data")
DATA_DIR.mkdir(exist_ok=True)
DATA_LOCAL = DATA_DIR.joinpath(HF_DATASET_FILE)

# Sentence-embedding model used for matching queries to dataset rows.
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
# --- Load dataset ---
def load_dataset():
    """Return the food-alternatives table as a cleaned DataFrame.

    The CSV is fetched from the Hugging Face dataset repo into ``DATA_DIR``
    only when ``DATA_LOCAL`` does not exist yet; otherwise the local copy is
    read directly.

    Returns:
        A DataFrame with rows lacking ``Unhealthy_Food`` removed and the
        index renumbered from zero.

    Raises:
        ValueError: if any of the required columns is absent from the CSV.
    """
    if not DATA_LOCAL.exists():
        hf_hub_download(
            repo_id=HF_DATASET_REPO,
            filename=HF_DATASET_FILE,
            repo_type="dataset",
            local_dir=str(DATA_DIR),
            local_dir_use_symlinks=False,
        )

    frame = pd.read_csv(DATA_LOCAL)

    # One query column plus five fields for each of the three alternatives.
    alt_fields = ("Name", "Description", "Estimated_Calorie_Delta_kcal", "Macro_Delta", "Tip")
    required = ["Unhealthy_Food"]
    for slot in (1, 2, 3):
        required.extend(f"Alt{slot}_{field}" for field in alt_fields)

    missing = [name for name in required if name not in frame.columns]
    if missing:
        raise ValueError(f"Missing columns: {missing}")

    with_food = frame.dropna(subset=["Unhealthy_Food"])
    return with_food.reset_index(drop=True)
# --- Embeddings (no FAISS) ---
def build_embeddings(texts):
    """Encode ``texts`` and return ``(model, unit_length_embeddings)``.

    Args:
        texts: iterable of strings to embed; it is materialised into a list.

    Returns:
        A tuple of the loaded ``SentenceTransformer`` and an array with one
        L2-normalised row per input text, so that a dot product between rows
        equals their cosine similarity.
    """
    encoder = SentenceTransformer(EMBED_MODEL_NAME)
    raw = encoder.encode(list(texts), convert_to_numpy=True, show_progress_bar=True)
    # The small epsilon keeps the division defined for an all-zero row.
    row_lengths = np.linalg.norm(raw, axis=1, keepdims=True) + 1e-12
    unit_rows = raw / row_lengths
    return encoder, unit_rows
def cosine_top_row(query, model, embs):
    """Return the index of the row in ``embs`` most similar to ``query``.

    Args:
        query: user text; surrounding whitespace is ignored.
        model: object exposing ``encode(list_of_str, convert_to_numpy=True)``.
        embs: matrix of row embeddings to score the query against.

    Returns:
        The best-scoring row index as a plain ``int``, or ``None`` when the
        query is empty or whitespace only.
    """
    cleaned = query.strip() if query else ""
    if not cleaned:
        return None

    query_vec = model.encode([cleaned], convert_to_numpy=True)
    # Normalise the query; the epsilon guards against a zero vector.
    query_len = np.linalg.norm(query_vec, axis=1, keepdims=True) + 1e-12
    query_vec = query_vec / query_len

    # (N, 1) score column flattened to N similarities.
    similarities = (embs @ query_vec.T)[:, 0]
    return int(np.argmax(similarities))
def to_three_alternatives(row):
    """Turn one dataset row into a list of three display records.

    Args:
        row: mapping (for example a pandas row) holding the ``Alt1_*``,
            ``Alt2_*`` and ``Alt3_*`` fields.

    Returns:
        A list of three dicts, ranked 1 to 3, each with the keys ``Rank``,
        ``Healthier Alternative``, ``Description``,
        ``Calorie/Nutrient Difference`` and ``Tip``.
    """

    def _record(rank):
        # Build the display record for the alternative stored under Alt<rank>_*.
        prefix = f"Alt{rank}_"
        name = row[prefix + "Name"]
        description = row[prefix + "Description"]
        kcal_delta = row[prefix + "Estimated_Calorie_Delta_kcal"]
        macro_delta = row[prefix + "Macro_Delta"]
        tip = row[prefix + "Tip"]
        return {
            "Rank": rank,
            "Healthier Alternative": name,
            "Description": description,
            "Calorie/Nutrient Difference": f"{kcal_delta} kcal; {macro_delta}",
            "Tip": tip,
        }

    return [_record(rank) for rank in (1, 2, 3)]
# --- UI ---
def search_ui(user_food):
    """Gradio callback: look up healthier alternatives for ``user_food``.

    Reads the module-level ``model``, ``embs`` and ``df``.

    Returns:
        A ``(markdown, table)`` pair. ``markdown`` echoes the input; ``table``
        is a three-row DataFrame of alternatives, or ``None`` when the query
        yields no match (blank input).
    """
    match_idx = cosine_top_row(user_food, model, embs)
    if match_idx is None:
        return f"**You entered:** _{user_food}_\n\nNo matches found.", None

    column_order = [
        "Rank",
        "Healthier Alternative",
        "Description",
        "Calorie/Nutrient Difference",
        "Tip",
    ]
    matched_row = df.iloc[match_idx]
    alternatives = pd.DataFrame(to_three_alternatives(matched_row), columns=column_order)
    return f"**You entered:** _{user_food}_", alternatives
def build_interface():
    """Assemble and return the Gradio Blocks UI.

    Layout: a heading, then one row with an input column (textbox, button,
    clickable examples) and a wider output column (echo text and a fixed
    three-row results table). Both the button click and pressing Enter in
    the textbox run ``search_ui``.
    """
    # NOTE(review): the nesting below was reconstructed from a copy of the
    # file whose indentation was lost — confirm the layout against the app.
    result_headers = [
        "Rank",
        "Healthier Alternative",
        "Description",
        "Calorie/Nutrient Difference",
        "Tip",
    ]
    sample_queries = [
        ["Hamburger"], ["Cheeseburger"], ["Pepperoni Pizza"],
        ["Fried Chicken Sandwich"], ["Nachos"], ["Mac and Cheese"],
    ]

    with gr.Blocks(title="Healthy Food Alternatives") as demo:
        gr.Markdown("# 🥗 Healthy Food Alternatives\nType a food you like to see healthier options.")
        with gr.Row():
            with gr.Column(scale=1):
                food_box = gr.Textbox(label="Enter a food you like", placeholder="e.g., Hamburger")
                search_btn = gr.Button("Find Healthier Alternatives", variant="primary")
                gr.Examples(examples=sample_queries, inputs=food_box, label="Try one")
            with gr.Column(scale=2):
                echo_md = gr.Markdown()
                results = gr.Dataframe(
                    headers=result_headers,
                    row_count=(3, "fixed"),
                    wrap=True,
                )

        # Same handler for both triggers: button click first, then Enter key.
        for register in (search_btn.click, food_box.submit):
            register(search_ui, inputs=food_box, outputs=[echo_md, results])

    return demo
# --- Boot ---
# Load the validated table once at import time; search_ui and quick_eval read it.
df = load_dataset()
# Embed every "Unhealthy_Food" value (cast to str) for cosine matching of queries.
model, embs = build_embeddings(texts=df["Unhealthy_Food"].astype(str).tolist())
def quick_eval(samples=("Hamburger","Nachos","Pepperoni Pizza")):
    """Print the top-matching row and its three alternatives for each sample.

    Reads the module-level ``model``, ``embs`` and ``df``. Intended as a
    smoke test of the retrieval path; output goes to stdout only.

    Args:
        samples: iterable of query strings to look up.
    """
    print("=== Quick Eval (cosine top-1 row -> 3 alts) ===")
    for s in samples:
        idx = cosine_top_row(s, model, embs)
        if idx is None:
            # cosine_top_row returns None for blank queries; indexing df with
            # None would raise, so report the sample and move on.
            print(f"\nQuery: {s!r} -> skipped (blank query)")
            continue
        r = df.iloc[idx]
        print(f"\nQuery: {s} -> Row match: {r['Unhealthy_Food']}")
        for x in to_three_alternatives(r):
            print(f" {x['Rank']}. {x['Healthier Alternative']} | {x['Calorie/Nutrient Difference']}")
# Run the retrieval smoke test once when the module is loaded.
quick_eval()

# Build the Blocks UI at import time; start the server only when run as a script.
app = build_interface()

if __name__ == "__main__":
    app.launch()
|