miazaitman committed on
Commit
4f52e4a
·
verified ·
1 Parent(s): 3eb824e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -0
app.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, pathlib, numpy as np, pandas as pd, faiss, gradio as gr
2
+ from huggingface_hub import hf_hub_download
3
+ from sentence_transformers import SentenceTransformer
4
+
5
+ # =========================
6
+ # CONFIG — EDIT IF NEEDED
7
+ # =========================
8
# Hub coordinates of the CSV that backs the app.
HF_DATASET_REPO = "miazaitman/CheatClean"
HF_DATASET_FILE = "CheatClean Data set.csv"  # keep spaces exactly as in the file name

# Working directories; both are created at import time if they are absent.
DATA_DIR = pathlib.Path("./data")
DATA_DIR.mkdir(exist_ok=True)
CACHE_DIR = pathlib.Path("./cache")
CACHE_DIR.mkdir(exist_ok=True)

# Expected on-disk location of the downloaded CSV.
DATA_LOCAL = DATA_DIR / HF_DATASET_FILE

# Sentence-embedding model used for both the indexed food names and queries.
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
15
+
16
+ # -------------------------
17
+ # Load dataset from HF Hub
18
+ # -------------------------
19
def load_dataset():
    """Fetch the CheatClean CSV if it is not on disk yet and load it.

    The file is downloaded from the Hugging Face Hub into ``DATA_DIR`` only
    when ``DATA_LOCAL`` does not already exist; otherwise the local copy is
    read directly.

    Returns:
        pandas.DataFrame: the CSV contents with rows lacking an
        ``Unhealthy_Food`` value removed and the index renumbered from 0.

    Raises:
        ValueError: if any of the expected columns is absent from the CSV.
    """
    if not DATA_LOCAL.exists():
        hf_hub_download(
            repo_id=HF_DATASET_REPO,
            filename=HF_DATASET_FILE,
            # hf_hub_download resolves repo_id as a *model* repo unless told
            # otherwise; the CSV lives in a dataset repo.
            repo_type="dataset",
            # With local_dir set, the file is placed at DATA_DIR/<filename>,
            # i.e. DATA_LOCAL. The deprecated local_dir_use_symlinks flag is
            # intentionally not passed.
            local_dir=str(DATA_DIR),
        )
    df = pd.read_csv(DATA_LOCAL)

    # Expected columns from the CheatClean dataset: the query food plus five
    # fields for each of the three alternatives.
    alt_fields = (
        "Name",
        "Description",
        "Estimated_Calorie_Delta_kcal",
        "Macro_Delta",
        "Tip",
    )
    needed = ["Unhealthy_Food"] + [
        f"Alt{rank}_{field}" for rank in (1, 2, 3) for field in alt_fields
    ]
    missing = [c for c in needed if c not in df.columns]
    if missing:
        raise ValueError(f"Missing columns in dataset: {missing}")

    # Rows without a food name cannot be embedded or matched.
    return df.dropna(subset=["Unhealthy_Food"]).reset_index(drop=True)
42
+
43
+ # -------------------------
44
+ # Build FAISS index
45
+ # -------------------------
46
def build_index(texts):
    """Embed *texts* and store the vectors in a flat inner-product FAISS index.

    The embeddings are L2-normalised in place before being added, so an
    inner-product search with an equally normalised query ranks by cosine
    similarity.

    Args:
        texts: list of strings to embed, one vector per entry.

    Returns:
        tuple: ``(model, index)`` - the loaded SentenceTransformer and the
        populated ``faiss.IndexFlatIP``.
    """
    encoder = SentenceTransformer(EMBED_MODEL_NAME)
    vectors = encoder.encode(
        texts,
        convert_to_numpy=True,
        show_progress_bar=True,
    )
    faiss.normalize_L2(vectors)

    dimension = vectors.shape[1]
    flat_index = faiss.IndexFlatIP(dimension)
    flat_index.add(vectors)
    return encoder, flat_index
53
+
54
+ # -------------------------
55
+ # Find closest match & return its 3 alternatives
56
+ # -------------------------
57
def find_row(user_food, topk_rows=1):
    """Return positions of the indexed foods most similar to *user_food*.

    Args:
        user_food: free-text food name; ``None`` or a blank string yields no
            results.
        topk_rows: maximum number of nearest rows to return.

    Returns:
        list[int]: row positions in the order FAISS returned them; empty
        when the query is blank or the index produced no valid hit.
    """
    query = (user_food or "").strip()
    if not query:
        return []

    query_vec = model.encode([query], convert_to_numpy=True)
    faiss.normalize_L2(query_vec)
    _scores, neighbours = index.search(query_vec, topk_rows)

    # FAISS fills missing results with -1 when the index holds fewer than
    # topk_rows vectors. Drop those so callers never feed -1 to positional
    # indexing, where it would silently select the last row.
    return [int(pos) for pos in neighbours[0] if pos >= 0]
65
+
66
def _alternative_record(row, rank):
    """Build the display record for alternative number *rank* of *row*."""
    prefix = f"Alt{rank}_"
    return {
        "Rank": rank,
        "Healthier Alternative": row[prefix + "Name"],
        "Description": row[prefix + "Description"],
        # Calorie delta and macro delta are merged into one display cell.
        "Calorie/Nutrient Difference": "{} kcal; {}".format(
            row[prefix + "Estimated_Calorie_Delta_kcal"],
            row[prefix + "Macro_Delta"],
        ),
        "Tip": row[prefix + "Tip"],
    }


def to_three_alternatives(row):
    """Reshape one dataset row into three display records, ranked 1 to 3.

    Args:
        row: mapping-like object (for example a pandas Series) indexable by
            the ``Alt1_*``, ``Alt2_*`` and ``Alt3_*`` column names.

    Returns:
        list[dict]: three dicts with the keys ``Rank``,
        ``Healthier Alternative``, ``Description``,
        ``Calorie/Nutrient Difference`` and ``Tip``.

    Raises:
        KeyError: if *row* lacks one of the expected ``AltN_*`` keys.
    """
    return [_alternative_record(row, rank) for rank in (1, 2, 3)]
90
+
91
+ # -------------------------
92
+ # UI logic
93
+ # -------------------------
94
def search_ui(user_food):
    """Gradio callback: look up *user_food* and build the two UI outputs.

    Args:
        user_food: text typed into the input box.

    Returns:
        tuple: ``(markdown, table)`` where *markdown* echoes the input and
        *table* is a three-row DataFrame of alternatives for the closest
        dataset row, or ``None`` when the lookup returned nothing.
    """
    column_order = [
        "Rank", "Healthier Alternative", "Description", "Calorie/Nutrient Difference", "Tip"
    ]

    hits = find_row(user_food, 1)
    if hits:
        best_row = df.iloc[hits[0]]
        records = to_three_alternatives(best_row)
        return f"**You entered:** _{user_food}_", pd.DataFrame(records, columns=column_order)

    return f"**You entered:** _{user_food}_\n\nNo matches found.", None
104
+
105
def build_interface():
    """Assemble and return the Gradio Blocks app.

    Layout: a narrow left column with the text box, the search button and
    clickable example foods; a wider right column with the echoed query and
    the results table. Both the button click and pressing Enter in the text
    box trigger ``search_ui``.

    Returns:
        gr.Blocks: the constructed (not yet launched) interface.
    """
    sample_foods = [
        [name]
        for name in (
            "Hamburger",
            "Cheeseburger",
            "Pepperoni Pizza",
            "Fried Chicken Sandwich",
            "Nachos",
            "Mac and Cheese",
        )
    ]
    table_headers = [
        "Rank", "Healthier Alternative", "Description", "Calorie/Nutrient Difference", "Tip"
    ]

    with gr.Blocks(title="Healthy Food Alternatives") as demo:
        gr.Markdown("# 🥗 Healthy Food Alternatives\nType a food you like to see healthier options.")
        with gr.Row():
            with gr.Column(scale=1):
                food_box = gr.Textbox(label="Enter a food you like", placeholder="e.g., Hamburger")
                search_button = gr.Button("Find Healthier Alternatives", variant="primary")
                gr.Examples(examples=sample_foods, inputs=food_box, label="Try one")
            with gr.Column(scale=2):
                echo_panel = gr.Markdown()
                results_table = gr.Dataframe(
                    headers=table_headers,
                    row_count=(3, "fixed"),
                    wrap=True,
                )

        # Same handler for both ways of submitting a query.
        search_button.click(search_ui, inputs=food_box, outputs=[echo_panel, results_table])
        food_box.submit(search_ui, inputs=food_box, outputs=[echo_panel, results_table])

    return demo
131
+
132
+ # -------------------------
133
+ # Boot
134
+ # -------------------------
135
# Everything below runs at import time: the dataset is loaded, the food names
# are embedded and indexed, and the UI object is built. `df`, `model` and
# `index` are module globals read by find_row and search_ui.
df = load_dataset()
model, index = build_index(list(df["Unhealthy_Food"].astype(str)))
app = build_interface()

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()
141
+