File size: 5,024 Bytes
bbe06bd
 
50ccb88
4f52e4a
 
 
50ccb88
4f52e4a
50ccb88
4f52e4a
 
 
 
50ccb88
4f52e4a
 
 
1e7d23b
 
 
 
 
4f52e4a
 
 
 
50ccb88
 
 
4f52e4a
 
 
50ccb88
 
4f52e4a
50ccb88
 
4f52e4a
50ccb88
 
 
 
4f52e4a
50ccb88
 
 
 
4f52e4a
50ccb88
 
 
4f52e4a
 
 
50ccb88
 
 
 
 
 
 
 
 
 
 
 
4f52e4a
 
50ccb88
4f52e4a
50ccb88
 
4f52e4a
50ccb88
4f52e4a
50ccb88
 
 
 
4f52e4a
 
 
50ccb88
 
4f52e4a
 
 
 
 
 
 
 
 
50ccb88
 
4f52e4a
 
 
 
50ccb88
4f52e4a
50ccb88
e860f71
 
 
 
 
 
 
 
 
 
 
4f52e4a
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# Trigger rebuild

import os, pathlib, numpy as np, pandas as pd, gradio as gr
from huggingface_hub import hf_hub_download
from sentence_transformers import SentenceTransformer

# --- CONFIG ---
HF_DATASET_REPO = "miazaitman/CheatClean"    # HF dataset repo the CSV is fetched from
HF_DATASET_FILE = "CheatClean Data set.csv"  # keep spaces
DATA_DIR = pathlib.Path("./data"); DATA_DIR.mkdir(exist_ok=True)  # local cache dir for the CSV
DATA_LOCAL = DATA_DIR / HF_DATASET_FILE  # on-disk path of the cached dataset file
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"  # sentence-embedding model id

# --- Load dataset ---
def load_dataset():
    """Download (once) and load the CheatClean CSV into a DataFrame.

    Returns:
        pd.DataFrame: the dataset with rows missing ``Unhealthy_Food``
        dropped and the index reset.

    Raises:
        ValueError: if any column the app depends on is absent.
    """
    if not DATA_LOCAL.exists():
        # First run only: fetch the CSV from the HF dataset repo into ./data.
        hf_hub_download(
            repo_id=HF_DATASET_REPO,
            filename=HF_DATASET_FILE,
            repo_type="dataset",  # file lives in a *dataset* repo, not a model repo
            local_dir=str(DATA_DIR),
            # NOTE(review): deprecated in newer huggingface_hub releases;
            # kept for compatibility — behavior is unchanged either way.
            local_dir_use_symlinks=False,
        )
    df = pd.read_csv(DATA_LOCAL)
    # Every column read by to_three_alternatives()/search_ui must exist.
    needed = ["Unhealthy_Food"]
    for rank in (1, 2, 3):
        needed += [
            f"Alt{rank}_Name",
            f"Alt{rank}_Description",
            f"Alt{rank}_Estimated_Calorie_Delta_kcal",
            f"Alt{rank}_Macro_Delta",
            f"Alt{rank}_Tip",
        ]
    missing = [c for c in needed if c not in df.columns]
    if missing:
        raise ValueError(f"Missing columns: {missing}")
    return df.dropna(subset=["Unhealthy_Food"]).reset_index(drop=True)

# --- Embeddings (no FAISS) ---
def build_embeddings(texts):
    """Embed *texts* with MiniLM and L2-normalize each row.

    Returns (model, embeddings): the model is reused later to embed
    queries, and normalized rows make a dot product equal cosine
    similarity.
    """
    encoder = SentenceTransformer(EMBED_MODEL_NAME)
    matrix = encoder.encode(list(texts), convert_to_numpy=True, show_progress_bar=True)
    # Tiny epsilon guards against division by zero for an all-zero row.
    row_norms = np.linalg.norm(matrix, axis=1, keepdims=True) + 1e-12
    return encoder, matrix / row_norms

def cosine_top_row(query, model, embs):
    """Return the row index in *embs* most cosine-similar to *query*.

    Returns None for empty/blank input. *embs* is assumed to be
    row-normalized already (see build_embeddings), so only the query
    vector is normalized here.
    """
    text = query.strip() if query else ""
    if not text:
        return None
    vec = model.encode([text], convert_to_numpy=True)
    vec = vec / (np.linalg.norm(vec, axis=1, keepdims=True) + 1e-12)
    sims = (embs @ vec.T)[:, 0]  # one similarity score per dataset row
    return int(sims.argmax())

def to_three_alternatives(row):
    """Flatten the Alt1/Alt2/Alt3 column groups of *row* into a list of
    three rank-ordered display dicts."""
    alternatives = []
    for rank in (1, 2, 3):
        prefix = f"Alt{rank}_"
        alternatives.append({
            "Rank": rank,
            "Healthier Alternative": row[prefix + "Name"],
            "Description": row[prefix + "Description"],
            "Calorie/Nutrient Difference":
                f'{row[prefix + "Estimated_Calorie_Delta_kcal"]} kcal; {row[prefix + "Macro_Delta"]}',
            "Tip": row[prefix + "Tip"],
        })
    return alternatives

# --- UI ---
def search_ui(user_food):
    """Gradio callback: echo the query and return a 3-row alternatives table.

    Returns (markdown_echo, DataFrame) on a match, or
    (no-match message, None) when the input is blank.
    """
    match_idx = cosine_top_row(user_food, model, embs)
    if match_idx is None:
        return f"**You entered:** _{user_food}_\n\nNo matches found.", None
    best_row = df.iloc[match_idx]
    display_cols = ["Rank","Healthier Alternative","Description","Calorie/Nutrient Difference","Tip"]
    results = pd.DataFrame(to_three_alternatives(best_row), columns=display_cols)
    return f"**You entered:** _{user_food}_", results

def build_interface():
    """Assemble and return the (unlaunched) Gradio Blocks app."""
    sample_queries = [["Hamburger"],["Cheeseburger"],["Pepperoni Pizza"],
                      ["Fried Chicken Sandwich"],["Nachos"],["Mac and Cheese"]]
    table_headers = ["Rank","Healthier Alternative","Description","Calorie/Nutrient Difference","Tip"]
    with gr.Blocks(title="Healthy Food Alternatives") as demo:
        gr.Markdown("# 🥗 Healthy Food Alternatives\nType a food you like to see healthier options.")
        with gr.Row():
            with gr.Column(scale=1):
                food_box = gr.Textbox(label="Enter a food you like", placeholder="e.g., Hamburger")
                search_btn = gr.Button("Find Healthier Alternatives", variant="primary")
                gr.Examples(examples=sample_queries, inputs=food_box, label="Try one")
            with gr.Column(scale=2):
                echo_md = gr.Markdown()
                results_table = gr.Dataframe(headers=table_headers,
                                             row_count=(3,"fixed"), wrap=True)
        # Button click and pressing Enter in the textbox run the same search.
        search_btn.click(search_ui, inputs=food_box, outputs=[echo_md, results_table])
        food_box.submit(search_ui, inputs=food_box, outputs=[echo_md, results_table])
    return demo

# --- Boot ---
# Module-level startup: load the dataset and embed every Unhealthy_Food
# entry once, so each search only has to embed the query string.
df = load_dataset()
model, embs = build_embeddings(df["Unhealthy_Food"].astype(str).tolist())
def quick_eval(samples=("Hamburger","Nachos","Pepperoni Pizza")):
    """Print the top-1 row match and its three alternatives for a few
    sample queries — a startup smoke test of the retrieval pipeline."""
    print("=== Quick Eval (cosine top-1 row -> 3 alts) ===")
    for query in samples:
        matched = df.iloc[cosine_top_row(query, model, embs)]
        print(f"\nQuery: {query}  ->  Row match: {matched['Unhealthy_Food']}")
        for alt in to_three_alternatives(matched):
            print(f"  {alt['Rank']}. {alt['Healthier Alternative']}  |  {alt['Calorie/Nutrient Difference']}")

# call it once at startup
quick_eval()
# Built at import time so hosting runners can find `app` without __main__.
app = build_interface()

if __name__ == "__main__":
    # Direct execution: start the Gradio server locally.
    app.launch()