# Trigger rebuild

import os, pathlib, numpy as np, pandas as pd, gradio as gr
from huggingface_hub import hf_hub_download
from sentence_transformers import SentenceTransformer

# --- CONFIG ---
# Hugging Face dataset repo and the CSV file inside it.
HF_DATASET_REPO = "miazaitman/CheatClean"
HF_DATASET_FILE = "CheatClean Data set.csv"  # keep the spaces in the filename

# Local download directory; it is created at import time if it is missing.
DATA_DIR = pathlib.Path("./data")
DATA_DIR.mkdir(exist_ok=True)
DATA_LOCAL = DATA_DIR / HF_DATASET_FILE

# Name of the sentence-embedding model loaded by build_embeddings().
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# --- Load dataset ---
def load_dataset():
    """Return the food-alternatives CSV as a DataFrame, downloading it if needed.

    The file is fetched from the Hugging Face dataset repo into ``DATA_DIR``
    only when ``DATA_LOCAL`` does not exist yet; otherwise the local copy is
    read as-is.

    Returns:
        The CSV contents with rows lacking an ``Unhealthy_Food`` value dropped
        and the index renumbered from zero.

    Raises:
        ValueError: If any of the expected columns is absent from the CSV.
    """
    already_downloaded = DATA_LOCAL.exists()
    if not already_downloaded:
        hf_hub_download(
            repo_id=HF_DATASET_REPO,
            filename=HF_DATASET_FILE,
            repo_type="dataset",
            local_dir=str(DATA_DIR),
            local_dir_use_symlinks=False,
        )

    frame = pd.read_csv(DATA_LOCAL)

    # One food column, then the same five fields for each of the three alternatives.
    alt_fields = ("Name", "Description", "Estimated_Calorie_Delta_kcal", "Macro_Delta", "Tip")
    required = ["Unhealthy_Food"]
    for rank in (1, 2, 3):
        required.extend(f"Alt{rank}_{field}" for field in alt_fields)

    missing = [name for name in required if name not in frame.columns]
    if missing:
        raise ValueError(f"Missing columns: {missing}")

    with_food = frame.dropna(subset=["Unhealthy_Food"])
    return with_food.reset_index(drop=True)

# --- Embeddings (no FAISS) ---
def build_embeddings(texts):
    """Load the embedding model and encode ``texts`` into unit-length vectors.

    Args:
        texts: Iterable of strings to embed; it is materialized into a list
            before encoding.

    Returns:
        A ``(model, embs)`` tuple: the loaded ``SentenceTransformer`` and a
        NumPy array whose rows are the L2-normalized embeddings.
    """
    encoder = SentenceTransformer(EMBED_MODEL_NAME)
    raw = encoder.encode(list(texts), convert_to_numpy=True, show_progress_bar=True)
    # The small epsilon keeps the division finite for an all-zero vector.
    row_lengths = np.linalg.norm(raw, axis=1, keepdims=True) + 1e-12
    # With unit-length rows, a plain dot product equals cosine similarity.
    return encoder, raw / row_lengths

def cosine_top_row(query, model, embs):
    """Return the index of the row in ``embs`` that best matches ``query``.

    Args:
        query: Free-text search string; may be ``None`` or blank.
        model: Object exposing ``encode(list_of_str, convert_to_numpy=True)``.
        embs: 2-D array with one embedding per row; rows are expected to be
            L2-normalized the way ``build_embeddings`` produces them.

    Returns:
        The integer position of the highest-scoring row, or ``None`` when
        ``query`` is empty or only whitespace.
    """
    cleaned = query.strip() if query else ""
    if not cleaned:
        return None

    query_vec = model.encode([cleaned], convert_to_numpy=True)
    # Normalize the query so the dot product below is a cosine similarity.
    query_vec = query_vec / (np.linalg.norm(query_vec, axis=1, keepdims=True) + 1e-12)
    similarities = (embs @ query_vec.T)[:, 0]  # one score per row of embs
    return int(np.argmax(similarities))

def to_three_alternatives(row):
    """Convert one dataset row into three display records, one per alternative.

    Args:
        row: Mapping-like object (e.g. a pandas Series or dict) holding the
            ``Alt{1,2,3}_Name``, ``_Description``,
            ``_Estimated_Calorie_Delta_kcal``, ``_Macro_Delta`` and ``_Tip``
            fields.

    Returns:
        A list of three dicts, ranked 1 to 3, with the keys ``Rank``,
        ``Healthier Alternative``, ``Description``,
        ``Calorie/Nutrient Difference`` and ``Tip``.
    """
    def record_for(rank):
        # Every alternative uses the same field names behind an "Alt<rank>_" prefix.
        def col(field):
            return row[f"Alt{rank}_{field}"]

        return {
            "Rank": rank,
            "Healthier Alternative": col("Name"),
            "Description": col("Description"),
            "Calorie/Nutrient Difference": f'{col("Estimated_Calorie_Delta_kcal")} kcal; {col("Macro_Delta")}',
            "Tip": col("Tip"),
        }

    return [record_for(rank) for rank in (1, 2, 3)]

# --- UI ---
def search_ui(user_food):
    """Gradio callback: find the closest dataset food and list its alternatives.

    Reads the module-level ``model``, ``embs`` and ``df``.

    Args:
        user_food: Text entered by the user.

    Returns:
        A ``(markdown, table)`` pair. ``markdown`` echoes the input; ``table``
        is a three-row DataFrame of alternatives, or ``None`` (with a
        "No matches found." note appended to the markdown) when the query
        yields no row index.
    """
    echoed = f"**You entered:** _{user_food}_"

    match_idx = cosine_top_row(user_food, model, embs)
    if match_idx is None:
        return echoed + "\n\nNo matches found.", None

    alternatives = to_three_alternatives(df.iloc[match_idx])
    column_order = ["Rank", "Healthier Alternative", "Description", "Calorie/Nutrient Difference", "Tip"]
    return echoed, pd.DataFrame(alternatives, columns=column_order)

def build_interface():
    """Assemble the Gradio Blocks UI and return it without launching it.

    The layout is a single row: a narrow column with the text input, the
    search button and clickable examples, and a wider column with the echoed
    query and the results table. Both the button click and submitting the
    textbox call ``search_ui``.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    sample_foods = ("Hamburger", "Cheeseburger", "Pepperoni Pizza",
                    "Fried Chicken Sandwich", "Nachos", "Mac and Cheese")
    example_rows = [[food] for food in sample_foods]
    table_headers = ["Rank", "Healthier Alternative", "Description", "Calorie/Nutrient Difference", "Tip"]

    with gr.Blocks(title="Healthy Food Alternatives") as demo:
        gr.Markdown("# 🥗 Healthy Food Alternatives\nType a food you like to see healthier options.")
        with gr.Row():
            with gr.Column(scale=1):
                query_box = gr.Textbox(label="Enter a food you like", placeholder="e.g., Hamburger")
                search_btn = gr.Button("Find Healthier Alternatives", variant="primary")
                gr.Examples(examples=example_rows, inputs=query_box, label="Try one")
            with gr.Column(scale=2):
                echo_md = gr.Markdown()
                results_table = gr.Dataframe(headers=table_headers, row_count=(3, "fixed"), wrap=True)
        # The button click and submitting the textbox trigger the same search.
        for register in (search_btn.click, query_box.submit):
            register(search_ui, inputs=query_box, outputs=[echo_md, results_table])
    return demo

# --- Boot ---
# Load the dataset once at import time; search_ui() and quick_eval() read
# `df`, `model` and `embs` as module-level globals, so these must be set first.
df = load_dataset()
# Embed the "Unhealthy_Food" column (cast to str); queries are matched against these rows.
model, embs = build_embeddings(df["Unhealthy_Food"].astype(str).tolist())
def quick_eval(samples=("Hamburger","Nachos","Pepperoni Pizza")):
    """Print the top-1 dataset match and its three alternatives for each sample.

    Reads the module-level ``model``, ``embs`` and ``df``. Intended as a
    quick smoke test of the retrieval pipeline.

    Args:
        samples: Iterable of query strings to look up.
    """
    print("=== Quick Eval (cosine top-1 row -> 3 alts) ===")
    for s in samples:
        idx = cosine_top_row(s, model, embs)
        if idx is None:
            # cosine_top_row returns None for blank queries; indexing df.iloc
            # with None would raise, so report the miss and move on.
            print(f"\nQuery: {s!r}  ->  no match (empty query)")
            continue
        r = df.iloc[idx]
        print(f"\nQuery: {s}  ->  Row match: {r['Unhealthy_Food']}")
        for x in to_three_alternatives(r):
            print(f"  {x['Rank']}. {x['Healthier Alternative']}  |  {x['Calorie/Nutrient Difference']}")

# Run the smoke test once at import time so its matches are printed at startup.
quick_eval()
# Build the UI at module level so `app` exists even when this file is imported.
app = build_interface()

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()