MatanKriel committed on
Commit
9bb2979
Β·
verified Β·
1 Parent(s): 0ffa00f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -124
app.py CHANGED
@@ -2,149 +2,96 @@ import gradio as gr
2
  import torch
3
  import pandas as pd
4
  import numpy as np
5
- import os
6
  from PIL import Image
7
- from transformers import AutoProcessor, AutoModel
8
- from datasets import load_dataset
9
- from torch.nn import functional as F
10
 
11
- # --- 1. SETUP & CONFIG ---
12
- MODEL_ID = "google/siglip-base-patch16-224"
13
- DATA_FILE = "food_embeddings_siglip.parquet"
 
14
 
15
- print(f"⏳ Starting App... Loading Model: {MODEL_ID}...")
16
- try:
17
- model = AutoModel.from_pretrained(MODEL_ID)
18
- processor = AutoProcessor.from_pretrained(MODEL_ID)
19
- except Exception as e:
20
- print(f"❌ Model Error: {e}")
21
 
22
- # --- 2. LOAD DATA ---
23
- # --- 2. LOAD DATA (SMART MATCHING) ---
24
- print("⏳ Loading Dataset...")
25
 
26
- # 1. Load the Embeddings File FIRST
27
  df = pd.read_parquet(DATA_FILE)
28
- valid_indices = df.index.tolist() # Assuming you preserved the original indices in the dataframe index
29
- # OR if you reset the index in the notebook, we just check the length:
30
- num_embeddings = len(df)
31
 
32
- print(f" πŸ‘‰ Embeddings file has {num_embeddings} rows.")
33
-
34
- # 2. Load the Dataset
35
- dataset_full = load_dataset("ethz/food101", split="train").shuffle(seed=42).select(range(5000))
36
-
37
- # 3. CRITICAL FIX: If lengths don't match, we assume the parquet is a subset.
38
- # (This is a guess - if you didn't save the original indices, this might still be slightly off,
39
- # but it prevents the 'IndexError' crash).
40
- if len(dataset_full) > num_embeddings:
41
- print(f"⚠️ DATA MISMATCH DETECTED: Dataset has {len(dataset_full)} but Parquet has {num_embeddings}.")
42
- print(" βœ‚οΈ Truncating dataset to match Parquet length...")
43
- dataset = dataset_full.select(range(num_embeddings))
44
- else:
45
- dataset = dataset_full
46
-
47
- print(f"βœ… Final Dataset Size: {len(dataset)}")
48
-
49
- # --- 3. LOAD EMBEDDINGS ---
50
- print(f"⏳ Loading Embeddings from {DATA_FILE}...")
51
- try:
52
- df = pd.read_parquet(DATA_FILE)
53
- db_features = torch.tensor(np.stack(df['embedding'].to_numpy()))
54
- db_features = F.normalize(db_features, p=2, dim=1)
55
- print("βœ… System Ready!")
56
- except Exception as e:
57
- print(f"❌ Error loading parquet file: {e}")
58
- db_features = None
59
 
60
- # --- 4. CORE SEARCH LOGIC (SAFE MODE) ---
61
- def find_best_matches(query_features, top_k=3):
62
- if db_features is None:
63
- return []
64
 
65
- # Normalize query
66
- query_features = F.normalize(query_features, p=2, dim=1)
67
 
68
- # Similarity Search
69
- similarity = torch.mm(query_features, db_features.T)
70
- scores, indices = torch.topk(similarity, k=top_k)
71
 
 
72
  results = []
73
- for idx, score in zip(indices[0], scores[0]):
74
  idx = idx.item()
 
75
 
76
- # 1. Get the raw image
77
- img_data = dataset[idx]['image']
78
-
79
- # 2. Resize it to be small & fast (300x300 max)
80
- img_data.thumbnail((300, 300))
81
-
82
- # 3. Save to a temporary path (prevents the "Too much data" crash)
83
- save_path = f"/tmp/temp_result_{idx}.jpg"
84
- img_data.save(save_path)
85
-
86
- label = df.iloc[idx]['label_name']
87
-
88
- # 4. Return the PATH (string), NOT the image object
89
- results.append((save_path, f"{label} ({score:.2f})"))
90
 
91
  return results
92
 
93
- # --- 5. GRADIO FUNCTIONS ---
94
- def search_by_image(input_image):
95
- if input_image is None: return []
96
- inputs = processor(images=input_image, return_tensors="pt")
97
- with torch.no_grad():
98
- features = model.get_image_features(**inputs)
99
- return find_best_matches(features)
100
-
101
- def search_by_text(input_text):
102
- if not input_text: return []
103
- inputs = processor(text=[input_text], return_tensors="pt", padding="max_length")
104
- with torch.no_grad():
105
- features = model.get_text_features(**inputs)
106
- return find_best_matches(features)
107
-
108
- # --- 6. BUILD UI (Clean & Centered) ---
109
- custom_css = """
110
- .gradio-container { width: 100%; max-width: 1000px; margin: 0 auto !important; }
111
- h1 { text-align: center; color: #E67E22; }
112
- """
113
-
114
- with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="Food Matcher AI") as demo:
115
 
116
  with gr.Row():
117
- gr.Markdown("# πŸ” Visual Dish Matcher (SigLIP)")
118
-
119
- gr.Markdown("Upload a food photo or describe a craving. We'll find the closest matches.", elem_classes=["center-text"])
120
-
121
- with gr.Accordion("πŸ“Ί Watch Demo Video", open=False):
122
- gr.HTML('<div style="display:flex; justify-content:center;"><iframe width="560" height="315" src="https://www.youtube.com/embed/IXeIxYHi0Es" frameborder="0" allowfullscreen></iframe></div>')
123
-
124
- with gr.Tab("πŸ–ΌοΈ Search by Image"):
125
- with gr.Row():
126
- with gr.Column(scale=1):
127
- img_input = gr.Image(type="pil", label="Your Photo", height=300)
128
- btn_img = gr.Button("πŸ” Find Matches", variant="primary", size="lg")
129
 
130
- with gr.Column(scale=2):
131
- img_gallery = gr.Gallery(label="Similar Dishes", columns=3, height=350, object_fit="contain")
132
-
133
- btn_img.click(search_by_image, inputs=img_input, outputs=img_gallery)
134
-
135
- with gr.Tab("πŸ“ Search by Text"):
136
- with gr.Row():
137
- with gr.Column(scale=1):
138
- txt_input = gr.Textbox(label="Describe it", placeholder="e.g. 'Spicy Tacos'", lines=4)
139
- btn_txt = gr.Button("πŸ” Search", variant="primary", size="lg")
140
-
141
- with gr.Column(scale=2):
142
- txt_gallery = gr.Gallery(label="Similar Dishes", columns=3, height=350, object_fit="contain")
143
-
144
- btn_txt.click(search_by_text, inputs=txt_input, outputs=txt_gallery)
145
 
146
- gr.Markdown("---")
147
- gr.Markdown("By Matan Kriel & Odeya Shmuel | Powered by Google SigLIP")
148
 
149
- # Launch
150
- demo.launch()
 
2
  import torch
3
  import pandas as pd
4
  import numpy as np
5
+ from transformers import AutoModel, AutoProcessor
6
  from PIL import Image
7
+ import io
 
 
8
 
9
+ # --- CONFIGURATION ---
10
+ # ⚠️ IMPORTANT: Change this if 'MetaCLIP' or 'OpenAI CLIP' won your notebook battle!
11
+ MODEL_ID = "google/siglip-base-patch16-224"
12
+ DATA_FILE = "food_embeddings_best.parquet"
13
 
14
+ print("⏳ Loading Model & Data...")
 
 
 
 
 
15
 
16
+ # 1. Load Model (Only once)
17
+ model = AutoModel.from_pretrained(MODEL_ID)
18
+ processor = AutoProcessor.from_pretrained(MODEL_ID)
19
 
20
+ # 2. Load the "Memory" (Parquet file)
21
  df = pd.read_parquet(DATA_FILE)
 
 
 
22
 
23
+ # 3. Prepare the Database Vectors
24
+ # Convert the dataframe column into a PyTorch Tensor
25
+ all_vectors = np.stack(df['embedding'].to_numpy())
26
+ db_features = torch.tensor(all_vectors)
27
+
28
+ # (Optional: If your notebook didn't normalize, uncomment this.
29
+ # But your notebook code already did, so we skip it to be fast!)
30
+ # db_features = db_features / db_features.norm(p=2, dim=-1, keepdim=True)
31
+
32
+ print("βœ… System Ready!")
33
+
34
+ def search(text_query, image_query):
35
+ # A. Decide: Is this a Text search or Image search?
36
+ if image_query:
37
+ # Process Image
38
+ inputs = processor(images=image_query, return_tensors="pt")
39
+ get_feat_func = model.get_image_features
40
+ elif text_query:
41
+ # Process Text
42
+ inputs = processor(text=[text_query], return_tensors="pt", padding=True)
43
+ get_feat_func = model.get_text_features
44
+ else:
45
+ return None
46
+
47
+ # B. Run Model (Inference)
48
+ with torch.no_grad():
49
+ query_vec = get_feat_func(**inputs)
50
 
51
+ # C. Search Logic (Pure Math)
52
+ # 1. Normalize Query (Math requirement: Vector / Magnitude)
53
+ query_vec = query_vec / query_vec.norm(p=2, dim=-1, keepdim=True)
 
54
 
55
+ # 2. Dot Product (Similarity)
56
+ scores = torch.mm(query_vec, db_features.T)
57
 
58
+ # 3. Get Top 5
59
+ top_scores, top_indices = torch.topk(scores, k=5)
 
60
 
61
+ # D. Fetch Results
62
  results = []
63
+ for idx, score in zip(top_indices[0], top_scores[0]):
64
  idx = idx.item()
65
+ row = df.iloc[idx]
66
 
67
+ # Handle Image Loading (Parquet saves images as binary/dict)
68
+ img_data = row['image']
69
+ if isinstance(img_data, dict) and 'bytes' in img_data:
70
+ img = Image.open(io.BytesIO(img_data['bytes']))
71
+ else:
72
+ img = img_data # It might already be a PIL object
73
+
74
+ results.append((img, f"{row['label_name']} ({score.item():.2f})"))
 
 
 
 
 
 
75
 
76
  return results
77
 
78
+ # --- INTERFACE ---
79
+ with gr.Blocks(title="AI Food Search") as demo:
80
+ gr.Markdown("# πŸ” AI Food Search")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  with gr.Row():
83
+ # Left: Inputs
84
+ with gr.Column():
85
+ txt_input = gr.Textbox(label="Search by Text", placeholder="e.g. 'spicy pepperoni pizza'")
86
+ img_input = gr.Image(type="pil", label="Or Search by Image")
87
+ btn = gr.Button("Search", variant="primary")
 
 
 
 
 
 
 
88
 
89
+ # Right: Output Gallery
90
+ with gr.Column():
91
+ gallery = gr.Gallery(label="Top Matches")
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
+ # Connect buttons
94
+ btn.click(fn=search, inputs=[txt_input, img_input], outputs=gallery)
95
 
96
+ # Force bind to 0.0.0.0 for Spaces
97
+ demo.launch(server_name="0.0.0.0", server_port=7860)