Matan Kriel committed on
Commit
8a253ea
·
1 Parent(s): f3b7d7a

Fix large file with LFS

Browse files
Files changed (3) hide show
  1. FoodApp.py +97 -0
  2. food_embeddings.parquet +3 -0
  3. requirements.txt +8 -0
FoodApp.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import pandas as pd
4
+ import numpy as np
5
+ from PIL import Image
6
+ from transformers import CLIPProcessor, CLIPModel
7
+ from datasets import load_dataset
8
+ from torch.nn import functional as F
9
+
10
# --- 1. SETUP & CONFIG ---
# Hugging Face model id of the CLIP checkpoint used for both image and text queries.
MODEL_ID = "openai/clip-vit-base-patch32"
# Parquet file holding the pre-computed embeddings (read below; the search
# code also expects a 'label_name' column in it).
DATA_FILE = "food_embeddings.parquet"

print("⏳ Starting App... Loading Model...")
# Load Model (CPU is fine for inference on single images)
model = CLIPModel.from_pretrained(MODEL_ID)
processor = CLIPProcessor.from_pretrained(MODEL_ID)

# --- 2. LOAD DATA (Must match Colab logic EXACTLY) ---
print("⏳ Loading Dataset (this takes a moment)...")
# We load the same 5000 images using the same seed so that row i of the
# dataset corresponds to row i of the parquet file.
dataset = load_dataset("ethz/food101", split="train").shuffle(seed=42).select(range(5000))

# --- 3. LOAD EMBEDDINGS ---
print("⏳ Loading Pre-computed Embeddings...")
df = pd.read_parquet(DATA_FILE)
# Convert the per-row embedding vectors back into one 2-D Torch tensor.
# The explicit float32 cast matters: if the parquet stores the numbers as
# doubles, the stacked array is float64 and the later matrix product against
# the float32 query features would fail on the dtype mismatch.
db_features = torch.tensor(np.stack(df['embedding'].to_numpy()), dtype=torch.float32)
# Normalize once for speed, so a plain dot product gives cosine similarity.
db_features = F.normalize(db_features, p=2, dim=1)

print("✅ App Ready!")
33
+
34
# --- 4. CORE SEARCH LOGIC ---
def find_best_matches(query_features, top_k=3):
    """Return the database entries most similar to a query embedding.

    Args:
        query_features: 2-D tensor holding one query embedding per row. Only
            the matches for the first row are returned.
        top_k: Maximum number of matches to return. Values larger than the
            number of stored embeddings are clamped to that number.

    Returns:
        A list of ``(image, caption)`` tuples ordered from best to worst
        match, where ``caption`` is ``"<label> (<score>)"`` with the score
        formatted to two decimals.
    """
    # Match the stored dtype first: the matrix product below requires both
    # operands to share a dtype.
    query_features = query_features.to(db_features.dtype)
    # Normalize query so the dot product with the (already normalized)
    # database rows is a cosine similarity.
    query_features = F.normalize(query_features, p=2, dim=1)

    # Query (1 x D) @ DB.T (D x N) = Scores (1 x N)
    similarity = torch.mm(query_features, db_features.T)

    # torch.topk raises when k exceeds the size of the searched dimension,
    # so never ask for more matches than there are database rows.
    k = min(top_k, similarity.shape[1])
    scores, indices = torch.topk(similarity, k=k)

    results = []
    # tolist() yields plain Python ints/floats for indexing and formatting.
    for idx, score in zip(indices[0].tolist(), scores[0].tolist()):
        # Image comes from the loaded dataset, label from the dataframe;
        # both are addressed by the same row position.
        img = dataset[idx]['image']
        label = df.iloc[idx]['label_name']
        results.append((img, f"{label} ({score:.2f})"))
    return results
57
+
58
# --- 5. GRADIO FUNCTIONS ---
def search_by_image(input_image):
    """Look up the stored dishes closest to an uploaded picture.

    Args:
        input_image: The image supplied by the UI, or ``None`` when nothing
            has been uploaded.

    Returns:
        The list produced by ``find_best_matches`` for the image embedding,
        or an empty list when no image was given.
    """
    if input_image is None:
        # Nothing to search with: hand back an empty result list.
        return []

    # Preprocess the picture into model-ready tensors.
    encoded = processor(images=input_image, return_tensors="pt")
    # Pure inference, so skip gradient tracking.
    with torch.no_grad():
        embedding = model.get_image_features(**encoded)

    return find_best_matches(embedding)
67
+
68
def search_by_text(input_text):
    """Look up the stored dishes closest to a free-text description.

    Args:
        input_text: Description typed by the user. ``None``, an empty string
            or a whitespace-only string is treated as "no query".

    Returns:
        The list produced by ``find_best_matches`` for the text embedding,
        or an empty list when there is no usable query.
    """
    query = input_text.strip() if input_text else ""
    if not query:
        # Blank input carries no meaning to embed; return no results.
        return []

    # truncation=True caps the token sequence at the tokenizer's maximum
    # length, so an overly long description is shortened instead of producing
    # a sequence the text encoder cannot embed.
    inputs = processor(
        text=[query],
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    # Pure inference, so skip gradient tracking.
    with torch.no_grad():
        features = model.get_text_features(**inputs)

    return find_best_matches(features)
76
+
77
# --- 6. BUILD UI ---
with gr.Blocks(title="Food Matcher AI") as demo:
    # Page heading and a one-line usage hint.
    gr.Markdown("# 🍔 Visual Dish Matcher")
    gr.Markdown("Upload a photo of food (or describe it) to find similar dishes in our database.")

    # First tab: query with an uploaded picture.
    with gr.Tab("Image Search"):
        with gr.Row():
            # type="pil" makes Gradio pass a PIL image to the callback.
            img_input = gr.Image(type="pil", label="Upload Food Image")
            img_gallery = gr.Gallery(label="Top Matches")
        btn_img = gr.Button("Find Similar Dishes")
        btn_img.click(
            fn=search_by_image,
            inputs=img_input,
            outputs=img_gallery,
        )

    # Second tab: query with a typed description.
    with gr.Tab("Text Search"):
        with gr.Row():
            txt_input = gr.Textbox(label="Describe the food (e.g., 'Spicy Tacos')")
            txt_gallery = gr.Gallery(label="Top Matches")
        btn_txt = gr.Button("Search by Description")
        btn_txt.click(
            fn=search_by_text,
            inputs=txt_input,
            outputs=txt_gallery,
        )

# Launch
demo.launch()
food_embeddings.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87bbf48a556dd11a473e2e168ef6f94cd9bc150ccabbcea9dda084b2cb9ca3b9
3
+ size 8828792
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ torch
3
+ transformers
4
+ pandas
5
+ numpy
6
+ datasets
7
+ pyarrow
8
+ scikit-learn