skshimada committed on
Commit
4ba62ef
·
verified ·
1 Parent(s): ddb1921

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -36
app.py CHANGED
@@ -27,30 +27,38 @@ vision_pipe = pipeline(
27
  print("πŸ“š Loading Embedding Engine...")
28
  embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
29
 
30
- # --- BOTTLE DETECTION (DEBUG MODE) ---
31
  def get_bottle_crops(image_path):
32
  print(f"πŸ” DEBUG: Starting YOLO on {image_path}")
33
  found_crops = []
34
 
35
  try:
36
- # Load original to verify path
37
  original_img = Image.open(image_path)
 
38
 
39
- # Initialize YOLO (weights download automatically)
40
  yolo_model = YOLO("yolov8n.pt")
41
-
42
- # Lower confidence to 0.1 to catch even partial bottles
43
  results = yolo_model(image_path, verbose=True, conf=0.1)
44
 
45
  for r in results:
46
  for box in r.boxes:
47
  if int(box.cls) == 39: # Bottle
48
  x1, y1, x2, y2 = box.xyxy[0].tolist()
49
- w, h = original_img.size
50
 
51
- # Pad the crop by 20px so we don't cut off text
52
- x1, y1 = max(0, x1 - 20), max(0, y1 - 20)
53
- x2, y2 = min(w, x2 + 20), min(h, y2 + 20)
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  crop = original_img.crop((x1, y1, x2, y2))
56
  found_crops.append(crop)
@@ -58,7 +66,6 @@ def get_bottle_crops(image_path):
58
  del yolo_model
59
  gc.collect()
60
 
61
- # FALLBACK: If YOLO misses, return the full image so the AI has SOMETHING to look at
62
  if not found_crops:
63
  print("⚠️ DEBUG: No bottles found. Returning full image.")
64
  return [original_img]
@@ -75,7 +82,6 @@ def get_bottle_crops(image_path):
75
  # --- RECIPE INGESTION ---
76
  def ingest_recipes(files):
77
  if not files: return "❌ No files uploaded."
78
-
79
  docs = []
80
  for f in files:
81
  try:
@@ -88,8 +94,7 @@ def ingest_recipes(files):
88
  except Exception as e:
89
  print(f"Error loading {f.name}: {e}")
90
 
91
- if not docs:
92
- return "❌ Could not extract text from files."
93
 
94
  vector_store = Chroma.from_documents(
95
  documents=docs,
@@ -105,32 +110,43 @@ def bartend(message, history, img_path, inventory):
105
  # 1. Vision Scanning
106
  if img_path:
107
  crops = get_bottle_crops(img_path)
108
- debug_images = crops # Save crops to show in the gallery
109
 
110
- # Use the first crop (or full image if fallback triggered)
111
  target_img = crops[0] if crops else Image.open(img_path)
112
 
113
- prompt_text = "User: <image>\nWhat is the brand and type of alcohol in this image? Answer briefly.\nAssistant:"
 
 
 
 
 
 
 
 
 
 
 
114
 
115
- try:
116
- output = vision_pipe(target_img, prompt=prompt_text, generate_kwargs={"max_new_tokens": 50})
117
- raw_label = output[0]['generated_text']
118
-
119
- if "Assistant:" in raw_label:
120
- inventory = raw_label.split("Assistant:")[-1].strip()
121
- else:
122
- inventory = raw_label.replace(prompt_text, "").strip()
123
-
124
- # Clean up punctuation
125
- inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
126
 
127
- except Exception as e:
128
- print(f"Vision error: {e}")
129
- inventory = "Unknown Spirit"
 
130
 
131
  # 2. RAG (Recipe Search)
132
  recipe_context = ""
133
- # Only search if we have a valid spirit name
134
  if inventory and inventory not in ["Empty Shelf", "Unknown Spirit", ""]:
135
  try:
136
  if os.path.exists(CHROMA_PATH):
@@ -149,11 +165,9 @@ def bartend(message, history, img_path, inventory):
149
  else:
150
  response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the current library. Should I suggest a classic drink?"
151
 
152
- # Add to chat history (Dictionary format for Gradio 6.0)
153
  history.append({"role": "user", "content": message})
154
  history.append({"role": "assistant", "content": response})
155
 
156
- # Return 3 items: History, Inventory State, and the Debug Images
157
  return history, inventory, debug_images
158
 
159
  # --- UI LAYOUT ---
@@ -170,7 +184,6 @@ with gr.Blocks() as demo:
170
  gr.Markdown("---")
171
  img = gr.Image(type="filepath", label="2. Photo of your Bottle")
172
 
173
- # VISION DEBUG (Restored)
174
  with gr.Accordion("πŸ” Vision Debug (See what the AI sees)", open=True):
175
  debug_gallery = gr.Gallery(label="YOLO Crops", columns=2, height="auto")
176
 
@@ -179,10 +192,8 @@ with gr.Blocks() as demo:
179
  msg = gr.Textbox(label="3. Your Message", placeholder="Ask for a drink suggestion...")
180
  send_btn = gr.Button("Mix It Up", variant="primary")
181
 
182
- # Event Wiring
183
  ingest_btn.click(ingest_recipes, file_up, status)
184
 
185
- # Both inputs trigger the same function with 3 outputs
186
  msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])
187
  send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])
188
 
 
27
  print("πŸ“š Loading Embedding Engine...")
28
  embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
29
 
30
+ # --- BOTTLE DETECTION (SMART PADDING) ---
31
  def get_bottle_crops(image_path):
32
  print(f"πŸ” DEBUG: Starting YOLO on {image_path}")
33
  found_crops = []
34
 
35
  try:
 
36
  original_img = Image.open(image_path)
37
+ img_w, img_h = original_img.size
38
 
 
39
  yolo_model = YOLO("yolov8n.pt")
40
+ # Low confidence to catch everything
 
41
  results = yolo_model(image_path, verbose=True, conf=0.1)
42
 
43
  for r in results:
44
  for box in r.boxes:
45
  if int(box.cls) == 39: # Bottle
46
  x1, y1, x2, y2 = box.xyxy[0].tolist()
 
47
 
48
+ # --- NEW: Dynamic 25% Padding ---
49
+ # Calculate width and height of the detected box
50
+ box_w = x2 - x1
51
+ box_h = y2 - y1
52
+
53
+ # Expand by 25% of the box's own size
54
+ pad_x = int(box_w * 0.25)
55
+ pad_y = int(box_h * 0.25)
56
+
57
+ # Apply padding but stay within image bounds
58
+ x1 = max(0, x1 - pad_x)
59
+ y1 = max(0, y1 - pad_y)
60
+ x2 = min(img_w, x2 + pad_x)
61
+ y2 = min(img_h, y2 + pad_y)
62
 
63
  crop = original_img.crop((x1, y1, x2, y2))
64
  found_crops.append(crop)
 
66
  del yolo_model
67
  gc.collect()
68
 
 
69
  if not found_crops:
70
  print("⚠️ DEBUG: No bottles found. Returning full image.")
71
  return [original_img]
 
82
  # --- RECIPE INGESTION ---
83
  def ingest_recipes(files):
84
  if not files: return "❌ No files uploaded."
 
85
  docs = []
86
  for f in files:
87
  try:
 
94
  except Exception as e:
95
  print(f"Error loading {f.name}: {e}")
96
 
97
+ if not docs: return "❌ Could not extract text."
 
98
 
99
  vector_store = Chroma.from_documents(
100
  documents=docs,
 
110
  # 1. Vision Scanning
111
  if img_path:
112
  crops = get_bottle_crops(img_path)
113
+ debug_images = crops
114
 
115
+ # Start with the best crop
116
  target_img = crops[0] if crops else Image.open(img_path)
117
 
118
+ # Helper function to run vision model
119
+ def identify_spirit(image_input):
120
+ prompt = "User: <image>\nRead the label. What is the specific brand and type of alcohol? Be precise.\nAssistant:"
121
+ out = vision_pipe(image_input, prompt=prompt, generate_kwargs={"max_new_tokens": 50})
122
+ text = out[0]['generated_text']
123
+ if "Assistant:" in text:
124
+ return text.split("Assistant:")[-1].strip()
125
+ return text.replace("User: <image>", "").strip()
126
+
127
+ # Run First Pass (Crop)
128
+ inventory = identify_spirit(target_img)
129
+ inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
130
 
131
+ print(f"πŸ” Pass 1 Result: {inventory}")
132
+
133
+ # --- NEW: The "Generic Fallback" Logic ---
134
+ # If the result is just a generic category, we missed the brand.
135
+ # Force a check on the FULL image.
136
+ generic_terms = ["vodka", "gin", "rum", "tequila", "whiskey", "whisky", "bourbon", "brandy", "alcohol", "liquor", "spirit", "bottle"]
137
+
138
+ if inventory.lower() in generic_terms or len(inventory) < 4:
139
+ print("⚠️ Result too generic. Trying FULL IMAGE...")
140
+ full_img_result = identify_spirit(Image.open(img_path))
141
+ full_img_result = re.sub(r'<.*?>', '', full_img_result).strip().split('.')[0]
142
 
143
+ # If the full image gave us a longer (more specific) name, use it
144
+ if len(full_img_result) > len(inventory):
145
+ inventory = full_img_result
146
+ print(f"βœ… Pass 2 Result: {inventory}")
147
 
148
  # 2. RAG (Recipe Search)
149
  recipe_context = ""
 
150
  if inventory and inventory not in ["Empty Shelf", "Unknown Spirit", ""]:
151
  try:
152
  if os.path.exists(CHROMA_PATH):
 
165
  else:
166
  response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the current library. Should I suggest a classic drink?"
167
 
 
168
  history.append({"role": "user", "content": message})
169
  history.append({"role": "assistant", "content": response})
170
 
 
171
  return history, inventory, debug_images
172
 
173
  # --- UI LAYOUT ---
 
184
  gr.Markdown("---")
185
  img = gr.Image(type="filepath", label="2. Photo of your Bottle")
186
 
 
187
  with gr.Accordion("πŸ” Vision Debug (See what the AI sees)", open=True):
188
  debug_gallery = gr.Gallery(label="YOLO Crops", columns=2, height="auto")
189
 
 
192
  msg = gr.Textbox(label="3. Your Message", placeholder="Ask for a drink suggestion...")
193
  send_btn = gr.Button("Mix It Up", variant="primary")
194
 
 
195
  ingest_btn.click(ingest_recipes, file_up, status)
196
 
 
197
  msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])
198
  send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])
199