skshimada committed on
Commit
ddb1921
·
verified ·
1 Parent(s): 993f3d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -21
app.py CHANGED
@@ -27,32 +27,55 @@ vision_pipe = pipeline(
27
  print("📚 Loading Embedding Engine...")
28
  embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
29
 
30
- # --- BOTTLE DETECTION (YOLO) ---
31
  def get_bottle_crops(image_path):
 
 
 
32
  try:
33
- yolo_model = YOLO("yolov8n.pt")
34
- results = yolo_model(image_path, verbose=False)
35
- found_crops = []
36
  original_img = Image.open(image_path)
 
 
 
 
 
 
 
37
  for r in results:
38
  for box in r.boxes:
39
- if int(box.cls) == 39: # Bottle index
40
  x1, y1, x2, y2 = box.xyxy[0].tolist()
41
  w, h = original_img.size
42
- # Add margin for context
 
43
  x1, y1 = max(0, x1 - 20), max(0, y1 - 20)
44
  x2, y2 = min(w, x2 + 20), min(h, y2 + 20)
45
- found_crops.append(original_img.crop((x1, y1, x2, y2)))
 
 
 
46
  del yolo_model
47
  gc.collect()
 
 
 
 
 
 
48
  return found_crops
 
49
  except Exception as e:
50
- print(f"YOLO Error: {e}")
51
- return []
 
 
 
52
 
53
  # --- RECIPE INGESTION ---
54
  def ingest_recipes(files):
55
  if not files: return "❌ No files uploaded."
 
56
  docs = []
57
  for f in files:
58
  try:
@@ -65,7 +88,8 @@ def ingest_recipes(files):
65
  except Exception as e:
66
  print(f"Error loading {f.name}: {e}")
67
 
68
- if not docs: return "❌ Could not extract text."
 
69
 
70
  vector_store = Chroma.from_documents(
71
  documents=docs,
@@ -76,9 +100,14 @@ def ingest_recipes(files):
76
 
77
  # --- BARTENDER LOGIC ---
78
  def bartend(message, history, img_path, inventory):
 
 
79
  # 1. Vision Scanning
80
  if img_path:
81
  crops = get_bottle_crops(img_path)
 
 
 
82
  target_img = crops[0] if crops else Image.open(img_path)
83
 
84
  prompt_text = "User: <image>\nWhat is the brand and type of alcohol in this image? Answer briefly.\nAssistant:"
@@ -92,6 +121,7 @@ def bartend(message, history, img_path, inventory):
92
  else:
93
  inventory = raw_label.replace(prompt_text, "").strip()
94
 
 
95
  inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
96
 
97
  except Exception as e:
@@ -100,6 +130,7 @@ def bartend(message, history, img_path, inventory):
100
 
101
  # 2. RAG (Recipe Search)
102
  recipe_context = ""
 
103
  if inventory and inventory not in ["Empty Shelf", "Unknown Spirit", ""]:
104
  try:
105
  if os.path.exists(CHROMA_PATH):
@@ -110,19 +141,20 @@ def bartend(message, history, img_path, inventory):
110
  except Exception as e:
111
  print(f"Search error: {e}")
112
 
113
- # 3. Create the Response
114
  if inventory == "Unknown Spirit":
115
- response = "I'm having trouble reading that label. Try taking a closer photo of just the brand name."
116
  elif recipe_context:
117
- response = f"I see you have **{inventory}**. Here is a recipe from your collection:\n\n{recipe_context}"
118
  else:
119
- response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the library yet."
120
 
121
- # Standard Dictionary Format for Gradio 6.0
122
  history.append({"role": "user", "content": message})
123
  history.append({"role": "assistant", "content": response})
124
 
125
- return history, inventory
 
126
 
127
  # --- UI LAYOUT ---
128
  with gr.Blocks() as demo:
@@ -134,20 +166,25 @@ with gr.Blocks() as demo:
134
  file_up = gr.File(label="1. Upload Recipe PDFs/TXTs", file_count="multiple")
135
  ingest_btn = gr.Button("📥 Load Recipes into Memory")
136
  status = gr.Textbox(label="System Status", value="Ready")
 
137
  gr.Markdown("---")
138
  img = gr.Image(type="filepath", label="2. Photo of your Bottle")
139
 
 
 
 
 
140
  with gr.Column(scale=2):
141
- # FIXED LINE BELOW: Removed type="messages"
142
  chatbot = gr.Chatbot(height=500, label="Bartender Chat")
143
  msg = gr.Textbox(label="3. Your Message", placeholder="Ask for a drink suggestion...")
144
  send_btn = gr.Button("Mix It Up", variant="primary")
145
 
146
- # Connect the buttons
147
  ingest_btn.click(ingest_recipes, file_up, status)
148
- msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
149
- send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
 
 
150
 
151
  if __name__ == "__main__":
152
- # Theme moved here for Gradio 6.0 compatibility
153
  demo.launch(theme=gr.themes.Soft())
 
27
  print("📚 Loading Embedding Engine...")
28
  embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
29
 
30
+ # --- BOTTLE DETECTION (DEBUG MODE) ---
31
  def get_bottle_crops(image_path):
32
+ print(f"🔍 DEBUG: Starting YOLO on {image_path}")
33
+ found_crops = []
34
+
35
  try:
36
+ # Load original to verify path
 
 
37
  original_img = Image.open(image_path)
38
+
39
+ # Initialize YOLO (weights download automatically)
40
+ yolo_model = YOLO("yolov8n.pt")
41
+
42
+ # Lower confidence to 0.1 to catch even partial bottles
43
+ results = yolo_model(image_path, verbose=True, conf=0.1)
44
+
45
  for r in results:
46
  for box in r.boxes:
47
+ if int(box.cls) == 39: # Bottle
48
  x1, y1, x2, y2 = box.xyxy[0].tolist()
49
  w, h = original_img.size
50
+
51
+ # Pad the crop by 20px so we don't cut off text
52
  x1, y1 = max(0, x1 - 20), max(0, y1 - 20)
53
  x2, y2 = min(w, x2 + 20), min(h, y2 + 20)
54
+
55
+ crop = original_img.crop((x1, y1, x2, y2))
56
+ found_crops.append(crop)
57
+
58
  del yolo_model
59
  gc.collect()
60
+
61
+ # FALLBACK: If YOLO misses, return the full image so the AI has SOMETHING to look at
62
+ if not found_crops:
63
+ print("⚠️ DEBUG: No bottles found. Returning full image.")
64
+ return [original_img]
65
+
66
  return found_crops
67
+
68
  except Exception as e:
69
+ print(f"YOLO CRASH: {e}")
70
+ try:
71
+ return [Image.open(image_path)]
72
+ except:
73
+ return []
74
 
75
  # --- RECIPE INGESTION ---
76
  def ingest_recipes(files):
77
  if not files: return "❌ No files uploaded."
78
+
79
  docs = []
80
  for f in files:
81
  try:
 
88
  except Exception as e:
89
  print(f"Error loading {f.name}: {e}")
90
 
91
+ if not docs:
92
+ return "❌ Could not extract text from files."
93
 
94
  vector_store = Chroma.from_documents(
95
  documents=docs,
 
100
 
101
  # --- BARTENDER LOGIC ---
102
  def bartend(message, history, img_path, inventory):
103
+ debug_images = []
104
+
105
  # 1. Vision Scanning
106
  if img_path:
107
  crops = get_bottle_crops(img_path)
108
+ debug_images = crops # Save crops to show in the gallery
109
+
110
+ # Use the first crop (or full image if fallback triggered)
111
  target_img = crops[0] if crops else Image.open(img_path)
112
 
113
  prompt_text = "User: <image>\nWhat is the brand and type of alcohol in this image? Answer briefly.\nAssistant:"
 
121
  else:
122
  inventory = raw_label.replace(prompt_text, "").strip()
123
 
124
+ # Clean up punctuation
125
  inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
126
 
127
  except Exception as e:
 
130
 
131
  # 2. RAG (Recipe Search)
132
  recipe_context = ""
133
+ # Only search if we have a valid spirit name
134
  if inventory and inventory not in ["Empty Shelf", "Unknown Spirit", ""]:
135
  try:
136
  if os.path.exists(CHROMA_PATH):
 
141
  except Exception as e:
142
  print(f"Search error: {e}")
143
 
144
+ # 3. Create Response
145
  if inventory == "Unknown Spirit":
146
+ response = "I'm having trouble reading that label. Check the 'Vision Debug' gallery below—is the crop clear?"
147
  elif recipe_context:
148
+ response = f"I see you have **{inventory}**. Here is a recipe I found in your collection:\n\n{recipe_context}"
149
  else:
150
+ response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the current library. Should I suggest a classic drink?"
151
 
152
+ # Add to chat history (Dictionary format for Gradio 6.0)
153
  history.append({"role": "user", "content": message})
154
  history.append({"role": "assistant", "content": response})
155
 
156
+ # Return 3 items: History, Inventory State, and the Debug Images
157
+ return history, inventory, debug_images
158
 
159
  # --- UI LAYOUT ---
160
  with gr.Blocks() as demo:
 
166
  file_up = gr.File(label="1. Upload Recipe PDFs/TXTs", file_count="multiple")
167
  ingest_btn = gr.Button("📥 Load Recipes into Memory")
168
  status = gr.Textbox(label="System Status", value="Ready")
169
+
170
  gr.Markdown("---")
171
  img = gr.Image(type="filepath", label="2. Photo of your Bottle")
172
 
173
+ # VISION DEBUG (Restored)
174
+ with gr.Accordion("🔍 Vision Debug (See what the AI sees)", open=True):
175
+ debug_gallery = gr.Gallery(label="YOLO Crops", columns=2, height="auto")
176
+
177
  with gr.Column(scale=2):
 
178
  chatbot = gr.Chatbot(height=500, label="Bartender Chat")
179
  msg = gr.Textbox(label="3. Your Message", placeholder="Ask for a drink suggestion...")
180
  send_btn = gr.Button("Mix It Up", variant="primary")
181
 
182
+ # Event Wiring
183
  ingest_btn.click(ingest_recipes, file_up, status)
184
+
185
+ # Both inputs trigger the same function with 3 outputs
186
+ msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])
187
+ send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])
188
 
189
  if __name__ == "__main__":
 
190
  demo.launch(theme=gr.themes.Soft())