Spaces:

skshimada
/

Hello

Sleeping

App Files Community

skshimada committed 11 days ago

Commit

4ba62ef

verified ·

1 Parent(s): ddb1921

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -36

app.py CHANGED Viewed

@@ -27,30 +27,38 @@ vision_pipe = pipeline(
 print("📚 Loading Embedding Engine...")
 embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-# --- BOTTLE DETECTION (DEBUG MODE) ---
 def get_bottle_crops(image_path):
     print(f"🔍 DEBUG: Starting YOLO on {image_path}")
     found_crops = []
     try:
-        # Load original to verify path
         original_img = Image.open(image_path)
-        # Initialize YOLO (weights download automatically)
         yolo_model = YOLO("yolov8n.pt")
-        # Lower confidence to 0.1 to catch even partial bottles
         results = yolo_model(image_path, verbose=True, conf=0.1)
         for r in results:
             for box in r.boxes:
                 if int(box.cls) == 39: # Bottle
                     x1, y1, x2, y2 = box.xyxy[0].tolist()
-                    w, h = original_img.size
-                    # Pad the crop by 20px so we don't cut off text
-                    x1, y1 = max(0, x1 - 20), max(0, y1 - 20)
-                    x2, y2 = min(w, x2 + 20), min(h, y2 + 20)
                     crop = original_img.crop((x1, y1, x2, y2))
                     found_crops.append(crop)
@@ -58,7 +66,6 @@ def get_bottle_crops(image_path):
         del yolo_model
         gc.collect()
-        # FALLBACK: If YOLO misses, return the full image so the AI has SOMETHING to look at
         if not found_crops:
             print("⚠️ DEBUG: No bottles found. Returning full image.")
             return [original_img]
@@ -75,7 +82,6 @@ def get_bottle_crops(image_path):
 # --- RECIPE INGESTION ---
 def ingest_recipes(files):
     if not files: return "❌ No files uploaded."
     docs = []
     for f in files:
         try:
@@ -88,8 +94,7 @@ def ingest_recipes(files):
         except Exception as e:
             print(f"Error loading {f.name}: {e}")
-    if not docs:
-        return "❌ Could not extract text from files."
     vector_store = Chroma.from_documents(
         documents=docs,
@@ -105,32 +110,43 @@ def bartend(message, history, img_path, inventory):
     # 1. Vision Scanning
     if img_path:
         crops = get_bottle_crops(img_path)
-        debug_images = crops # Save crops to show in the gallery
-        # Use the first crop (or full image if fallback triggered)
         target_img = crops[0] if crops else Image.open(img_path)
-        prompt_text = "User: <image>\nWhat is the brand and type of alcohol in this image? Answer briefly.\nAssistant:"
-        try:
-            output = vision_pipe(target_img, prompt=prompt_text, generate_kwargs={"max_new_tokens": 50})
-            raw_label = output[0]['generated_text']
-            if "Assistant:" in raw_label:
-                inventory = raw_label.split("Assistant:")[-1].strip()
-            else:
-                inventory = raw_label.replace(prompt_text, "").strip()
-            # Clean up punctuation
-            inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
-        except Exception as e:
-            print(f"Vision error: {e}")
-            inventory = "Unknown Spirit"
     # 2. RAG (Recipe Search)
     recipe_context = ""
-    # Only search if we have a valid spirit name
     if inventory and inventory not in ["Empty Shelf", "Unknown Spirit", ""]:
         try:
             if os.path.exists(CHROMA_PATH):
@@ -149,11 +165,9 @@ def bartend(message, history, img_path, inventory):
     else:
         response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the current library. Should I suggest a classic drink?"
-    # Add to chat history (Dictionary format for Gradio 6.0)
     history.append({"role": "user", "content": message})
     history.append({"role": "assistant", "content": response})
-    # Return 3 items: History, Inventory State, and the Debug Images
     return history, inventory, debug_images
 # --- UI LAYOUT ---
@@ -170,7 +184,6 @@ with gr.Blocks() as demo:
             gr.Markdown("---")
             img = gr.Image(type="filepath", label="2. Photo of your Bottle")
-            # VISION DEBUG (Restored)
             with gr.Accordion("🔍 Vision Debug (See what the AI sees)", open=True):
                 debug_gallery = gr.Gallery(label="YOLO Crops", columns=2, height="auto")
@@ -179,10 +192,8 @@ with gr.Blocks() as demo:
             msg = gr.Textbox(label="3. Your Message", placeholder="Ask for a drink suggestion...")
             send_btn = gr.Button("Mix It Up", variant="primary")
-    # Event Wiring
     ingest_btn.click(ingest_recipes, file_up, status)
-    # Both inputs trigger the same function with 3 outputs
     msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])
     send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])

 print("📚 Loading Embedding Engine...")
 embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+# --- BOTTLE DETECTION (SMART PADDING) ---
 def get_bottle_crops(image_path):
     print(f"🔍 DEBUG: Starting YOLO on {image_path}")
     found_crops = []
     try:
         original_img = Image.open(image_path)
+        img_w, img_h = original_img.size
         yolo_model = YOLO("yolov8n.pt")
+        # Low confidence to catch everything
         results = yolo_model(image_path, verbose=True, conf=0.1)
         for r in results:
             for box in r.boxes:
                 if int(box.cls) == 39: # Bottle
                     x1, y1, x2, y2 = box.xyxy[0].tolist()
+                    # --- NEW: Dynamic 25% Padding ---
+                    # Calculate width and height of the detected box
+                    box_w = x2 - x1
+                    box_h = y2 - y1
+                    # Expand by 25% of the box's own size
+                    pad_x = int(box_w * 0.25)
+                    pad_y = int(box_h * 0.25)
+                    # Apply padding but stay within image bounds
+                    x1 = max(0, x1 - pad_x)
+                    y1 = max(0, y1 - pad_y)
+                    x2 = min(img_w, x2 + pad_x)
+                    y2 = min(img_h, y2 + pad_y)
                     crop = original_img.crop((x1, y1, x2, y2))
                     found_crops.append(crop)
         del yolo_model
         gc.collect()
         if not found_crops:
             print("⚠️ DEBUG: No bottles found. Returning full image.")
             return [original_img]
 # --- RECIPE INGESTION ---
 def ingest_recipes(files):
     if not files: return "❌ No files uploaded."
     docs = []
     for f in files:
         try:
         except Exception as e:
             print(f"Error loading {f.name}: {e}")
+    if not docs: return "❌ Could not extract text."
     vector_store = Chroma.from_documents(
         documents=docs,
     # 1. Vision Scanning
     if img_path:
         crops = get_bottle_crops(img_path)
+        debug_images = crops
+        # Start with the best crop
         target_img = crops[0] if crops else Image.open(img_path)
+        # Helper function to run vision model
+        def identify_spirit(image_input):
+            prompt = "User: <image>\nRead the label. What is the specific brand and type of alcohol? Be precise.\nAssistant:"
+            out = vision_pipe(image_input, prompt=prompt, generate_kwargs={"max_new_tokens": 50})
+            text = out[0]['generated_text']
+            if "Assistant:" in text:
+                return text.split("Assistant:")[-1].strip()
+            return text.replace("User: <image>", "").strip()
+        # Run First Pass (Crop)
+        inventory = identify_spirit(target_img)
+        inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
+        print(f"🔍 Pass 1 Result: {inventory}")
+        # --- NEW: The "Generic Fallback" Logic ---
+        # If the result is just a generic category, we missed the brand.
+        # Force a check on the FULL image.
+        generic_terms = ["vodka", "gin", "rum", "tequila", "whiskey", "whisky", "bourbon", "brandy", "alcohol", "liquor", "spirit", "bottle"]
+        if inventory.lower() in generic_terms or len(inventory) < 4:
+            print("⚠️ Result too generic. Trying FULL IMAGE...")
+            full_img_result = identify_spirit(Image.open(img_path))
+            full_img_result = re.sub(r'<.*?>', '', full_img_result).strip().split('.')[0]
+            # If the full image gave us a longer (more specific) name, use it
+            if len(full_img_result) > len(inventory):
+                inventory = full_img_result
+                print(f"✅ Pass 2 Result: {inventory}")
     # 2. RAG (Recipe Search)
     recipe_context = ""
     if inventory and inventory not in ["Empty Shelf", "Unknown Spirit", ""]:
         try:
             if os.path.exists(CHROMA_PATH):
     else:
         response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the current library. Should I suggest a classic drink?"
     history.append({"role": "user", "content": message})
     history.append({"role": "assistant", "content": response})
     return history, inventory, debug_images
 # --- UI LAYOUT ---
             gr.Markdown("---")
             img = gr.Image(type="filepath", label="2. Photo of your Bottle")
             with gr.Accordion("🔍 Vision Debug (See what the AI sees)", open=True):
                 debug_gallery = gr.Gallery(label="YOLO Crops", columns=2, height="auto")
             msg = gr.Textbox(label="3. Your Message", placeholder="Ask for a drink suggestion...")
             send_btn = gr.Button("Mix It Up", variant="primary")
     ingest_btn.click(ingest_recipes, file_up, status)
     msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])
     send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])