Spaces:

skshimada
/

Hello

Sleeping

App Files Community

skshimada committed on Feb 18

Commit

73d9e71

verified ·

1 Parent(s): c4c69b9

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -16

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ from PIL import Image
 from transformers import pipeline
 from langchain_chroma import Chroma
 from langchain_community.document_loaders import PyPDFLoader, TextLoader
 from langchain_core.documents import Document
 from langchain_huggingface import HuggingFaceEmbeddings
 from ultralytics import YOLO
@@ -27,9 +28,11 @@ vision_pipe = pipeline(
 print("📚 Loading Embedding Engine...")
 embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-# --- BOTTLE DETECTION (JUST FOR DEBUG GALLERY NOW) ---
 def get_bottle_crops(image_path):
     found_crops = []
     try:
         original_img = Image.open(image_path).convert("RGB")
         img_w, img_h = original_img.size
@@ -41,20 +44,24 @@ def get_bottle_crops(image_path):
             for box in r.boxes:
                 if int(box.cls) in [39, 40, 41]:
                     x1, y1, x2, y2 = box.xyxy[0].tolist()
                     box_w, box_h = x2 - x1, y2 - y1
                     pad_x, pad_y = int(box_w * 0.25), int(box_h * 0.25)
                     x1, y1 = max(0, x1 - pad_x), max(0, y1 - pad_y)
                     x2, y2 = min(img_w, x2 + pad_x), min(img_h, y2 + pad_y)
                     found_crops.append(original_img.crop((x1, y1, x2, y2)))
         del yolo_model
         gc.collect()
         return found_crops if found_crops else [original_img]
-    except Exception:
         return []
-# --- RECIPE INGESTION (HARD CUT METHOD) ---
 def ingest_recipes(files):
     if not files: return "❌ No files uploaded."
@@ -82,37 +89,49 @@ def ingest_recipes(files):
     except Exception as e:
         return f"❌ Database Error: {e}"
-# --- BARTENDER LOGIC (SPEED OPTIMIZED) ---
 def bartend(message, history, img_path, inventory):
     debug_images = []
     if img_path:
-        # Run YOLO just so the user can see what it isolated in the gallery
         crops = get_bottle_crops(img_path)
         debug_images = crops
-        # WE NOW USE THE FULL IMAGE FOR THE AI TO GUARANTEE IT SEES THE BRAND
-        target_img = Image.open(img_path).convert("RGB")
         def identify_spirit(image_input):
-            # 🚀 SPEED FIX 1: Shrink massive phone photos to 512x512
-            # This stops the CPU from choking on millions of pixels
-            image_input.thumbnail((512, 512))
             prompt = "User: <image>\nRead the label. What is the specific brand and type of alcohol? Be precise.\nAssistant:"
-            # 🚀 SPEED FIX 2: Max 15 tokens. CPU takes ~1s per token. Less tokens = much faster.
-            out = vision_pipe(image_input, prompt, generate_kwargs={"max_new_tokens": 15})
             text = out[0]['generated_text']
             if "Assistant:" in text: return text.split("Assistant:")[-1].strip()
             return text.replace("User: <image>", "").strip()
         try:
-            # 🚀 SPEED FIX 3: Single Pass. No more running the vision model twice.
-            print("🔍 Starting Vision Pass (Speed Optimized)...")
             inventory = identify_spirit(target_img)
             inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
-            print(f"✅ Vision Result: {inventory}")
         except Exception as e:
             print(f"❌ Vision Failed: {e}")
@@ -125,7 +144,7 @@ def bartend(message, history, img_path, inventory):
                 vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
                 search_query = f"Cocktail recipe using {inventory}"
-                # Fetch top 4 recipes
                 results = vs.similarity_search(search_query, k=4)
                 recipe_context = "\n\n---\n\n".join([d.page_content for d in results])
         except Exception as e:

 from transformers import pipeline
 from langchain_chroma import Chroma
 from langchain_community.document_loaders import PyPDFLoader, TextLoader
+from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_core.documents import Document
 from langchain_huggingface import HuggingFaceEmbeddings
 from ultralytics import YOLO
 print("📚 Loading Embedding Engine...")
 embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+# --- BOTTLE DETECTION ---
 def get_bottle_crops(image_path):
+    print(f"🔍 DEBUG: Starting YOLO on {image_path}")
     found_crops = []
     try:
         original_img = Image.open(image_path).convert("RGB")
         img_w, img_h = original_img.size
             for box in r.boxes:
                 if int(box.cls) in [39, 40, 41]:
                     x1, y1, x2, y2 = box.xyxy[0].tolist()
+                    # 25% Padding to ensure the label isn't cut off
                     box_w, box_h = x2 - x1, y2 - y1
                     pad_x, pad_y = int(box_w * 0.25), int(box_h * 0.25)
                     x1, y1 = max(0, x1 - pad_x), max(0, y1 - pad_y)
                     x2, y2 = min(img_w, x2 + pad_x), min(img_h, y2 + pad_y)
                     found_crops.append(original_img.crop((x1, y1, x2, y2)))
         del yolo_model
         gc.collect()
         return found_crops if found_crops else [original_img]
+    except Exception as e:
+        print(f"❌ YOLO Error: {e}")
         return []
+# --- RECIPE INGESTION ---
 def ingest_recipes(files):
     if not files: return "❌ No files uploaded."
     except Exception as e:
         return f"❌ Database Error: {e}"
+# --- BARTENDER LOGIC ---
 def bartend(message, history, img_path, inventory):
     debug_images = []
     if img_path:
         crops = get_bottle_crops(img_path)
         debug_images = crops
+        # SPEED FIX 1: We return to using the tight crop, discarding the heavy background!
+        target_img = crops[0] if crops else Image.open(img_path).convert("RGB")
         def identify_spirit(image_input):
+            # SPEED FIX 2: Aggressive squishing.
+            # We copy the image so we don't blur the gallery debug version
+            fast_img = image_input.copy()
+            if fast_img.mode != "RGB": fast_img = fast_img.convert("RGB")
+            # Shrink down to a max of 384x384. This makes CPU math practically instant.
+            fast_img.thumbnail((384, 384))
             prompt = "User: <image>\nRead the label. What is the specific brand and type of alcohol? Be precise.\nAssistant:"
+            # Keep token limit at 15. The 'brain' (Chroma) handles the long text, the 'eyes' just need to read the brand name.
+            out = vision_pipe(fast_img, prompt, generate_kwargs={"max_new_tokens": 15})
             text = out[0]['generated_text']
             if "Assistant:" in text: return text.split("Assistant:")[-1].strip()
             return text.replace("User: <image>", "").strip()
         try:
             inventory = identify_spirit(target_img)
             inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
+            print(f"🔍 Pass 1 Result: {inventory}")
+            generic_terms = ["vodka", "gin", "rum", "tequila", "whiskey", "whisky", "bourbon", "brandy", "alcohol", "liquor", "spirit", "bottle", "drink"]
+            # ONLY fallback to the heavy full image if the crop failed us
+            if inventory.lower() in generic_terms or len(inventory) < 4:
+                print("⚠️ Result too generic. Trying FULL IMAGE...")
+                full_img_result = identify_spirit(Image.open(img_path))
+                full_img_result = re.sub(r'<.*?>', '', full_img_result).strip().split('.')[0]
+                if len(full_img_result) > len(inventory):
+                    inventory = full_img_result
+                    print(f"✅ Pass 2 Result: {inventory}")
         except Exception as e:
             print(f"❌ Vision Failed: {e}")
                 vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
                 search_query = f"Cocktail recipe using {inventory}"
+                # Fetch top 4 distinct recipes
                 results = vs.similarity_search(search_query, k=4)
                 recipe_context = "\n\n---\n\n".join([d.page_content for d in results])
         except Exception as e: