skshimada committed on
Commit
c4c69b9
Β·
verified Β·
1 Parent(s): 527c0df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -69
app.py CHANGED
@@ -27,128 +27,92 @@ vision_pipe = pipeline(
27
  print("πŸ“š Loading Embedding Engine...")
28
  embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
29
 
30
# --- BOTTLE DETECTION ---
def get_bottle_crops(image_path):
    """Detect bottle-like objects in a photo and return padded crops.

    Runs a YOLOv8-nano model over the image at *image_path* and crops every
    detection whose COCO class id is 39 (bottle), 40 (wine glass) or 41 (cup),
    expanding each box by 25% per side so the label stays readable.

    Args:
        image_path: Filesystem path to the photo to analyse.

    Returns:
        list[PIL.Image.Image]: the crops; the full image as a one-element
        list when nothing was detected; an empty list only when the file
        itself cannot be opened.
    """
    print(f"🔍 DEBUG: Starting YOLO on {image_path}")
    found_crops = []

    try:
        original_img = Image.open(image_path).convert("RGB")
        img_w, img_h = original_img.size

        # Model is loaded per call and explicitly freed below to keep
        # peak memory low on the small inference host.
        yolo_model = YOLO("yolov8n.pt")
        # conf=0.1 keeps low-confidence hits; bottles are often partly occluded.
        results = yolo_model(image_path, verbose=True, conf=0.1)

        for r in results:
            for box in r.boxes:
                if int(box.cls) in (39, 40, 41):  # Bottle, Wine Glass, Cup
                    x1, y1, x2, y2 = box.xyxy[0].tolist()

                    # Dynamic 25% padding around the detection box.
                    pad_x = int((x2 - x1) * 0.25)
                    pad_y = int((y2 - y1) * 0.25)

                    # Clamp to the image bounds and use integer pixel
                    # coordinates (box.xyxy yields floats).
                    x1 = max(0, int(x1) - pad_x)
                    y1 = max(0, int(y1) - pad_y)
                    x2 = min(img_w, int(x2) + pad_x)
                    y2 = min(img_h, int(y2) + pad_y)

                    found_crops.append(original_img.crop((x1, y1, x2, y2)))

        del yolo_model
        gc.collect()

        if not found_crops:
            print("⚠️ DEBUG: No bottles found. Returning full image.")
            return [original_img]

        return found_crops

    except Exception as e:
        print(f"❌ YOLO CRASH: {e}")
        try:
            # Best-effort fallback: hand back the untouched full image.
            return [Image.open(image_path).convert("RGB")]
        except Exception:
            # Was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
            # are no longer swallowed here.
            return []
77
# --- RECIPE INGESTION (THE "HARD CUT" FIX) ---
def ingest_recipes(files):
    """Load uploaded .txt/.pdf files, split them into one chunk per recipe,
    and persist the chunks into the Chroma vector store at CHROMA_PATH.

    Args:
        files: Gradio file uploads; each element exposes a ``.name`` path.

    Returns:
        str: a human-readable status message (success or error).
    """
    if not files: return "❌ No files uploaded."

    docs = []
    for f in files:
        try:
            if f.name.endswith(".txt"):
                loader = TextLoader(f.name)
                docs.extend(loader.load())
            elif f.name.endswith(".pdf"):
                loader = PyPDFLoader(f.name)
                docs.extend(loader.load())
            # Other extensions are silently ignored by design.
        except Exception as e:
            print(f"Error loading {f.name}: {e}")

    if not docs: return "❌ Could not extract text."

    # 1. Combine all pages/files into one massive text block
    full_text = "\n".join([d.page_content for d in docs])

    # 2. Strict Split: Cut exactly at the start of any line that says "Recipe:"
    # (?m)^ means "look at the start of a line"
    raw_chunks = re.split(r'(?m)^(?=Recipe:)', full_text)

    split_docs = []
    for chunk in raw_chunks:
        # Clean out those long '⸻' separator lines
        clean_chunk = re.sub(r'⸻+', '', chunk).strip()

        # If the chunk actually has text in it, save it as a standalone recipe
        if len(clean_chunk) > 20:
            split_docs.append(Document(page_content=clean_chunk))

    # 3. Save to Database (the returned store handle is not needed, so the
    #    previously unused `vector_store` local has been dropped)
    try:
        Chroma.from_documents(
            documents=split_docs,
            embedding=embed_model,
            persist_directory=CHROMA_PATH
        )
        return f"✅ Bar library updated. Strictly split into {len(split_docs)} individual recipes."
    except Exception as e:
        return f"❌ Database Error: {e}"
121
 
122
- # --- BARTENDER LOGIC ---
123
  def bartend(message, history, img_path, inventory):
124
  debug_images = []
125
 
126
  if img_path:
 
127
  crops = get_bottle_crops(img_path)
128
  debug_images = crops
129
- target_img = crops[0] if crops else Image.open(img_path).convert("RGB")
 
 
130
 
131
  def identify_spirit(image_input):
132
- if image_input.mode != "RGB": image_input = image_input.convert("RGB")
 
 
 
133
  prompt = "User: <image>\nRead the label. What is the specific brand and type of alcohol? Be precise.\nAssistant:"
134
- out = vision_pipe(image_input, prompt, generate_kwargs={"max_new_tokens": 50})
 
 
135
  text = out[0]['generated_text']
136
  if "Assistant:" in text: return text.split("Assistant:")[-1].strip()
137
  return text.replace("User: <image>", "").strip()
138
 
139
  try:
 
 
140
  inventory = identify_spirit(target_img)
141
  inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
142
- print(f"πŸ” Pass 1 Result: {inventory}")
143
-
144
- generic_terms = ["vodka", "gin", "rum", "tequila", "whiskey", "whisky", "bourbon", "brandy", "alcohol", "liquor", "spirit", "bottle", "drink"]
145
- if inventory.lower() in generic_terms or len(inventory) < 4:
146
- print("⚠️ Result too generic. Trying FULL IMAGE...")
147
- full_img_result = identify_spirit(Image.open(img_path).convert("RGB"))
148
- full_img_result = re.sub(r'<.*?>', '', full_img_result).strip().split('.')[0]
149
- if len(full_img_result) > len(inventory):
150
- inventory = full_img_result
151
- print(f"βœ… Pass 2 Result: {inventory}")
152
 
153
  except Exception as e:
154
  print(f"❌ Vision Failed: {e}")
@@ -161,7 +125,7 @@ def bartend(message, history, img_path, inventory):
161
  vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
162
  search_query = f"Cocktail recipe using {inventory}"
163
 
164
- # Retrieve the top 4 closest matching recipes
165
  results = vs.similarity_search(search_query, k=4)
166
  recipe_context = "\n\n---\n\n".join([d.page_content for d in results])
167
  except Exception as e:
@@ -193,7 +157,7 @@ with gr.Blocks() as demo:
193
  gr.Markdown("---")
194
  img = gr.Image(type="filepath", label="2. Photo of your Bottle")
195
 
196
- with gr.Accordion("πŸ” Vision Debug", open=True):
197
  debug_gallery = gr.Gallery(label="YOLO Crops", columns=2, height="auto")
198
 
199
  with gr.Column(scale=2):
 
27
  print("πŸ“š Loading Embedding Engine...")
28
  embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
29
 
30
# --- BOTTLE DETECTION (JUST FOR DEBUG GALLERY NOW) ---
def get_bottle_crops(image_path):
    """Detect bottle-like objects and return padded crops for the debug gallery.

    Runs YOLOv8-nano on *image_path* and crops every detection of COCO class
    39 (bottle), 40 (wine glass) or 41 (cup), padded 25% per side.

    Args:
        image_path: Filesystem path to the photo to analyse.

    Returns:
        list[PIL.Image.Image]: the crops, the full image as a one-element
        list when nothing was detected, or an empty list on failure.
    """
    found_crops = []
    try:
        original_img = Image.open(image_path).convert("RGB")
        img_w, img_h = original_img.size

        # Loaded per call and freed below to keep peak memory low.
        yolo_model = YOLO("yolov8n.pt")
        results = yolo_model(image_path, verbose=False, conf=0.1)

        for r in results:
            for box in r.boxes:
                if int(box.cls) in [39, 40, 41]:  # Bottle, Wine Glass, Cup
                    x1, y1, x2, y2 = box.xyxy[0].tolist()
                    box_w, box_h = x2 - x1, y2 - y1
                    pad_x, pad_y = int(box_w * 0.25), int(box_h * 0.25)

                    # Clamp to image bounds, use integer pixel coordinates
                    # (box.xyxy yields floats).
                    x1, y1 = max(0, int(x1) - pad_x), max(0, int(y1) - pad_y)
                    x2, y2 = min(img_w, int(x2) + pad_x), min(img_h, int(y2) + pad_y)
                    found_crops.append(original_img.crop((x1, y1, x2, y2)))

        del yolo_model
        gc.collect()
        return found_crops if found_crops else [original_img]
    except Exception as e:
        # Previously a silent `return []`; log the failure so detection
        # problems are visible in the server log before giving up.
        print(f"❌ YOLO CRASH: {e}")
        return []
57
# --- RECIPE INGESTION (HARD CUT METHOD) ---
def ingest_recipes(files):
    """Ingest uploaded recipe files into the Chroma store, one chunk per recipe.

    Text is merged across all uploads, hard-cut at every line starting with
    "Recipe:", stripped of '⸻' separator runs, and persisted to CHROMA_PATH.

    Args:
        files: Gradio file uploads; each element exposes a ``.name`` path.

    Returns:
        str: a human-readable status message (success or error).
    """
    if not files:
        return "❌ No files uploaded."

    docs = []
    for upload in files:
        path = upload.name
        try:
            if path.endswith(".txt"):
                docs.extend(TextLoader(path).load())
            elif path.endswith(".pdf"):
                docs.extend(PyPDFLoader(path).load())
        except Exception as e:
            print(f"Error: {e}")

    if not docs:
        return "❌ Could not extract text."

    # Merge every page, then cut at each line that begins with "Recipe:"
    # ((?m)^ anchors the lookahead at line starts).
    combined = "\n".join(page.page_content for page in docs)
    pieces = re.split(r'(?m)^(?=Recipe:)', combined)

    # Drop '⸻' separator runs and keep only chunks with real content.
    split_docs = [
        Document(page_content=text)
        for text in (re.sub(r'⸻+', '', piece).strip() for piece in pieces)
        if len(text) > 20
    ]

    try:
        Chroma.from_documents(split_docs, embed_model, persist_directory=CHROMA_PATH)
        return f"✅ Bar library updated. Strictly split into {len(split_docs)} individual recipes."
    except Exception as e:
        return f"❌ Database Error: {e}"
84
 
85
+ # --- BARTENDER LOGIC (SPEED OPTIMIZED) ---
86
  def bartend(message, history, img_path, inventory):
87
  debug_images = []
88
 
89
  if img_path:
90
+ # Run YOLO just so the user can see what it isolated in the gallery
91
  crops = get_bottle_crops(img_path)
92
  debug_images = crops
93
+
94
+ # WE NOW USE THE FULL IMAGE FOR THE AI TO GUARANTEE IT SEES THE BRAND
95
+ target_img = Image.open(img_path).convert("RGB")
96
 
97
  def identify_spirit(image_input):
98
+ # πŸš€ SPEED FIX 1: Shrink massive phone photos to 512x512
99
+ # This stops the CPU from choking on millions of pixels
100
+ image_input.thumbnail((512, 512))
101
+
102
  prompt = "User: <image>\nRead the label. What is the specific brand and type of alcohol? Be precise.\nAssistant:"
103
+
104
+ # πŸš€ SPEED FIX 2: Max 15 tokens. CPU takes ~1s per token. Less tokens = much faster.
105
+ out = vision_pipe(image_input, prompt, generate_kwargs={"max_new_tokens": 15})
106
  text = out[0]['generated_text']
107
  if "Assistant:" in text: return text.split("Assistant:")[-1].strip()
108
  return text.replace("User: <image>", "").strip()
109
 
110
  try:
111
+ # πŸš€ SPEED FIX 3: Single Pass. No more running the vision model twice.
112
+ print("πŸ” Starting Vision Pass (Speed Optimized)...")
113
  inventory = identify_spirit(target_img)
114
  inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
115
+ print(f"βœ… Vision Result: {inventory}")
 
 
 
 
 
 
 
 
 
116
 
117
  except Exception as e:
118
  print(f"❌ Vision Failed: {e}")
 
125
  vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
126
  search_query = f"Cocktail recipe using {inventory}"
127
 
128
+ # Fetch top 4 recipes
129
  results = vs.similarity_search(search_query, k=4)
130
  recipe_context = "\n\n---\n\n".join([d.page_content for d in results])
131
  except Exception as e:
 
157
  gr.Markdown("---")
158
  img = gr.Image(type="filepath", label="2. Photo of your Bottle")
159
 
160
+ with gr.Accordion("πŸ” Vision Debug", open=False):
161
  debug_gallery = gr.Gallery(label="YOLO Crops", columns=2, height="auto")
162
 
163
  with gr.Column(scale=2):