Spaces:

skshimada
/

Hello

Sleeping

App Files Files Community

skshimada committed 11 days ago

Commit

33c5c81

verified ·

1 Parent(s): ce0a4da

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -32

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ from PIL import Image
 from transformers import pipeline
 from langchain_chroma import Chroma
 from langchain_community.document_loaders import PyPDFLoader, TextLoader
 from langchain_core.documents import Document
 from langchain_huggingface import HuggingFaceEmbeddings
 from ultralytics import YOLO
@@ -27,7 +28,7 @@ vision_pipe = pipeline(
 print("📚 Loading Embedding Engine...")
 embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-# --- BOTTLE DETECTION (SMART PADDING) ---
 def get_bottle_crops(image_path):
     print(f"🔍 DEBUG: Starting YOLO on {image_path}")
     found_crops = []
@@ -37,13 +38,11 @@ def get_bottle_crops(image_path):
         img_w, img_h = original_img.size
         yolo_model = YOLO("yolov8n.pt")
-        # Extremely low confidence to catch anything
         results = yolo_model(image_path, verbose=True, conf=0.1)
         for r in results:
             for box in r.boxes:
-                # Class 39 is bottle. We also check Class 40 (Wine glass) or 41 (Cup) just in case
-                if int(box.cls) in [39, 40, 41]:
                     x1, y1, x2, y2 = box.xyxy[0].tolist()
                     # Dynamic 25% Padding
@@ -76,9 +75,10 @@ def get_bottle_crops(image_path):
         except:
             return []
-# --- RECIPE INGESTION ---
 def ingest_recipes(files):
     if not files: return "❌ No files uploaded."
     docs = []
     for f in files:
         try:
@@ -93,12 +93,21 @@ def ingest_recipes(files):
     if not docs: return "❌ Could not extract text."
     vector_store = Chroma.from_documents(
-        documents=docs,
         embedding=embed_model,
         persist_directory=CHROMA_PATH
     )
-    return f"✅ Bar library updated with {len(docs)} items."
 # --- BARTENDER LOGIC ---
 def bartend(message, history, img_path, inventory):
@@ -108,48 +117,34 @@ def bartend(message, history, img_path, inventory):
     if img_path:
         crops = get_bottle_crops(img_path)
         debug_images = crops
-        # Start with the best crop
         target_img = crops[0] if crops else Image.open(img_path).convert("RGB")
-        # Helper function with FIXED calling signature
         def identify_spirit(image_input):
-            # Ensure image is RGB to prevent pipeline errors
-            if image_input.mode != "RGB":
-                image_input = image_input.convert("RGB")
-            prompt = "User: <image>\nRead the label on the bottle. What is the specific brand and type of alcohol? Be precise.\nAssistant:"
-            # FIXED: Passing prompt as a positional argument (the second argument)
-            # This fixes the "ValueError: You must provide text" error
             out = vision_pipe(image_input, prompt, generate_kwargs={"max_new_tokens": 50})
             text = out[0]['generated_text']
-            if "Assistant:" in text:
-                return text.split("Assistant:")[-1].strip()
             return text.replace("User: <image>", "").strip()
-        # Run Pass 1
         try:
             inventory = identify_spirit(target_img)
             inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
             print(f"🔍 Pass 1 Result: {inventory}")
-            # Generic Fallback Logic
-            generic_terms = ["vodka", "gin", "rum", "tequila", "whiskey", "whisky", "bourbon", "brandy", "alcohol", "liquor", "spirit", "bottle", "drink", "glass"]
-            # If the answer is too short or generic, try the FULL image
             if inventory.lower() in generic_terms or len(inventory) < 4:
                 print("⚠️ Result too generic. Trying FULL IMAGE...")
                 full_img_result = identify_spirit(Image.open(img_path).convert("RGB"))
                 full_img_result = re.sub(r'<.*?>', '', full_img_result).strip().split('.')[0]
                 if len(full_img_result) > len(inventory):
                     inventory = full_img_result
                     print(f"✅ Pass 2 Result: {inventory}")
         except Exception as e:
-            print(f"❌ Vision Pipeline Failed: {e}")
             inventory = "Unknown Spirit"
     # 2. RAG (Recipe Search)
@@ -159,7 +154,9 @@ def bartend(message, history, img_path, inventory):
             if os.path.exists(CHROMA_PATH):
                 vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
                 search_query = f"Cocktail recipe using {inventory}"
-                results = vs.similarity_search(search_query, k=2)
                 recipe_context = "\n---\n".join([d.page_content for d in results])
         except Exception as e:
             print(f"Search error: {e}")
@@ -168,11 +165,10 @@ def bartend(message, history, img_path, inventory):
     if inventory == "Unknown Spirit":
         response = "I'm having trouble reading that label. Check the 'Vision Debug' gallery below—is the crop clear?"
     elif recipe_context:
-        response = f"I see you have **{inventory}**. Here is a recipe I found in your collection:\n\n{recipe_context}"
     else:
         response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the current library. Should I suggest a classic drink?"
-    # Gradio 6.0 Dictionary Format
     history.append({"role": "user", "content": message})
     history.append({"role": "assistant", "content": response})
@@ -201,7 +197,6 @@ with gr.Blocks() as demo:
             send_btn = gr.Button("Mix It Up", variant="primary")
     ingest_btn.click(ingest_recipes, file_up, status)
     msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])
     send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])

 from transformers import pipeline
 from langchain_chroma import Chroma
 from langchain_community.document_loaders import PyPDFLoader, TextLoader
+from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_core.documents import Document
 from langchain_huggingface import HuggingFaceEmbeddings
 from ultralytics import YOLO
 print("📚 Loading Embedding Engine...")
 embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+# --- BOTTLE DETECTION ---
 def get_bottle_crops(image_path):
     print(f"🔍 DEBUG: Starting YOLO on {image_path}")
     found_crops = []
         img_w, img_h = original_img.size
         yolo_model = YOLO("yolov8n.pt")
         results = yolo_model(image_path, verbose=True, conf=0.1)
         for r in results:
             for box in r.boxes:
+                if int(box.cls) in [39, 40, 41]: # Bottle, Wine Glass, Cup
                     x1, y1, x2, y2 = box.xyxy[0].tolist()
                     # Dynamic 25% Padding
         except:
             return []
+# --- RECIPE INGESTION (NOW WITH SCISSORS!) ---
 def ingest_recipes(files):
     if not files: return "❌ No files uploaded."
     docs = []
     for f in files:
         try:
     if not docs: return "❌ Could not extract text."
+    # --- THE FIX: SPLIT TEXT INTO RECIPES ---
+    # We split by "Recipe:" or newlines to ensure each drink is its own 'chunk'
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=600,       # Approximate size of one recipe
+        chunk_overlap=50,     # Slight overlap to avoid cutting words
+        separators=["\nRecipe:", "Recipe:", "\n\n", "\n"] # Priority splitters
+    )
+    splits = text_splitter.split_documents(docs)
     vector_store = Chroma.from_documents(
+        documents=splits, # We ingest the SPLITS, not the whole doc
         embedding=embed_model,
         persist_directory=CHROMA_PATH
     )
+    return f"✅ Bar library updated. Split into {len(splits)} individual recipes."
 # --- BARTENDER LOGIC ---
 def bartend(message, history, img_path, inventory):
     if img_path:
         crops = get_bottle_crops(img_path)
         debug_images = crops
         target_img = crops[0] if crops else Image.open(img_path).convert("RGB")
         def identify_spirit(image_input):
+            if image_input.mode != "RGB": image_input = image_input.convert("RGB")
+            prompt = "User: <image>\nRead the label. What is the specific brand and type of alcohol? Be precise.\nAssistant:"
+            # Positional argument fix
             out = vision_pipe(image_input, prompt, generate_kwargs={"max_new_tokens": 50})
             text = out[0]['generated_text']
+            if "Assistant:" in text: return text.split("Assistant:")[-1].strip()
             return text.replace("User: <image>", "").strip()
         try:
             inventory = identify_spirit(target_img)
             inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
             print(f"🔍 Pass 1 Result: {inventory}")
+            # Generic Fallback
+            generic_terms = ["vodka", "gin", "rum", "tequila", "whiskey", "whisky", "bourbon", "brandy", "alcohol", "liquor", "spirit", "bottle", "drink"]
             if inventory.lower() in generic_terms or len(inventory) < 4:
                 print("⚠️ Result too generic. Trying FULL IMAGE...")
                 full_img_result = identify_spirit(Image.open(img_path).convert("RGB"))
                 full_img_result = re.sub(r'<.*?>', '', full_img_result).strip().split('.')[0]
                 if len(full_img_result) > len(inventory):
                     inventory = full_img_result
                     print(f"✅ Pass 2 Result: {inventory}")
         except Exception as e:
+            print(f"❌ Vision Failed: {e}")
             inventory = "Unknown Spirit"
     # 2. RAG (Recipe Search)
             if os.path.exists(CHROMA_PATH):
                 vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
                 search_query = f"Cocktail recipe using {inventory}"
+                # INCREASED K to 5 to give you more options
+                results = vs.similarity_search(search_query, k=5)
                 recipe_context = "\n---\n".join([d.page_content for d in results])
         except Exception as e:
             print(f"Search error: {e}")
     if inventory == "Unknown Spirit":
         response = "I'm having trouble reading that label. Check the 'Vision Debug' gallery below—is the crop clear?"
     elif recipe_context:
+        response = f"I see you have **{inventory}**. Here are some recipes from your collection:\n\n{recipe_context}"
     else:
         response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the current library. Should I suggest a classic drink?"
     history.append({"role": "user", "content": message})
     history.append({"role": "assistant", "content": response})
             send_btn = gr.Button("Mix It Up", variant="primary")
     ingest_btn.click(ingest_recipes, file_up, status)
     msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])
     send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])