Spaces:

skshimada
/

Hello

Sleeping

App Files Files Community

skshimada committed on Feb 18

Commit

099e0d3

verified ·

1 Parent(s): d8f8152

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -17

app.py CHANGED Viewed

@@ -27,7 +27,7 @@ vision_pipe = pipeline(
 print("📚 Loading Embedding Engine...")
 embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-# --- BOTTLE DETECTION ---
 def get_bottle_crops(image_path):
     try:
         yolo_model = YOLO("yolov8n.pt")
@@ -36,9 +36,13 @@ def get_bottle_crops(image_path):
         original_img = Image.open(image_path)
         for r in results:
             for box in r.boxes:
-                if int(box.cls) == 39: # Bottle
                     x1, y1, x2, y2 = box.xyxy[0].tolist()
-                    found_crops.append(original_img.crop((x1-5, y1-5, x2+5, y2+5)))
         del yolo_model
         gc.collect()
         return found_crops
@@ -77,20 +81,36 @@ def bartend(message, history, img_path, inventory):
     # 1. Vision Scanning
     if img_path:
         crops = get_bottle_crops(img_path)
         target_img = crops[0] if crops else Image.open(img_path)
-        prompt_text = "What is the brand and type of alcohol in this image? Answer briefly."
         try:
-            output = vision_pipe(target_img, prompt=prompt_text, generate_kwargs={"max_new_tokens": 30})
             raw_label = output[0]['generated_text']
-            inventory = raw_label.split("Answer:")[-1].strip() if "Answer:" in raw_label else raw_label.replace(prompt_text, "").strip()
         except Exception as e:
             print(f"Vision error: {e}")
             inventory = "Unknown Spirit"
-    # 2. RAG (Recipe Search)
     recipe_context = ""
-    if inventory and inventory != "Empty Shelf":
         try:
             if os.path.exists(CHROMA_PATH):
                 vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
@@ -101,41 +121,40 @@ def bartend(message, history, img_path, inventory):
             print(f"Search error: {e}")
     # 3. Create the Response
-    if recipe_context:
-        response = f"I see you have **{inventory}**. Here is a recipe I found in your collection:\n\n{recipe_context}"
     else:
-        response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the library yet."
-    # dictionary format for Gradio 6.0
     history.append({"role": "user", "content": message})
     history.append({"role": "assistant", "content": response})
     return history, inventory
 # --- UI LAYOUT ---
-# Removed theme from Blocks (it's now in launch)
 with gr.Blocks() as demo:
     gr.Markdown("# 🍸 LocalAGI: The AI Sommelier")
     inv_state = gr.State("Empty Shelf")
     with gr.Row():
         with gr.Column(scale=1):
-            file_up = gr.File(label="1. Upload Recipes (PDF/TXT)", file_count="multiple")
             ingest_btn = gr.Button("📥 Load into Memory")
             status = gr.Textbox(label="System Status", value="Ready")
             gr.Markdown("---")
             img = gr.Image(type="filepath", label="2. Photo of your Bottle")
         with gr.Column(scale=2):
-            # Removed type="messages" (dictionary format is now default in 6.0)
             chatbot = gr.Chatbot(height=500, label="Bartender Chat")
-            msg = gr.Textbox(label="3. Your Message", placeholder="Ask for a suggestion...")
             send_btn = gr.Button("Mix It Up", variant="primary")
     ingest_btn.click(ingest_recipes, file_up, status)
     msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
     send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
 if __name__ == "__main__":
-    # Moved theme to launch() as required by Gradio 6.0
     demo.launch(theme=gr.themes.Soft())

 print("📚 Loading Embedding Engine...")
 embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+# --- BOTTLE DETECTION (YOLO) ---
 def get_bottle_crops(image_path):
     try:
         yolo_model = YOLO("yolov8n.pt")
         original_img = Image.open(image_path)
         for r in results:
             for box in r.boxes:
+                if int(box.cls) == 39: # COCO index for bottle
                     x1, y1, x2, y2 = box.xyxy[0].tolist()
+                    # Pad the box by a fixed 20 pixels per side (clamped to the image bounds) so the vision model sees context
+                    w, h = original_img.size
+                    x1, y1 = max(0, x1 - 20), max(0, y1 - 20)
+                    x2, y2 = min(w, x2 + 20), min(h, y2 + 20)
+                    found_crops.append(original_img.crop((x1, y1, x2, y2)))
         del yolo_model
         gc.collect()
         return found_crops
     # 1. Vision Scanning
     if img_path:
         crops = get_bottle_crops(img_path)
+        # Use the first crop if available, otherwise the full image
         target_img = crops[0] if crops else Image.open(img_path)
+        # SmolVLM prefers this structured prompt format to separate image from instructions
+        # We use 'Assistant:' as a trigger for the model to begin its response
+        prompt_text = "User: <image>\nIdentify the brand and type of alcohol. Be concise.\nAssistant:"
         try:
+            output = vision_pipe(target_img, prompt=prompt_text, generate_kwargs={"max_new_tokens": 50})
             raw_label = output[0]['generated_text']
+            # Extract only the AI's new answer
+            if "Assistant:" in raw_label:
+                inventory = raw_label.split("Assistant:")[-1].strip()
+            else:
+                inventory = raw_label.replace(prompt_text, "").strip()
+            # Strip any leftover angle-bracket tags (e.g. <image>) from the answer
+            inventory = re.sub(r'<.*?>', '', inventory).strip()
+            # Keep only the text before the first period, so a full-sentence answer is shortened
+            inventory = inventory.split('.')[0]
         except Exception as e:
             print(f"Vision error: {e}")
             inventory = "Unknown Spirit"
+    # 2. RAG (Search the recipes)
     recipe_context = ""
+    # Safeguard: Don't search if we don't have a valid spirit name
+    if inventory and inventory not in ["Empty Shelf", "Unknown Spirit", ""]:
         try:
             if os.path.exists(CHROMA_PATH):
                 vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
             print(f"Search error: {e}")
     # 3. Create the Response
+    if inventory == "Unknown Spirit":
+        response = "I'm having trouble reading that label. Could you tell me what the bottle is, or try taking a clearer photo of just the label?"
+    elif recipe_context:
+        response = f"I see you have **{inventory}**. Here is a suggestion from your library:\n\n{recipe_context}"
     else:
+        response = f"I see you have **{inventory}**! I couldn't find a specific match in your uploaded books. Would you like a classic recommendation instead?"
     history.append({"role": "user", "content": message})
     history.append({"role": "assistant", "content": response})
     return history, inventory
 # --- UI LAYOUT ---
 with gr.Blocks() as demo:
     gr.Markdown("# 🍸 LocalAGI: The AI Sommelier")
     inv_state = gr.State("Empty Shelf")
     with gr.Row():
         with gr.Column(scale=1):
+            file_up = gr.File(label="1. Upload Recipe PDFs/TXTs", file_count="multiple")
             ingest_btn = gr.Button("📥 Load into Memory")
             status = gr.Textbox(label="System Status", value="Ready")
             gr.Markdown("---")
             img = gr.Image(type="filepath", label="2. Photo of your Bottle")
         with gr.Column(scale=2):
             chatbot = gr.Chatbot(height=500, label="Bartender Chat")
+            msg = gr.Textbox(label="3. Your Message", placeholder="Suggest a drink for me...")
             send_btn = gr.Button("Mix It Up", variant="primary")
+    # Connect UI events
     ingest_btn.click(ingest_recipes, file_up, status)
     msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
     send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
 if __name__ == "__main__":
     demo.launch(theme=gr.themes.Soft())