Spaces:

skshimada
/

Hello

Sleeping

App Files Files Community

skshimada committed 12 days ago

Commit

993f3d0

verified ·

1 Parent(s): 6b342db

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -18

app.py CHANGED Viewed

@@ -17,7 +17,6 @@ VISION_MODEL = "HuggingFaceTB/SmolVLM-Instruct"
 # --- SYSTEM INITIALIZATION ---
 print("⚙️ Loading Stable Vision Engine...")
-# We use float32 and CPU to ensure the app doesn't crash on the free tier
 vision_pipe = pipeline(
     "image-text-to-text",
     model=VISION_MODEL,
@@ -39,7 +38,11 @@ def get_bottle_crops(image_path):
             for box in r.boxes:
                 if int(box.cls) == 39: # Bottle index
                     x1, y1, x2, y2 = box.xyxy[0].tolist()
-                    found_crops.append(original_img.crop((x1-5, y1-5, x2+5, y2+5)))
         del yolo_model
         gc.collect()
         return found_crops
@@ -50,7 +53,6 @@ def get_bottle_crops(image_path):
 # --- RECIPE INGESTION ---
 def ingest_recipes(files):
     if not files: return "❌ No files uploaded."
     docs = []
     for f in files:
         try:
@@ -63,8 +65,7 @@ def ingest_recipes(files):
         except Exception as e:
             print(f"Error loading {f.name}: {e}")
-    if not docs:
-        return "❌ Could not extract text from files."
     vector_store = Chroma.from_documents(
         documents=docs,
@@ -83,20 +84,23 @@ def bartend(message, history, img_path, inventory):
         prompt_text = "User: <image>\nWhat is the brand and type of alcohol in this image? Answer briefly.\nAssistant:"
         try:
-            output = vision_pipe(target_img, prompt=prompt_text, generate_kwargs={"max_new_tokens": 30})
             raw_label = output[0]['generated_text']
             if "Assistant:" in raw_label:
                 inventory = raw_label.split("Assistant:")[-1].strip()
             else:
                 inventory = raw_label.replace(prompt_text, "").strip()
         except Exception as e:
             print(f"Vision error: {e}")
             inventory = "Unknown Spirit"
     # 2. RAG (Recipe Search)
     recipe_context = ""
-    if inventory and inventory != "Empty Shelf":
         try:
             if os.path.exists(CHROMA_PATH):
                 vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
@@ -107,20 +111,21 @@ def bartend(message, history, img_path, inventory):
             print(f"Search error: {e}")
     # 3. Create the Response
-    if recipe_context:
-        response = f"I see you have **{inventory}**. Here is a recipe I found in your collection:\n\n{recipe_context}"
     else:
-        response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the current library. Should I suggest a classic drink?"
-    # --- UPDATED FOR NEW GRADIO FORMAT ---
-    # We now use a dictionary format {role, content} instead of tuples
     history.append({"role": "user", "content": message})
     history.append({"role": "assistant", "content": response})
     return history, inventory
 # --- UI LAYOUT ---
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🍸 LocalAGI: The AI Sommelier")
     inv_state = gr.State("Empty Shelf")
@@ -133,17 +138,16 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             img = gr.Image(type="filepath", label="2. Photo of your Bottle")
         with gr.Column(scale=2):
-            # FIXED: Added type="messages" to tell Gradio we are using the new dictionary format
-            chatbot = gr.Chatbot(height=500, label="Bartender Chat", type="messages")
             msg = gr.Textbox(label="3. Your Message", placeholder="Ask for a drink suggestion...")
             send_btn = gr.Button("Mix It Up", variant="primary")
     # Connect the buttons
     ingest_btn.click(ingest_recipes, file_up, status)
-    # Connect Chat Events
     msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
     send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
 if __name__ == "__main__":
-    demo.launch()

 # --- SYSTEM INITIALIZATION ---
 print("⚙️ Loading Stable Vision Engine...")
 vision_pipe = pipeline(
     "image-text-to-text",
     model=VISION_MODEL,
             for box in r.boxes:
                 if int(box.cls) == 39: # Bottle index
                     x1, y1, x2, y2 = box.xyxy[0].tolist()
+                    w, h = original_img.size
+                    # Add margin for context
+                    x1, y1 = max(0, x1 - 20), max(0, y1 - 20)
+                    x2, y2 = min(w, x2 + 20), min(h, y2 + 20)
+                    found_crops.append(original_img.crop((x1, y1, x2, y2)))
         del yolo_model
         gc.collect()
         return found_crops
 # --- RECIPE INGESTION ---
 def ingest_recipes(files):
     if not files: return "❌ No files uploaded."
     docs = []
     for f in files:
         try:
         except Exception as e:
             print(f"Error loading {f.name}: {e}")
+    if not docs: return "❌ Could not extract text."
     vector_store = Chroma.from_documents(
         documents=docs,
         prompt_text = "User: <image>\nWhat is the brand and type of alcohol in this image? Answer briefly.\nAssistant:"
         try:
+            output = vision_pipe(target_img, prompt=prompt_text, generate_kwargs={"max_new_tokens": 50})
             raw_label = output[0]['generated_text']
             if "Assistant:" in raw_label:
                 inventory = raw_label.split("Assistant:")[-1].strip()
             else:
                 inventory = raw_label.replace(prompt_text, "").strip()
+            inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
         except Exception as e:
             print(f"Vision error: {e}")
             inventory = "Unknown Spirit"
     # 2. RAG (Recipe Search)
     recipe_context = ""
+    if inventory and inventory not in ["Empty Shelf", "Unknown Spirit", ""]:
         try:
             if os.path.exists(CHROMA_PATH):
                 vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
             print(f"Search error: {e}")
     # 3. Create the Response
+    if inventory == "Unknown Spirit":
+        response = "I'm having trouble reading that label. Try taking a closer photo of just the brand name."
+    elif recipe_context:
+        response = f"I see you have **{inventory}**. Here is a recipe from your collection:\n\n{recipe_context}"
     else:
+        response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the library yet."
+    # Standard Dictionary Format for Gradio 6.0
     history.append({"role": "user", "content": message})
     history.append({"role": "assistant", "content": response})
     return history, inventory
 # --- UI LAYOUT ---
+with gr.Blocks() as demo:
     gr.Markdown("# 🍸 LocalAGI: The AI Sommelier")
     inv_state = gr.State("Empty Shelf")
             img = gr.Image(type="filepath", label="2. Photo of your Bottle")
         with gr.Column(scale=2):
+            # FIXED LINE BELOW: Removed type="messages"
+            chatbot = gr.Chatbot(height=500, label="Bartender Chat")
             msg = gr.Textbox(label="3. Your Message", placeholder="Ask for a drink suggestion...")
             send_btn = gr.Button("Mix It Up", variant="primary")
     # Connect the buttons
     ingest_btn.click(ingest_recipes, file_up, status)
     msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
     send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
 if __name__ == "__main__":
+    # Theme moved here for Gradio 6.0 compatibility
+    demo.launch(theme=gr.themes.Soft())