Spaces:

skshimada
/

Hello

Sleeping

App Files Community

skshimada committed 11 days ago

Commit

c3f6e08

verified ·

1 Parent(s): 1cc7f06

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -12

app.py CHANGED Viewed

@@ -12,13 +12,11 @@ from langchain_huggingface import HuggingFaceEmbeddings
 from ultralytics import YOLO
 # --- CONFIGURATION ---
-# We use /tmp because it is the only folder Hugging Face lets us write to
 CHROMA_PATH = "/tmp/chroma_db"
 VISION_MODEL = "HuggingFaceTB/SmolVLM-Instruct"
 # --- SYSTEM INITIALIZATION ---
 print("⚙️ Loading Stable Vision Engine...")
-# We use float32 and CPU to ensure the app doesn't crash on the free tier
 vision_pipe = pipeline(
     "image-text-to-text",
     model=VISION_MODEL,
@@ -38,7 +36,7 @@ def get_bottle_crops(image_path):
         original_img = Image.open(image_path)
         for r in results:
             for box in r.boxes:
-                if int(box.cls) == 39: # 39 is the 'bottle' category
                     x1, y1, x2, y2 = box.xyxy[0].tolist()
                     found_crops.append(original_img.crop((x1-5, y1-5, x2+5, y2+5)))
         del yolo_model
@@ -67,7 +65,6 @@ def ingest_recipes(files):
     if not docs:
         return "❌ Could not extract text from files."
-    # This creates the searchable 'brain' from your PDFs
     vector_store = Chroma.from_documents(
         documents=docs,
         embedding=embed_model,
@@ -82,15 +79,12 @@ def bartend(message, history, img_path, inventory):
         crops = get_bottle_crops(img_path)
         target_img = crops[0] if crops else Image.open(img_path)
-        # We use a simple prompt string which works best for this pipeline version
         prompt_text = "What is the brand and type of alcohol in this image? Answer briefly."
         try:
-            # Fixing the pipeline call format
             output = vision_pipe(target_img, prompt=prompt_text, generate_kwargs={"max_new_tokens": 30})
             raw_label = output[0]['generated_text']
-            # Clean the output to get just the name
             if "Answer:" in raw_label:
                 inventory = raw_label.split("Answer:")[-1].strip()
             else:
@@ -99,7 +93,7 @@ def bartend(message, history, img_path, inventory):
             print(f"Vision error: {e}")
             inventory = "Unknown Spirit"
-    # 2. RAG (Search the PDF recipes)
     recipe_context = ""
     if inventory and inventory != "Empty Shelf":
         try:
@@ -117,7 +111,10 @@ def bartend(message, history, img_path, inventory):
     else:
         response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the current library. Should I suggest a classic drink?"
-    history.append((message, response))
     return history, inventory
 # --- UI LAYOUT ---
@@ -134,14 +131,15 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             img = gr.Image(type="filepath", label="2. Photo of your Bottle")
         with gr.Column(scale=2):
-            chatbot = gr.Chatbot(height=500, label="Bartender Chat")
             msg = gr.Textbox(label="3. Your Message", placeholder="Ask for a drink suggestion...")
             send_btn = gr.Button("Mix It Up", variant="primary")
-    # Connect the buttons to the logic
     ingest_btn.click(ingest_recipes, file_up, status)
-    # Allows pressing 'Enter' in the textbox or clicking the button
     msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
     send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])

 from ultralytics import YOLO
 # --- CONFIGURATION ---
 CHROMA_PATH = "/tmp/chroma_db"
 VISION_MODEL = "HuggingFaceTB/SmolVLM-Instruct"
 # --- SYSTEM INITIALIZATION ---
 print("⚙️ Loading Stable Vision Engine...")
 vision_pipe = pipeline(
     "image-text-to-text",
     model=VISION_MODEL,
         original_img = Image.open(image_path)
         for r in results:
             for box in r.boxes:
+                if int(box.cls) == 39: # Bottle index
                     x1, y1, x2, y2 = box.xyxy[0].tolist()
                     found_crops.append(original_img.crop((x1-5, y1-5, x2+5, y2+5)))
         del yolo_model
     if not docs:
         return "❌ Could not extract text from files."
     vector_store = Chroma.from_documents(
         documents=docs,
         embedding=embed_model,
         crops = get_bottle_crops(img_path)
         target_img = crops[0] if crops else Image.open(img_path)
         prompt_text = "What is the brand and type of alcohol in this image? Answer briefly."
         try:
             output = vision_pipe(target_img, prompt=prompt_text, generate_kwargs={"max_new_tokens": 30})
             raw_label = output[0]['generated_text']
             if "Answer:" in raw_label:
                 inventory = raw_label.split("Answer:")[-1].strip()
             else:
             print(f"Vision error: {e}")
             inventory = "Unknown Spirit"
+    # 2. RAG (Recipe Search)
     recipe_context = ""
     if inventory and inventory != "Empty Shelf":
         try:
     else:
         response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the current library. Should I suggest a classic drink?"
+    # --- UPDATED FOR GRADIO "MESSAGES" FORMAT ---
+    history.append({"role": "user", "content": message})
+    history.append({"role": "assistant", "content": response})
     return history, inventory
 # --- UI LAYOUT ---
             img = gr.Image(type="filepath", label="2. Photo of your Bottle")
         with gr.Column(scale=2):
+            # FIXED: Added type="messages" to match the new dictionary history format
+            chatbot = gr.Chatbot(height=500, label="Bartender Chat", type="messages")
             msg = gr.Textbox(label="3. Your Message", placeholder="Ask for a drink suggestion...")
             send_btn = gr.Button("Mix It Up", variant="primary")
+    # Connect the buttons
     ingest_btn.click(ingest_recipes, file_up, status)
+    # Connect Chat Events
     msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
     send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])