skshimada committed on
Commit
1cc7f06
·
verified ·
1 Parent(s): 4445e3c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -55
app.py CHANGED
@@ -12,13 +12,13 @@ from langchain_huggingface import HuggingFaceEmbeddings
12
  from ultralytics import YOLO
13
 
14
  # --- CONFIGURATION ---
 
15
  CHROMA_PATH = "/tmp/chroma_db"
16
- # SmolVLM is a very efficient "Vision-Language-Model" for CPU usage
17
  VISION_MODEL = "HuggingFaceTB/SmolVLM-Instruct"
18
 
19
  # --- SYSTEM INITIALIZATION ---
20
  print("⚙️ Loading Stable Vision Engine...")
21
- # We use device="cpu" and float32 to avoid the "accelerate" dependency error
22
  vision_pipe = pipeline(
23
  "image-text-to-text",
24
  model=VISION_MODEL,
@@ -29,23 +29,24 @@ vision_pipe = pipeline(
29
  print("📚 Loading Embedding Engine...")
30
  embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
31
 
32
- # --- BOTTLE DETECTION ---
33
  def get_bottle_crops(image_path):
34
- # YOLO downloads its weights automatically to the local directory
35
- yolo_model = YOLO("yolov8n.pt")
36
- results = yolo_model(image_path, verbose=False)
37
- found_crops = []
38
- original_img = Image.open(image_path)
39
- for r in results:
40
- for box in r.boxes:
41
- if int(box.cls) == 39: # COCO Index 39 = Bottle
42
- x1, y1, x2, y2 = box.xyxy[0].tolist()
43
- found_crops.append(original_img.crop((x1-5, y1-5, x2+5, y2+5)))
44
-
45
- # Manual cleanup to save RAM on the Free Tier
46
- del yolo_model
47
- gc.collect()
48
- return found_crops
 
49
 
50
  # --- RECIPE INGESTION ---
51
  def ingest_recipes(files):
@@ -66,53 +67,55 @@ def ingest_recipes(files):
66
  if not docs:
67
  return "❌ Could not extract text from files."
68
 
69
- # Initializing the vector database in /tmp for write access
70
  vector_store = Chroma.from_documents(
71
  documents=docs,
72
  embedding=embed_model,
73
  persist_directory=CHROMA_PATH
74
  )
75
- return f"✅ Ingested {len(docs)} pages/recipes into the bar library."
76
 
77
  # --- BARTENDER LOGIC ---
78
  def bartend(message, history, img_path, inventory):
79
- # 1. Vision Scanning (if image is provided)
80
  if img_path:
81
  crops = get_bottle_crops(img_path)
82
- target = crops[0] if crops else Image.open(img_path)
83
 
84
- # Format for SmolVLM to ensure high accuracy
85
- messages = [
86
- {
87
- "role": "user",
88
- "content": [
89
- {"type": "image"},
90
- {"type": "text", "text": "Identify the brand and specific alcohol type in this image. Answer briefly."}
91
- ]
92
- }
93
- ]
94
 
95
- output = vision_pipe(target, prompt=messages, generate_kwargs={"max_new_tokens": 30})
96
- raw_label = output[0]['generated_text']
97
- # Extract the Assistant's answer from the prompt/response sequence
98
- inventory = raw_label.split("Assistant:")[-1].strip()
 
 
 
 
 
 
 
 
 
99
 
100
- # 2. RAG (Recipe Search in PDF/TXT)
101
- context = ""
102
- try:
103
- vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
104
- search_query = f"Cocktail recipe using {inventory}"
105
- results = vs.similarity_search(search_query, k=2)
106
- context = "\n---\n".join([d.page_content for d in results])
107
- except Exception as e:
108
- print(f"Search error: {e}")
109
- context = ""
 
110
 
111
- # 3. Final Response Construction
112
- if context:
113
- response = f"I see you have **{inventory}**. Based on your recipe books, here is a suggestion:\n\n{context}"
114
  else:
115
- response = f"I identified **{inventory}** on your shelf! I don't see a specific match in your uploaded books, but I can suggest a classic drink for this spirit if you'd like."
116
 
117
  history.append((message, response))
118
  return history, inventory
@@ -124,19 +127,21 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
124
 
125
  with gr.Row():
126
  with gr.Column(scale=1):
127
- file_up = gr.File(label="1. Upload Recipe Books (PDF/TXT)", file_count="multiple")
128
- ingest_btn = gr.Button("📥 Load into Memory")
129
  status = gr.Textbox(label="System Status", value="Ready")
130
  gr.Markdown("---")
131
  img = gr.Image(type="filepath", label="2. Photo of your Bottle")
132
 
133
  with gr.Column(scale=2):
134
- chatbot = gr.Chatbot(height=500, label="Bartender")
135
- msg = gr.Textbox(label="3. Ask for a drink", placeholder="Tell me what you feel like drinking...")
136
  send_btn = gr.Button("Mix It Up", variant="primary")
137
 
138
- # Wire up events
139
  ingest_btn.click(ingest_recipes, file_up, status)
 
 
140
  msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
141
  send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
142
 
 
12
  from ultralytics import YOLO
13
 
14
  # --- CONFIGURATION ---
15
+ # We use /tmp because it is the only folder Hugging Face lets us write to
16
  CHROMA_PATH = "/tmp/chroma_db"
 
17
  VISION_MODEL = "HuggingFaceTB/SmolVLM-Instruct"
18
 
19
  # --- SYSTEM INITIALIZATION ---
20
  print("⚙️ Loading Stable Vision Engine...")
21
+ # We use float32 and CPU to ensure the app doesn't crash on the free tier
22
  vision_pipe = pipeline(
23
  "image-text-to-text",
24
  model=VISION_MODEL,
 
29
  print("📚 Loading Embedding Engine...")
30
  embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
31
 
32
# --- BOTTLE DETECTION (YOLO) ---
def get_bottle_crops(image_path):
    """Detect bottles in the image at *image_path* and return PIL crops.

    Runs a YOLOv8-nano model and collects one slightly padded crop per
    detection of COCO class 39 ('bottle').  Best-effort: returns an empty
    list if detection fails for any reason.

    Args:
        image_path: Filesystem path to the photo to scan.

    Returns:
        list[PIL.Image.Image]: cropped bottle regions (possibly empty).
    """
    try:
        # The model is loaded per call and released right after so resident
        # memory stays low on the free tier.
        yolo_model = YOLO("yolov8n.pt")
        results = yolo_model(image_path, verbose=False)
        found_crops = []
        # Context manager closes the underlying file handle (fixes a leak
        # from the previous bare Image.open()).  Pillow's crop() loads the
        # source eagerly, so the crops stay valid after the file is closed.
        with Image.open(image_path) as original_img:
            width, height = original_img.size
            for r in results:
                for box in r.boxes:
                    if int(box.cls) == 39:  # 39 is the 'bottle' category
                        x1, y1, x2, y2 = box.xyxy[0].tolist()
                        # Pad by 5 px, clamped so the box never extends
                        # past the image frame.
                        found_crops.append(original_img.crop((
                            max(x1 - 5, 0),
                            max(y1 - 5, 0),
                            min(x2 + 5, width),
                            min(y2 + 5, height),
                        )))
        del yolo_model
        gc.collect()
        return found_crops
    except Exception as e:
        # Never crash the chat flow on a vision error; report and degrade.
        print(f"YOLO Error: {e}")
        return []
50
 
51
  # --- RECIPE INGESTION ---
52
  def ingest_recipes(files):
 
67
  if not docs:
68
  return "❌ Could not extract text from files."
69
 
70
+ # This creates the searchable 'brain' from your PDFs
71
  vector_store = Chroma.from_documents(
72
  documents=docs,
73
  embedding=embed_model,
74
  persist_directory=CHROMA_PATH
75
  )
76
+ return f"✅ Bar library updated with {len(docs)} items."
77
 
78
# --- BARTENDER LOGIC ---
def bartend(message, history, img_path, inventory):
    """Chat handler for the Gradio UI.

    Optionally identifies the bottle in *img_path* (updating *inventory*),
    searches the ingested recipe library for a matching cocktail, and
    appends the bartender's reply to *history*.

    Returns:
        tuple: (updated history, updated inventory) for the Gradio state.
    """
    # Step 1 -- vision: refresh the inventory from the photo, if one was given.
    if img_path:
        detections = get_bottle_crops(img_path)
        scan_target = detections[0] if detections else Image.open(img_path)

        # A plain prompt string works best with this pipeline version.
        prompt_text = "What is the brand and type of alcohol in this image? Answer briefly."

        try:
            result = vision_pipe(
                scan_target,
                prompt=prompt_text,
                generate_kwargs={"max_new_tokens": 30},
            )
            generated = result[0]['generated_text']
            # Strip the echoed prompt / answer marker down to the label itself.
            marker = "Answer:"
            inventory = (
                generated.split(marker)[-1].strip()
                if marker in generated
                else generated.replace(prompt_text, "").strip()
            )
        except Exception as e:
            print(f"Vision error: {e}")
            inventory = "Unknown Spirit"

    # Step 2 -- retrieval: search the recipe library for this spirit.
    recipe_context = ""
    if inventory and inventory != "Empty Shelf":
        try:
            if os.path.exists(CHROMA_PATH):
                store = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
                hits = store.similarity_search(f"Cocktail recipe using {inventory}", k=2)
                recipe_context = "\n---\n".join(d.page_content for d in hits)
        except Exception as e:
            print(f"Search error: {e}")

    # Step 3 -- compose the bartender's reply.
    if recipe_context:
        response = f"I see you have **{inventory}**. Here is a recipe I found in your collection:\n\n{recipe_context}"
    else:
        response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the current library. Should I suggest a classic drink?"

    history.append((message, response))
    return history, inventory
 
127
 
128
  with gr.Row():
129
  with gr.Column(scale=1):
130
+ file_up = gr.File(label="1. Upload Recipe PDFs/TXTs", file_count="multiple")
131
+ ingest_btn = gr.Button("📥 Load Recipes into Memory")
132
  status = gr.Textbox(label="System Status", value="Ready")
133
  gr.Markdown("---")
134
  img = gr.Image(type="filepath", label="2. Photo of your Bottle")
135
 
136
  with gr.Column(scale=2):
137
+ chatbot = gr.Chatbot(height=500, label="Bartender Chat")
138
+ msg = gr.Textbox(label="3. Your Message", placeholder="Ask for a drink suggestion...")
139
  send_btn = gr.Button("Mix It Up", variant="primary")
140
 
141
+ # Connect the buttons to the logic
142
  ingest_btn.click(ingest_recipes, file_up, status)
143
+
144
+ # Allows pressing 'Enter' in the textbox or clicking the button
145
  msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
146
  send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
147