skshimada committed on
Commit
1cc7f06
·
verified ·
1 Parent(s): 4445e3c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -55
app.py CHANGED
@@ -12,13 +12,13 @@ from langchain_huggingface import HuggingFaceEmbeddings
12
  from ultralytics import YOLO
13
 
14
  # --- CONFIGURATION ---
 
15
  CHROMA_PATH = "/tmp/chroma_db"
16
- # SmolVLM is a very efficient "Vision-Language-Model" for CPU usage
17
  VISION_MODEL = "HuggingFaceTB/SmolVLM-Instruct"
18
 
19
  # --- SYSTEM INITIALIZATION ---
20
  print("⚙️ Loading Stable Vision Engine...")
21
- # We use device="cpu" and float32 to avoid the "accelerate" dependency error
22
  vision_pipe = pipeline(
23
  "image-text-to-text",
24
  model=VISION_MODEL,
@@ -29,23 +29,24 @@ vision_pipe = pipeline(
29
  print("📚 Loading Embedding Engine...")
30
  embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
31
 
32
- # --- BOTTLE DETECTION ---
33
  def get_bottle_crops(image_path):
34
- # YOLO downloads its weights automatically to the local directory
35
- yolo_model = YOLO("yolov8n.pt")
36
- results = yolo_model(image_path, verbose=False)
37
- found_crops = []
38
- original_img = Image.open(image_path)
39
- for r in results:
40
- for box in r.boxes:
41
- if int(box.cls) == 39: # COCO Index 39 = Bottle
42
- x1, y1, x2, y2 = box.xyxy[0].tolist()
43
- found_crops.append(original_img.crop((x1-5, y1-5, x2+5, y2+5)))
44
-
45
- # Manual cleanup to save RAM on the Free Tier
46
- del yolo_model
47
- gc.collect()
48
- return found_crops
 
49
 
50
  # --- RECIPE INGESTION ---
51
  def ingest_recipes(files):
@@ -66,53 +67,55 @@ def ingest_recipes(files):
66
  if not docs:
67
  return "❌ Could not extract text from files."
68
 
69
- # Initializing the vector database in /tmp for write access
70
  vector_store = Chroma.from_documents(
71
  documents=docs,
72
  embedding=embed_model,
73
  persist_directory=CHROMA_PATH
74
  )
75
- return f"✅ Ingested {len(docs)} pages/recipes into the bar library."
76
 
77
  # --- BARTENDER LOGIC ---
78
  def bartend(message, history, img_path, inventory):
79
- # 1. Vision Scanning (if image is provided)
80
  if img_path:
81
  crops = get_bottle_crops(img_path)
82
- target = crops[0] if crops else Image.open(img_path)
83
 
84
- # Format for SmolVLM to ensure high accuracy
85
- messages = [
86
- {
87
- "role": "user",
88
- "content": [
89
- {"type": "image"},
90
- {"type": "text", "text": "Identify the brand and specific alcohol type in this image. Answer briefly."}
91
- ]
92
- }
93
- ]
94
 
95
- output = vision_pipe(target, prompt=messages, generate_kwargs={"max_new_tokens": 30})
96
- raw_label = output[0]['generated_text']
97
- # Extract the Assistant's answer from the prompt/response sequence
98
- inventory = raw_label.split("Assistant:")[-1].strip()
 
 
 
 
 
 
 
 
 
99
 
100
- # 2. RAG (Recipe Search in PDF/TXT)
101
- context = ""
102
- try:
103
- vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
104
- search_query = f"Cocktail recipe using {inventory}"
105
- results = vs.similarity_search(search_query, k=2)
106
- context = "\n---\n".join([d.page_content for d in results])
107
- except Exception as e:
108
- print(f"Search error: {e}")
109
- context = ""
 
110
 
111
- # 3. Final Response Construction
112
- if context:
113
- response = f"I see you have **{inventory}**. Based on your recipe books, here is a suggestion:\n\n{context}"
114
  else:
115
- response = f"I identified **{inventory}** on your shelf! I don't see a specific match in your uploaded books, but I can suggest a classic drink for this spirit if you'd like."
116
 
117
  history.append((message, response))
118
  return history, inventory
@@ -124,19 +127,21 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
124
 
125
  with gr.Row():
126
  with gr.Column(scale=1):
127
- file_up = gr.File(label="1. Upload Recipe Books (PDF/TXT)", file_count="multiple")
128
- ingest_btn = gr.Button("📥 Load into Memory")
129
  status = gr.Textbox(label="System Status", value="Ready")
130
  gr.Markdown("---")
131
  img = gr.Image(type="filepath", label="2. Photo of your Bottle")
132
 
133
  with gr.Column(scale=2):
134
- chatbot = gr.Chatbot(height=500, label="Bartender")
135
- msg = gr.Textbox(label="3. Ask for a drink", placeholder="Tell me what you feel like drinking...")
136
  send_btn = gr.Button("Mix It Up", variant="primary")
137
 
138
- # Wire up events
139
  ingest_btn.click(ingest_recipes, file_up, status)
 
 
140
  msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
141
  send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
142
 
 
12
  from ultralytics import YOLO
13
 
14
  # --- CONFIGURATION ---
15
+ # We use /tmp because it is the only folder Hugging Face lets us write to
16
  CHROMA_PATH = "/tmp/chroma_db"
 
17
  VISION_MODEL = "HuggingFaceTB/SmolVLM-Instruct"
18
 
19
  # --- SYSTEM INITIALIZATION ---
20
  print("⚙️ Loading Stable Vision Engine...")
21
+ # We use float32 and CPU to ensure the app doesn't crash on the free tier
22
  vision_pipe = pipeline(
23
  "image-text-to-text",
24
  model=VISION_MODEL,
 
29
  print("📚 Loading Embedding Engine...")
30
  embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
31
 
32
# --- BOTTLE DETECTION (YOLO) ---
def get_bottle_crops(image_path):
    """Detect bottles in the image at *image_path* and return PIL crops.

    Runs a YOLOv8-nano model and collects one slightly padded crop per
    detection of COCO class 39 ('bottle').  Best-effort: returns an empty
    list if detection fails for any reason.

    Args:
        image_path: Filesystem path to the photo to scan.

    Returns:
        list[PIL.Image.Image]: cropped bottle regions (possibly empty).
    """
    try:
        # The model is loaded per call and released right after so resident
        # memory stays low on the free tier.
        yolo_model = YOLO("yolov8n.pt")
        results = yolo_model(image_path, verbose=False)
        found_crops = []
        # Context manager closes the underlying file handle (fixes a leak
        # from the previous bare Image.open()).  Pillow's crop() loads the
        # source eagerly, so the crops stay valid after the file is closed.
        with Image.open(image_path) as original_img:
            width, height = original_img.size
            for r in results:
                for box in r.boxes:
                    if int(box.cls) == 39:  # 39 is the 'bottle' category
                        x1, y1, x2, y2 = box.xyxy[0].tolist()
                        # Pad by 5 px, clamped so the box never extends
                        # past the image frame.
                        found_crops.append(original_img.crop((
                            max(x1 - 5, 0),
                            max(y1 - 5, 0),
                            min(x2 + 5, width),
                            min(y2 + 5, height),
                        )))
        del yolo_model
        gc.collect()
        return found_crops
    except Exception as e:
        # Never crash the chat flow on a vision error; report and degrade.
        print(f"YOLO Error: {e}")
        return []
50
 
51
  # --- RECIPE INGESTION ---
52
  def ingest_recipes(files):
 
67
  if not docs:
68
  return "❌ Could not extract text from files."
69
 
70
+ # This creates the searchable 'brain' from your PDFs
71
  vector_store = Chroma.from_documents(
72
  documents=docs,
73
  embedding=embed_model,
74
  persist_directory=CHROMA_PATH
75
  )
76
+ return f"✅ Bar library updated with {len(docs)} items."
77
 
78
# --- BARTENDER LOGIC ---
def bartend(message, history, img_path, inventory):
    """Chat handler for the Gradio UI.

    Optionally identifies the bottle in *img_path* (updating *inventory*),
    searches the ingested recipe library for a matching cocktail, and
    appends the bartender's reply to *history*.

    Returns:
        tuple: (updated history, updated inventory) for the Gradio state.
    """
    # Step 1 -- vision: refresh the inventory from the photo, if one was given.
    if img_path:
        detections = get_bottle_crops(img_path)
        scan_target = detections[0] if detections else Image.open(img_path)

        # A plain prompt string works best with this pipeline version.
        prompt_text = "What is the brand and type of alcohol in this image? Answer briefly."

        try:
            result = vision_pipe(
                scan_target,
                prompt=prompt_text,
                generate_kwargs={"max_new_tokens": 30},
            )
            generated = result[0]['generated_text']
            # Strip the echoed prompt / answer marker down to the label itself.
            marker = "Answer:"
            inventory = (
                generated.split(marker)[-1].strip()
                if marker in generated
                else generated.replace(prompt_text, "").strip()
            )
        except Exception as e:
            print(f"Vision error: {e}")
            inventory = "Unknown Spirit"

    # Step 2 -- retrieval: search the recipe library for this spirit.
    recipe_context = ""
    if inventory and inventory != "Empty Shelf":
        try:
            if os.path.exists(CHROMA_PATH):
                store = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
                hits = store.similarity_search(f"Cocktail recipe using {inventory}", k=2)
                recipe_context = "\n---\n".join(d.page_content for d in hits)
        except Exception as e:
            print(f"Search error: {e}")

    # Step 3 -- compose the bartender's reply.
    if recipe_context:
        response = f"I see you have **{inventory}**. Here is a recipe I found in your collection:\n\n{recipe_context}"
    else:
        response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the current library. Should I suggest a classic drink?"

    history.append((message, response))
    return history, inventory
 
127
 
128
  with gr.Row():
129
  with gr.Column(scale=1):
130
+ file_up = gr.File(label="1. Upload Recipe PDFs/TXTs", file_count="multiple")
131
+ ingest_btn = gr.Button("📥 Load Recipes into Memory")
132
  status = gr.Textbox(label="System Status", value="Ready")
133
  gr.Markdown("---")
134
  img = gr.Image(type="filepath", label="2. Photo of your Bottle")
135
 
136
  with gr.Column(scale=2):
137
+ chatbot = gr.Chatbot(height=500, label="Bartender Chat")
138
+ msg = gr.Textbox(label="3. Your Message", placeholder="Ask for a drink suggestion...")
139
  send_btn = gr.Button("Mix It Up", variant="primary")
140
 
141
+ # Connect the buttons to the logic
142
  ingest_btn.click(ingest_recipes, file_up, status)
143
+
144
+ # Allows pressing 'Enter' in the textbox or clicking the button
145
  msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
146
  send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
147