skshimada committed on
Commit
33c5c81
·
verified ·
1 Parent(s): ce0a4da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -32
app.py CHANGED
@@ -7,6 +7,7 @@ from PIL import Image
7
  from transformers import pipeline
8
  from langchain_chroma import Chroma
9
  from langchain_community.document_loaders import PyPDFLoader, TextLoader
 
10
  from langchain_core.documents import Document
11
  from langchain_huggingface import HuggingFaceEmbeddings
12
  from ultralytics import YOLO
@@ -27,7 +28,7 @@ vision_pipe = pipeline(
27
  print("📚 Loading Embedding Engine...")
28
  embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
29
 
30
- # --- BOTTLE DETECTION (SMART PADDING) ---
31
  def get_bottle_crops(image_path):
32
  print(f"🔍 DEBUG: Starting YOLO on {image_path}")
33
  found_crops = []
@@ -37,13 +38,11 @@ def get_bottle_crops(image_path):
37
  img_w, img_h = original_img.size
38
 
39
  yolo_model = YOLO("yolov8n.pt")
40
- # Extremely low confidence to catch anything
41
  results = yolo_model(image_path, verbose=True, conf=0.1)
42
 
43
  for r in results:
44
  for box in r.boxes:
45
- # Class 39 is bottle. We also check Class 40 (Wine glass) or 41 (Cup) just in case
46
- if int(box.cls) in [39, 40, 41]:
47
  x1, y1, x2, y2 = box.xyxy[0].tolist()
48
 
49
  # Dynamic 25% Padding
@@ -76,9 +75,10 @@ def get_bottle_crops(image_path):
76
  except:
77
  return []
78
 
79
- # --- RECIPE INGESTION ---
80
  def ingest_recipes(files):
81
  if not files: return "❌ No files uploaded."
 
82
  docs = []
83
  for f in files:
84
  try:
@@ -93,12 +93,21 @@ def ingest_recipes(files):
93
 
94
  if not docs: return "❌ Could not extract text."
95
 
 
 
 
 
 
 
 
 
 
96
  vector_store = Chroma.from_documents(
97
- documents=docs,
98
  embedding=embed_model,
99
  persist_directory=CHROMA_PATH
100
  )
101
- return f"✅ Bar library updated with {len(docs)} items."
102
 
103
  # --- BARTENDER LOGIC ---
104
  def bartend(message, history, img_path, inventory):
@@ -108,48 +117,34 @@ def bartend(message, history, img_path, inventory):
108
  if img_path:
109
  crops = get_bottle_crops(img_path)
110
  debug_images = crops
111
-
112
- # Start with the best crop
113
  target_img = crops[0] if crops else Image.open(img_path).convert("RGB")
114
 
115
- # Helper function with FIXED calling signature
116
  def identify_spirit(image_input):
117
- # Ensure image is RGB to prevent pipeline errors
118
- if image_input.mode != "RGB":
119
- image_input = image_input.convert("RGB")
120
-
121
- prompt = "User: <image>\nRead the label on the bottle. What is the specific brand and type of alcohol? Be precise.\nAssistant:"
122
-
123
- # FIXED: Passing prompt as a positional argument (the second argument)
124
- # This fixes the "ValueError: You must provide text" error
125
  out = vision_pipe(image_input, prompt, generate_kwargs={"max_new_tokens": 50})
126
-
127
  text = out[0]['generated_text']
128
- if "Assistant:" in text:
129
- return text.split("Assistant:")[-1].strip()
130
  return text.replace("User: <image>", "").strip()
131
 
132
- # Run Pass 1
133
  try:
134
  inventory = identify_spirit(target_img)
135
  inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
136
  print(f"🔍 Pass 1 Result: {inventory}")
137
 
138
- # Generic Fallback Logic
139
- generic_terms = ["vodka", "gin", "rum", "tequila", "whiskey", "whisky", "bourbon", "brandy", "alcohol", "liquor", "spirit", "bottle", "drink", "glass"]
140
-
141
- # If the answer is too short or generic, try the FULL image
142
  if inventory.lower() in generic_terms or len(inventory) < 4:
143
  print("⚠️ Result too generic. Trying FULL IMAGE...")
144
  full_img_result = identify_spirit(Image.open(img_path).convert("RGB"))
145
  full_img_result = re.sub(r'<.*?>', '', full_img_result).strip().split('.')[0]
146
-
147
  if len(full_img_result) > len(inventory):
148
  inventory = full_img_result
149
  print(f"✅ Pass 2 Result: {inventory}")
150
 
151
  except Exception as e:
152
- print(f"❌ Vision Pipeline Failed: {e}")
153
  inventory = "Unknown Spirit"
154
 
155
  # 2. RAG (Recipe Search)
@@ -159,7 +154,9 @@ def bartend(message, history, img_path, inventory):
159
  if os.path.exists(CHROMA_PATH):
160
  vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
161
  search_query = f"Cocktail recipe using {inventory}"
162
- results = vs.similarity_search(search_query, k=2)
 
 
163
  recipe_context = "\n---\n".join([d.page_content for d in results])
164
  except Exception as e:
165
  print(f"Search error: {e}")
@@ -168,11 +165,10 @@ def bartend(message, history, img_path, inventory):
168
  if inventory == "Unknown Spirit":
169
  response = "I'm having trouble reading that label. Check the 'Vision Debug' gallery below—is the crop clear?"
170
  elif recipe_context:
171
- response = f"I see you have **{inventory}**. Here is a recipe I found in your collection:\n\n{recipe_context}"
172
  else:
173
  response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the current library. Should I suggest a classic drink?"
174
 
175
- # Gradio 6.0 Dictionary Format
176
  history.append({"role": "user", "content": message})
177
  history.append({"role": "assistant", "content": response})
178
 
@@ -201,7 +197,6 @@ with gr.Blocks() as demo:
201
  send_btn = gr.Button("Mix It Up", variant="primary")
202
 
203
  ingest_btn.click(ingest_recipes, file_up, status)
204
-
205
  msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])
206
  send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])
207
 
 
7
  from transformers import pipeline
8
  from langchain_chroma import Chroma
9
  from langchain_community.document_loaders import PyPDFLoader, TextLoader
10
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
11
  from langchain_core.documents import Document
12
  from langchain_huggingface import HuggingFaceEmbeddings
13
  from ultralytics import YOLO
 
28
  print("📚 Loading Embedding Engine...")
29
  embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
30
 
31
+ # --- BOTTLE DETECTION ---
32
  def get_bottle_crops(image_path):
33
  print(f"🔍 DEBUG: Starting YOLO on {image_path}")
34
  found_crops = []
 
38
  img_w, img_h = original_img.size
39
 
40
  yolo_model = YOLO("yolov8n.pt")
 
41
  results = yolo_model(image_path, verbose=True, conf=0.1)
42
 
43
  for r in results:
44
  for box in r.boxes:
45
+ if int(box.cls) in [39, 40, 41]: # Bottle, Wine Glass, Cup
 
46
  x1, y1, x2, y2 = box.xyxy[0].tolist()
47
 
48
  # Dynamic 25% Padding
 
75
  except:
76
  return []
77
 
78
+ # --- RECIPE INGESTION (NOW WITH SCISSORS!) ---
79
  def ingest_recipes(files):
80
  if not files: return "❌ No files uploaded."
81
+
82
  docs = []
83
  for f in files:
84
  try:
 
93
 
94
  if not docs: return "❌ Could not extract text."
95
 
96
+ # --- THE FIX: SPLIT TEXT INTO RECIPES ---
97
+ # We split by "Recipe:" or newlines to ensure each drink is its own 'chunk'
98
+ text_splitter = RecursiveCharacterTextSplitter(
99
+ chunk_size=600, # Approximate size of one recipe
100
+ chunk_overlap=50, # Slight overlap to avoid cutting words
101
+ separators=["\nRecipe:", "Recipe:", "\n\n", "\n"] # Priority splitters
102
+ )
103
+ splits = text_splitter.split_documents(docs)
104
+
105
  vector_store = Chroma.from_documents(
106
+ documents=splits, # We ingest the SPLITS, not the whole doc
107
  embedding=embed_model,
108
  persist_directory=CHROMA_PATH
109
  )
110
+ return f"✅ Bar library updated. Split into {len(splits)} individual recipes."
111
 
112
  # --- BARTENDER LOGIC ---
113
  def bartend(message, history, img_path, inventory):
 
117
  if img_path:
118
  crops = get_bottle_crops(img_path)
119
  debug_images = crops
 
 
120
  target_img = crops[0] if crops else Image.open(img_path).convert("RGB")
121
 
 
122
  def identify_spirit(image_input):
123
+ if image_input.mode != "RGB": image_input = image_input.convert("RGB")
124
+ prompt = "User: <image>\nRead the label. What is the specific brand and type of alcohol? Be precise.\nAssistant:"
125
+ # Positional argument fix
 
 
 
 
 
126
  out = vision_pipe(image_input, prompt, generate_kwargs={"max_new_tokens": 50})
 
127
  text = out[0]['generated_text']
128
+ if "Assistant:" in text: return text.split("Assistant:")[-1].strip()
 
129
  return text.replace("User: <image>", "").strip()
130
 
 
131
  try:
132
  inventory = identify_spirit(target_img)
133
  inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
134
  print(f"🔍 Pass 1 Result: {inventory}")
135
 
136
+ # Generic Fallback
137
+ generic_terms = ["vodka", "gin", "rum", "tequila", "whiskey", "whisky", "bourbon", "brandy", "alcohol", "liquor", "spirit", "bottle", "drink"]
 
 
138
  if inventory.lower() in generic_terms or len(inventory) < 4:
139
  print("⚠️ Result too generic. Trying FULL IMAGE...")
140
  full_img_result = identify_spirit(Image.open(img_path).convert("RGB"))
141
  full_img_result = re.sub(r'<.*?>', '', full_img_result).strip().split('.')[0]
 
142
  if len(full_img_result) > len(inventory):
143
  inventory = full_img_result
144
  print(f"✅ Pass 2 Result: {inventory}")
145
 
146
  except Exception as e:
147
+ print(f"❌ Vision Failed: {e}")
148
  inventory = "Unknown Spirit"
149
 
150
  # 2. RAG (Recipe Search)
 
154
  if os.path.exists(CHROMA_PATH):
155
  vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
156
  search_query = f"Cocktail recipe using {inventory}"
157
+
158
+ # INCREASED K to 5 to give you more options
159
+ results = vs.similarity_search(search_query, k=5)
160
  recipe_context = "\n---\n".join([d.page_content for d in results])
161
  except Exception as e:
162
  print(f"Search error: {e}")
 
165
  if inventory == "Unknown Spirit":
166
  response = "I'm having trouble reading that label. Check the 'Vision Debug' gallery below—is the crop clear?"
167
  elif recipe_context:
168
+ response = f"I see you have **{inventory}**. Here are some recipes from your collection:\n\n{recipe_context}"
169
  else:
170
  response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the current library. Should I suggest a classic drink?"
171
 
 
172
  history.append({"role": "user", "content": message})
173
  history.append({"role": "assistant", "content": response})
174
 
 
197
  send_btn = gr.Button("Mix It Up", variant="primary")
198
 
199
  ingest_btn.click(ingest_recipes, file_up, status)
 
200
  msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])
201
  send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])
202