skshimada committed on
Commit
ce0a4da
·
verified ·
1 Parent(s): 4ba62ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -32
app.py CHANGED
@@ -33,28 +33,25 @@ def get_bottle_crops(image_path):
33
  found_crops = []
34
 
35
  try:
36
- original_img = Image.open(image_path)
37
  img_w, img_h = original_img.size
38
 
39
  yolo_model = YOLO("yolov8n.pt")
40
- # Low confidence to catch everything
41
  results = yolo_model(image_path, verbose=True, conf=0.1)
42
 
43
  for r in results:
44
  for box in r.boxes:
45
- if int(box.cls) == 39: # Bottle
 
46
  x1, y1, x2, y2 = box.xyxy[0].tolist()
47
 
48
- # --- NEW: Dynamic 25% Padding ---
49
- # Calculate width and height of the detected box
50
  box_w = x2 - x1
51
  box_h = y2 - y1
52
-
53
- # Expand by 25% of the box's own size
54
  pad_x = int(box_w * 0.25)
55
  pad_y = int(box_h * 0.25)
56
 
57
- # Apply padding but stay within image bounds
58
  x1 = max(0, x1 - pad_x)
59
  y1 = max(0, y1 - pad_y)
60
  x2 = min(img_w, x2 + pad_x)
@@ -75,7 +72,7 @@ def get_bottle_crops(image_path):
75
  except Exception as e:
76
  print(f"❌ YOLO CRASH: {e}")
77
  try:
78
- return [Image.open(image_path)]
79
  except:
80
  return []
81
 
@@ -113,37 +110,47 @@ def bartend(message, history, img_path, inventory):
113
  debug_images = crops
114
 
115
  # Start with the best crop
116
- target_img = crops[0] if crops else Image.open(img_path)
117
 
118
- # Helper function to run vision model
119
  def identify_spirit(image_input):
120
- prompt = "User: <image>\nRead the label. What is the specific brand and type of alcohol? Be precise.\nAssistant:"
121
- out = vision_pipe(image_input, prompt=prompt, generate_kwargs={"max_new_tokens": 50})
 
 
 
 
 
 
 
 
122
  text = out[0]['generated_text']
123
  if "Assistant:" in text:
124
  return text.split("Assistant:")[-1].strip()
125
  return text.replace("User: <image>", "").strip()
126
 
127
- # Run First Pass (Crop)
128
- inventory = identify_spirit(target_img)
129
- inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
130
-
131
- print(f"🔍 Pass 1 Result: {inventory}")
132
-
133
- # --- NEW: The "Generic Fallback" Logic ---
134
- # If the result is just a generic category, we missed the brand.
135
- # Force a check on the FULL image.
136
- generic_terms = ["vodka", "gin", "rum", "tequila", "whiskey", "whisky", "bourbon", "brandy", "alcohol", "liquor", "spirit", "bottle"]
137
-
138
- if inventory.lower() in generic_terms or len(inventory) < 4:
139
- print("⚠️ Result too generic. Trying FULL IMAGE...")
140
- full_img_result = identify_spirit(Image.open(img_path))
141
- full_img_result = re.sub(r'<.*?>', '', full_img_result).strip().split('.')[0]
142
 
143
- # If the full image gave us a longer (more specific) name, use it
144
- if len(full_img_result) > len(inventory):
145
- inventory = full_img_result
146
- print(f"✅ Pass 2 Result: {inventory}")
 
 
 
 
 
 
 
 
 
147
 
148
  # 2. RAG (Recipe Search)
149
  recipe_context = ""
@@ -165,6 +172,7 @@ def bartend(message, history, img_path, inventory):
165
  else:
166
  response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the current library. Should I suggest a classic drink?"
167
 
 
168
  history.append({"role": "user", "content": message})
169
  history.append({"role": "assistant", "content": response})
170
 
 
33
  found_crops = []
34
 
35
  try:
36
+ original_img = Image.open(image_path).convert("RGB")
37
  img_w, img_h = original_img.size
38
 
39
  yolo_model = YOLO("yolov8n.pt")
40
+ # Extremely low confidence to catch anything
41
  results = yolo_model(image_path, verbose=True, conf=0.1)
42
 
43
  for r in results:
44
  for box in r.boxes:
45
+ # Class 39 is bottle. We also check Class 40 (Wine glass) or 41 (Cup) just in case
46
+ if int(box.cls) in [39, 40, 41]:
47
  x1, y1, x2, y2 = box.xyxy[0].tolist()
48
 
49
+ # Dynamic 25% Padding
 
50
  box_w = x2 - x1
51
  box_h = y2 - y1
 
 
52
  pad_x = int(box_w * 0.25)
53
  pad_y = int(box_h * 0.25)
54
 
 
55
  x1 = max(0, x1 - pad_x)
56
  y1 = max(0, y1 - pad_y)
57
  x2 = min(img_w, x2 + pad_x)
 
72
  except Exception as e:
73
  print(f"❌ YOLO CRASH: {e}")
74
  try:
75
+ return [Image.open(image_path).convert("RGB")]
76
  except:
77
  return []
78
 
 
110
  debug_images = crops
111
 
112
  # Start with the best crop
113
+ target_img = crops[0] if crops else Image.open(img_path).convert("RGB")
114
 
115
+ # Helper function with FIXED calling signature
116
  def identify_spirit(image_input):
117
+ # Ensure image is RGB to prevent pipeline errors
118
+ if image_input.mode != "RGB":
119
+ image_input = image_input.convert("RGB")
120
+
121
+ prompt = "User: <image>\nRead the label on the bottle. What is the specific brand and type of alcohol? Be precise.\nAssistant:"
122
+
123
+ # FIXED: Passing prompt as a positional argument (the second argument)
124
+ # This fixes the "ValueError: You must provide text" error
125
+ out = vision_pipe(image_input, prompt, generate_kwargs={"max_new_tokens": 50})
126
+
127
  text = out[0]['generated_text']
128
  if "Assistant:" in text:
129
  return text.split("Assistant:")[-1].strip()
130
  return text.replace("User: <image>", "").strip()
131
 
132
+ # Run Pass 1
133
+ try:
134
+ inventory = identify_spirit(target_img)
135
+ inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
136
+ print(f"🔍 Pass 1 Result: {inventory}")
137
+
138
+ # Generic Fallback Logic
139
+ generic_terms = ["vodka", "gin", "rum", "tequila", "whiskey", "whisky", "bourbon", "brandy", "alcohol", "liquor", "spirit", "bottle", "drink", "glass"]
 
 
 
 
 
 
 
140
 
141
+ # If the answer is too short or generic, try the FULL image
142
+ if inventory.lower() in generic_terms or len(inventory) < 4:
143
+ print("⚠️ Result too generic. Trying FULL IMAGE...")
144
+ full_img_result = identify_spirit(Image.open(img_path).convert("RGB"))
145
+ full_img_result = re.sub(r'<.*?>', '', full_img_result).strip().split('.')[0]
146
+
147
+ if len(full_img_result) > len(inventory):
148
+ inventory = full_img_result
149
+ print(f"✅ Pass 2 Result: {inventory}")
150
+
151
+ except Exception as e:
152
+ print(f"❌ Vision Pipeline Failed: {e}")
153
+ inventory = "Unknown Spirit"
154
 
155
  # 2. RAG (Recipe Search)
156
  recipe_context = ""
 
172
  else:
173
  response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the current library. Should I suggest a classic drink?"
174
 
175
+ # Gradio 6.0 Dictionary Format
176
  history.append({"role": "user", "content": message})
177
  history.append({"role": "assistant", "content": response})
178