skshimada committed on
Commit
73d9e71
·
verified ·
1 Parent(s): c4c69b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -16
app.py CHANGED
@@ -7,6 +7,7 @@ from PIL import Image
7
  from transformers import pipeline
8
  from langchain_chroma import Chroma
9
  from langchain_community.document_loaders import PyPDFLoader, TextLoader
 
10
  from langchain_core.documents import Document
11
  from langchain_huggingface import HuggingFaceEmbeddings
12
  from ultralytics import YOLO
@@ -27,9 +28,11 @@ vision_pipe = pipeline(
27
  print("📚 Loading Embedding Engine...")
28
  embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
29
 
30
- # --- BOTTLE DETECTION (JUST FOR DEBUG GALLERY NOW) ---
31
  def get_bottle_crops(image_path):
 
32
  found_crops = []
 
33
  try:
34
  original_img = Image.open(image_path).convert("RGB")
35
  img_w, img_h = original_img.size
@@ -41,20 +44,24 @@ def get_bottle_crops(image_path):
41
  for box in r.boxes:
42
  if int(box.cls) in [39, 40, 41]:
43
  x1, y1, x2, y2 = box.xyxy[0].tolist()
 
 
44
  box_w, box_h = x2 - x1, y2 - y1
45
  pad_x, pad_y = int(box_w * 0.25), int(box_h * 0.25)
46
 
47
  x1, y1 = max(0, x1 - pad_x), max(0, y1 - pad_y)
48
  x2, y2 = min(img_w, x2 + pad_x), min(img_h, y2 + pad_y)
 
49
  found_crops.append(original_img.crop((x1, y1, x2, y2)))
50
 
51
  del yolo_model
52
  gc.collect()
53
  return found_crops if found_crops else [original_img]
54
- except Exception:
 
55
  return []
56
 
57
- # --- RECIPE INGESTION (HARD CUT METHOD) ---
58
  def ingest_recipes(files):
59
  if not files: return "❌ No files uploaded."
60
 
@@ -82,37 +89,49 @@ def ingest_recipes(files):
82
  except Exception as e:
83
  return f"❌ Database Error: {e}"
84
 
85
- # --- BARTENDER LOGIC (SPEED OPTIMIZED) ---
86
  def bartend(message, history, img_path, inventory):
87
  debug_images = []
88
 
89
  if img_path:
90
- # Run YOLO just so the user can see what it isolated in the gallery
91
  crops = get_bottle_crops(img_path)
92
  debug_images = crops
93
 
94
- # WE NOW USE THE FULL IMAGE FOR THE AI TO GUARANTEE IT SEES THE BRAND
95
- target_img = Image.open(img_path).convert("RGB")
96
 
97
  def identify_spirit(image_input):
98
- # 🚀 SPEED FIX 1: Shrink massive phone photos to 512x512
99
- # This stops the CPU from choking on millions of pixels
100
- image_input.thumbnail((512, 512))
 
 
 
 
101
 
102
  prompt = "User: <image>\nRead the label. What is the specific brand and type of alcohol? Be precise.\nAssistant:"
103
 
104
- # 🚀 SPEED FIX 2: Max 15 tokens. CPU takes ~1s per token. Fewer tokens = much faster.
105
- out = vision_pipe(image_input, prompt, generate_kwargs={"max_new_tokens": 15})
106
  text = out[0]['generated_text']
107
  if "Assistant:" in text: return text.split("Assistant:")[-1].strip()
108
  return text.replace("User: <image>", "").strip()
109
 
110
  try:
111
- # 🚀 SPEED FIX 3: Single Pass. No more running the vision model twice.
112
- print("🔍 Starting Vision Pass (Speed Optimized)...")
113
  inventory = identify_spirit(target_img)
114
  inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
115
- print(f"✅ Vision Result: {inventory}")
 
 
 
 
 
 
 
 
 
 
 
116
 
117
  except Exception as e:
118
  print(f"❌ Vision Failed: {e}")
@@ -125,7 +144,7 @@ def bartend(message, history, img_path, inventory):
125
  vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
126
  search_query = f"Cocktail recipe using {inventory}"
127
 
128
- # Fetch top 4 recipes
129
  results = vs.similarity_search(search_query, k=4)
130
  recipe_context = "\n\n---\n\n".join([d.page_content for d in results])
131
  except Exception as e:
 
7
  from transformers import pipeline
8
  from langchain_chroma import Chroma
9
  from langchain_community.document_loaders import PyPDFLoader, TextLoader
10
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
11
  from langchain_core.documents import Document
12
  from langchain_huggingface import HuggingFaceEmbeddings
13
  from ultralytics import YOLO
 
28
  print("📚 Loading Embedding Engine...")
29
  embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
30
 
31
+ # --- BOTTLE DETECTION ---
32
  def get_bottle_crops(image_path):
33
+ print(f"πŸ” DEBUG: Starting YOLO on {image_path}")
34
  found_crops = []
35
+
36
  try:
37
  original_img = Image.open(image_path).convert("RGB")
38
  img_w, img_h = original_img.size
 
44
  for box in r.boxes:
45
  if int(box.cls) in [39, 40, 41]:
46
  x1, y1, x2, y2 = box.xyxy[0].tolist()
47
+
48
+ # 25% Padding to ensure the label isn't cut off
49
  box_w, box_h = x2 - x1, y2 - y1
50
  pad_x, pad_y = int(box_w * 0.25), int(box_h * 0.25)
51
 
52
  x1, y1 = max(0, x1 - pad_x), max(0, y1 - pad_y)
53
  x2, y2 = min(img_w, x2 + pad_x), min(img_h, y2 + pad_y)
54
+
55
  found_crops.append(original_img.crop((x1, y1, x2, y2)))
56
 
57
  del yolo_model
58
  gc.collect()
59
  return found_crops if found_crops else [original_img]
60
+ except Exception as e:
61
+ print(f"❌ YOLO Error: {e}")
62
  return []
63
 
64
+ # --- RECIPE INGESTION ---
65
  def ingest_recipes(files):
66
  if not files: return "❌ No files uploaded."
67
 
 
89
  except Exception as e:
90
  return f"❌ Database Error: {e}"
91
 
92
+ # --- BARTENDER LOGIC ---
93
  def bartend(message, history, img_path, inventory):
94
  debug_images = []
95
 
96
  if img_path:
 
97
  crops = get_bottle_crops(img_path)
98
  debug_images = crops
99
 
100
+ # SPEED FIX 1: We return to using the tight crop, discarding the heavy background!
101
+ target_img = crops[0] if crops else Image.open(img_path).convert("RGB")
102
 
103
  def identify_spirit(image_input):
104
+ # SPEED FIX 2: Aggressive squishing.
105
+ # We copy the image so we don't blur the gallery debug version
106
+ fast_img = image_input.copy()
107
+ if fast_img.mode != "RGB": fast_img = fast_img.convert("RGB")
108
+
109
+ # Shrink down to a max of 384x384. This makes CPU math practically instant.
110
+ fast_img.thumbnail((384, 384))
111
 
112
  prompt = "User: <image>\nRead the label. What is the specific brand and type of alcohol? Be precise.\nAssistant:"
113
 
114
+ # Keep token limit at 15. The 'brain' (Chroma) handles the long text, the 'eyes' just need to read the brand name.
115
+ out = vision_pipe(fast_img, prompt, generate_kwargs={"max_new_tokens": 15})
116
  text = out[0]['generated_text']
117
  if "Assistant:" in text: return text.split("Assistant:")[-1].strip()
118
  return text.replace("User: <image>", "").strip()
119
 
120
  try:
 
 
121
  inventory = identify_spirit(target_img)
122
  inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
123
+ print(f"πŸ” Pass 1 Result: {inventory}")
124
+
125
+ generic_terms = ["vodka", "gin", "rum", "tequila", "whiskey", "whisky", "bourbon", "brandy", "alcohol", "liquor", "spirit", "bottle", "drink"]
126
+
127
+ # ONLY fallback to the heavy full image if the crop failed us
128
+ if inventory.lower() in generic_terms or len(inventory) < 4:
129
+ print("⚠️ Result too generic. Trying FULL IMAGE...")
130
+ full_img_result = identify_spirit(Image.open(img_path))
131
+ full_img_result = re.sub(r'<.*?>', '', full_img_result).strip().split('.')[0]
132
+ if len(full_img_result) > len(inventory):
133
+ inventory = full_img_result
134
+ print(f"✅ Pass 2 Result: {inventory}")
135
 
136
  except Exception as e:
137
  print(f"❌ Vision Failed: {e}")
 
144
  vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
145
  search_query = f"Cocktail recipe using {inventory}"
146
 
147
+ # Fetch top 4 distinct recipes
148
  results = vs.similarity_search(search_query, k=4)
149
  recipe_context = "\n\n---\n\n".join([d.page_content for d in results])
150
  except Exception as e: