skshimada committed on
Commit
c4c69b9
Β·
verified Β·
1 Parent(s): 527c0df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -69
app.py CHANGED
@@ -27,128 +27,92 @@ vision_pipe = pipeline(
27
  print("πŸ“š Loading Embedding Engine...")
28
  embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
29
 
30
# --- BOTTLE DETECTION ---
def get_bottle_crops(image_path):
    """Detect bottle-like objects in a photo and return padded crops.

    Runs a YOLOv8-nano model over the image at *image_path* and crops every
    detection whose COCO class id is 39 (bottle), 40 (wine glass) or 41 (cup),
    expanding each box by 25% per side so the label stays readable.

    Args:
        image_path: Filesystem path to the photo to analyse.

    Returns:
        list[PIL.Image.Image]: the crops; the full image as a one-element
        list when nothing was detected; an empty list only when the file
        itself cannot be opened.
    """
    print(f"🔍 DEBUG: Starting YOLO on {image_path}")
    found_crops = []

    try:
        original_img = Image.open(image_path).convert("RGB")
        img_w, img_h = original_img.size

        # Model is loaded per call and explicitly freed below to keep
        # peak memory low on the small inference host.
        yolo_model = YOLO("yolov8n.pt")
        # conf=0.1 keeps low-confidence hits; bottles are often partly occluded.
        results = yolo_model(image_path, verbose=True, conf=0.1)

        for r in results:
            for box in r.boxes:
                if int(box.cls) in (39, 40, 41):  # Bottle, Wine Glass, Cup
                    x1, y1, x2, y2 = box.xyxy[0].tolist()

                    # Dynamic 25% padding around the detection box.
                    pad_x = int((x2 - x1) * 0.25)
                    pad_y = int((y2 - y1) * 0.25)

                    # Clamp to the image bounds and use integer pixel
                    # coordinates (box.xyxy yields floats).
                    x1 = max(0, int(x1) - pad_x)
                    y1 = max(0, int(y1) - pad_y)
                    x2 = min(img_w, int(x2) + pad_x)
                    y2 = min(img_h, int(y2) + pad_y)

                    found_crops.append(original_img.crop((x1, y1, x2, y2)))

        del yolo_model
        gc.collect()

        if not found_crops:
            print("⚠️ DEBUG: No bottles found. Returning full image.")
            return [original_img]

        return found_crops

    except Exception as e:
        print(f"❌ YOLO CRASH: {e}")
        try:
            # Best-effort fallback: hand back the untouched full image.
            return [Image.open(image_path).convert("RGB")]
        except Exception:
            # Was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
            # are no longer swallowed here.
            return []
77
# --- RECIPE INGESTION (THE "HARD CUT" FIX) ---
def ingest_recipes(files):
    """Load uploaded .txt/.pdf files, split them into one chunk per recipe,
    and persist the chunks into the Chroma vector store at CHROMA_PATH.

    Args:
        files: Gradio file uploads; each element exposes a ``.name`` path.

    Returns:
        str: a human-readable status message (success or error).
    """
    if not files: return "❌ No files uploaded."

    docs = []
    for f in files:
        try:
            if f.name.endswith(".txt"):
                loader = TextLoader(f.name)
                docs.extend(loader.load())
            elif f.name.endswith(".pdf"):
                loader = PyPDFLoader(f.name)
                docs.extend(loader.load())
            # Other extensions are silently ignored by design.
        except Exception as e:
            print(f"Error loading {f.name}: {e}")

    if not docs: return "❌ Could not extract text."

    # 1. Combine all pages/files into one massive text block
    full_text = "\n".join([d.page_content for d in docs])

    # 2. Strict Split: Cut exactly at the start of any line that says "Recipe:"
    # (?m)^ means "look at the start of a line"
    raw_chunks = re.split(r'(?m)^(?=Recipe:)', full_text)

    split_docs = []
    for chunk in raw_chunks:
        # Clean out those long '⸻' separator lines
        clean_chunk = re.sub(r'⸻+', '', chunk).strip()

        # If the chunk actually has text in it, save it as a standalone recipe
        if len(clean_chunk) > 20:
            split_docs.append(Document(page_content=clean_chunk))

    # 3. Save to Database (the returned store handle is not needed, so the
    #    previously unused `vector_store` local has been dropped)
    try:
        Chroma.from_documents(
            documents=split_docs,
            embedding=embed_model,
            persist_directory=CHROMA_PATH
        )
        return f"✅ Bar library updated. Strictly split into {len(split_docs)} individual recipes."
    except Exception as e:
        return f"❌ Database Error: {e}"
121
 
122
- # --- BARTENDER LOGIC ---
123
  def bartend(message, history, img_path, inventory):
124
  debug_images = []
125
 
126
  if img_path:
 
127
  crops = get_bottle_crops(img_path)
128
  debug_images = crops
129
- target_img = crops[0] if crops else Image.open(img_path).convert("RGB")
 
 
130
 
131
  def identify_spirit(image_input):
132
- if image_input.mode != "RGB": image_input = image_input.convert("RGB")
 
 
 
133
  prompt = "User: <image>\nRead the label. What is the specific brand and type of alcohol? Be precise.\nAssistant:"
134
- out = vision_pipe(image_input, prompt, generate_kwargs={"max_new_tokens": 50})
 
 
135
  text = out[0]['generated_text']
136
  if "Assistant:" in text: return text.split("Assistant:")[-1].strip()
137
  return text.replace("User: <image>", "").strip()
138
 
139
  try:
 
 
140
  inventory = identify_spirit(target_img)
141
  inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
142
- print(f"πŸ” Pass 1 Result: {inventory}")
143
-
144
- generic_terms = ["vodka", "gin", "rum", "tequila", "whiskey", "whisky", "bourbon", "brandy", "alcohol", "liquor", "spirit", "bottle", "drink"]
145
- if inventory.lower() in generic_terms or len(inventory) < 4:
146
- print("⚠️ Result too generic. Trying FULL IMAGE...")
147
- full_img_result = identify_spirit(Image.open(img_path).convert("RGB"))
148
- full_img_result = re.sub(r'<.*?>', '', full_img_result).strip().split('.')[0]
149
- if len(full_img_result) > len(inventory):
150
- inventory = full_img_result
151
- print(f"βœ… Pass 2 Result: {inventory}")
152
 
153
  except Exception as e:
154
  print(f"❌ Vision Failed: {e}")
@@ -161,7 +125,7 @@ def bartend(message, history, img_path, inventory):
161
  vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
162
  search_query = f"Cocktail recipe using {inventory}"
163
 
164
- # Retrieve the top 4 closest matching recipes
165
  results = vs.similarity_search(search_query, k=4)
166
  recipe_context = "\n\n---\n\n".join([d.page_content for d in results])
167
  except Exception as e:
@@ -193,7 +157,7 @@ with gr.Blocks() as demo:
193
  gr.Markdown("---")
194
  img = gr.Image(type="filepath", label="2. Photo of your Bottle")
195
 
196
- with gr.Accordion("πŸ” Vision Debug", open=True):
197
  debug_gallery = gr.Gallery(label="YOLO Crops", columns=2, height="auto")
198
 
199
  with gr.Column(scale=2):
 
27
  print("πŸ“š Loading Embedding Engine...")
28
  embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
29
 
30
# --- BOTTLE DETECTION (JUST FOR DEBUG GALLERY NOW) ---
def get_bottle_crops(image_path):
    """Detect bottle-like objects and return padded crops for the debug gallery.

    Runs YOLOv8-nano on *image_path* and crops every detection of COCO class
    39 (bottle), 40 (wine glass) or 41 (cup), padded 25% per side.

    Args:
        image_path: Filesystem path to the photo to analyse.

    Returns:
        list[PIL.Image.Image]: the crops, the full image as a one-element
        list when nothing was detected, or an empty list on failure.
    """
    found_crops = []
    try:
        original_img = Image.open(image_path).convert("RGB")
        img_w, img_h = original_img.size

        # Loaded per call and freed below to keep peak memory low.
        yolo_model = YOLO("yolov8n.pt")
        results = yolo_model(image_path, verbose=False, conf=0.1)

        for r in results:
            for box in r.boxes:
                if int(box.cls) in [39, 40, 41]:  # Bottle, Wine Glass, Cup
                    x1, y1, x2, y2 = box.xyxy[0].tolist()
                    box_w, box_h = x2 - x1, y2 - y1
                    pad_x, pad_y = int(box_w * 0.25), int(box_h * 0.25)

                    # Clamp to image bounds, use integer pixel coordinates
                    # (box.xyxy yields floats).
                    x1, y1 = max(0, int(x1) - pad_x), max(0, int(y1) - pad_y)
                    x2, y2 = min(img_w, int(x2) + pad_x), min(img_h, int(y2) + pad_y)
                    found_crops.append(original_img.crop((x1, y1, x2, y2)))

        del yolo_model
        gc.collect()
        return found_crops if found_crops else [original_img]
    except Exception as e:
        # Previously a silent `return []`; log the failure so detection
        # problems are visible in the server log before giving up.
        print(f"❌ YOLO CRASH: {e}")
        return []
57
# --- RECIPE INGESTION (HARD CUT METHOD) ---
def ingest_recipes(files):
    """Ingest uploaded recipe files into the Chroma store, one chunk per recipe.

    Text is merged across all uploads, hard-cut at every line starting with
    "Recipe:", stripped of '⸻' separator runs, and persisted to CHROMA_PATH.

    Args:
        files: Gradio file uploads; each element exposes a ``.name`` path.

    Returns:
        str: a human-readable status message (success or error).
    """
    if not files:
        return "❌ No files uploaded."

    docs = []
    for upload in files:
        path = upload.name
        try:
            if path.endswith(".txt"):
                docs.extend(TextLoader(path).load())
            elif path.endswith(".pdf"):
                docs.extend(PyPDFLoader(path).load())
        except Exception as e:
            print(f"Error: {e}")

    if not docs:
        return "❌ Could not extract text."

    # Merge every page, then cut at each line that begins with "Recipe:"
    # ((?m)^ anchors the lookahead at line starts).
    combined = "\n".join(page.page_content for page in docs)
    pieces = re.split(r'(?m)^(?=Recipe:)', combined)

    # Drop '⸻' separator runs and keep only chunks with real content.
    split_docs = [
        Document(page_content=text)
        for text in (re.sub(r'⸻+', '', piece).strip() for piece in pieces)
        if len(text) > 20
    ]

    try:
        Chroma.from_documents(split_docs, embed_model, persist_directory=CHROMA_PATH)
        return f"✅ Bar library updated. Strictly split into {len(split_docs)} individual recipes."
    except Exception as e:
        return f"❌ Database Error: {e}"
84
 
85
+ # --- BARTENDER LOGIC (SPEED OPTIMIZED) ---
86
  def bartend(message, history, img_path, inventory):
87
  debug_images = []
88
 
89
  if img_path:
90
+ # Run YOLO just so the user can see what it isolated in the gallery
91
  crops = get_bottle_crops(img_path)
92
  debug_images = crops
93
+
94
+ # WE NOW USE THE FULL IMAGE FOR THE AI TO GUARANTEE IT SEES THE BRAND
95
+ target_img = Image.open(img_path).convert("RGB")
96
 
97
  def identify_spirit(image_input):
98
+ # πŸš€ SPEED FIX 1: Shrink massive phone photos to 512x512
99
+ # This stops the CPU from choking on millions of pixels
100
+ image_input.thumbnail((512, 512))
101
+
102
  prompt = "User: <image>\nRead the label. What is the specific brand and type of alcohol? Be precise.\nAssistant:"
103
+
104
+ # πŸš€ SPEED FIX 2: Max 15 tokens. CPU takes ~1s per token. Less tokens = much faster.
105
+ out = vision_pipe(image_input, prompt, generate_kwargs={"max_new_tokens": 15})
106
  text = out[0]['generated_text']
107
  if "Assistant:" in text: return text.split("Assistant:")[-1].strip()
108
  return text.replace("User: <image>", "").strip()
109
 
110
  try:
111
+ # πŸš€ SPEED FIX 3: Single Pass. No more running the vision model twice.
112
+ print("πŸ” Starting Vision Pass (Speed Optimized)...")
113
  inventory = identify_spirit(target_img)
114
  inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
115
+ print(f"βœ… Vision Result: {inventory}")
 
 
 
 
 
 
 
 
 
116
 
117
  except Exception as e:
118
  print(f"❌ Vision Failed: {e}")
 
125
  vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
126
  search_query = f"Cocktail recipe using {inventory}"
127
 
128
+ # Fetch top 4 recipes
129
  results = vs.similarity_search(search_query, k=4)
130
  recipe_context = "\n\n---\n\n".join([d.page_content for d in results])
131
  except Exception as e:
 
157
  gr.Markdown("---")
158
  img = gr.Image(type="filepath", label="2. Photo of your Bottle")
159
 
160
+ with gr.Accordion("πŸ” Vision Debug", open=False):
161
  debug_gallery = gr.Gallery(label="YOLO Crops", columns=2, height="auto")
162
 
163
  with gr.Column(scale=2):