skshimada committed on
Commit
099e0d3
·
verified ·
1 Parent(s): d8f8152

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -17
app.py CHANGED
@@ -27,7 +27,7 @@ vision_pipe = pipeline(
27
  print("📚 Loading Embedding Engine...")
28
  embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
29
 
30
- # --- BOTTLE DETECTION ---
31
  def get_bottle_crops(image_path):
32
  try:
33
  yolo_model = YOLO("yolov8n.pt")
@@ -36,9 +36,13 @@ def get_bottle_crops(image_path):
36
  original_img = Image.open(image_path)
37
  for r in results:
38
  for box in r.boxes:
39
- if int(box.cls) == 39: # Bottle
40
  x1, y1, x2, y2 = box.xyxy[0].tolist()
41
- found_crops.append(original_img.crop((x1-5, y1-5, x2+5, y2+5)))
 
 
 
 
42
  del yolo_model
43
  gc.collect()
44
  return found_crops
@@ -77,20 +81,36 @@ def bartend(message, history, img_path, inventory):
77
  # 1. Vision Scanning
78
  if img_path:
79
  crops = get_bottle_crops(img_path)
 
80
  target_img = crops[0] if crops else Image.open(img_path)
81
- prompt_text = "What is the brand and type of alcohol in this image? Answer briefly."
 
 
 
82
 
83
  try:
84
- output = vision_pipe(target_img, prompt=prompt_text, generate_kwargs={"max_new_tokens": 30})
85
  raw_label = output[0]['generated_text']
86
- inventory = raw_label.split("Answer:")[-1].strip() if "Answer:" in raw_label else raw_label.replace(prompt_text, "").strip()
 
 
 
 
 
 
 
 
 
 
 
87
  except Exception as e:
88
  print(f"Vision error: {e}")
89
  inventory = "Unknown Spirit"
90
 
91
- # 2. RAG (Recipe Search)
92
  recipe_context = ""
93
- if inventory and inventory != "Empty Shelf":
 
94
  try:
95
  if os.path.exists(CHROMA_PATH):
96
  vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
@@ -101,41 +121,40 @@ def bartend(message, history, img_path, inventory):
101
  print(f"Search error: {e}")
102
 
103
  # 3. Create the Response
104
- if recipe_context:
105
- response = f"I see you have **{inventory}**. Here is a recipe I found in your collection:\n\n{recipe_context}"
 
 
106
  else:
107
- response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the library yet."
108
 
109
- # dictionary format for Gradio 6.0
110
  history.append({"role": "user", "content": message})
111
  history.append({"role": "assistant", "content": response})
112
 
113
  return history, inventory
114
 
115
  # --- UI LAYOUT ---
116
- # Removed theme from Blocks (it's now in launch)
117
  with gr.Blocks() as demo:
118
  gr.Markdown("# 🍸 LocalAGI: The AI Sommelier")
119
  inv_state = gr.State("Empty Shelf")
120
 
121
  with gr.Row():
122
  with gr.Column(scale=1):
123
- file_up = gr.File(label="1. Upload Recipes (PDF/TXT)", file_count="multiple")
124
  ingest_btn = gr.Button("📥 Load into Memory")
125
  status = gr.Textbox(label="System Status", value="Ready")
126
  gr.Markdown("---")
127
  img = gr.Image(type="filepath", label="2. Photo of your Bottle")
128
 
129
  with gr.Column(scale=2):
130
- # Removed type="messages" (dictionary format is now default in 6.0)
131
  chatbot = gr.Chatbot(height=500, label="Bartender Chat")
132
- msg = gr.Textbox(label="3. Your Message", placeholder="Ask for a suggestion...")
133
  send_btn = gr.Button("Mix It Up", variant="primary")
134
 
 
135
  ingest_btn.click(ingest_recipes, file_up, status)
136
  msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
137
  send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
138
 
139
  if __name__ == "__main__":
140
- # Moved theme to launch() as required by Gradio 6.0
141
  demo.launch(theme=gr.themes.Soft())
 
27
  print("📚 Loading Embedding Engine...")
28
  embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
29
 
30
+ # --- BOTTLE DETECTION (YOLO) ---
31
  def get_bottle_crops(image_path):
32
  try:
33
  yolo_model = YOLO("yolov8n.pt")
 
36
  original_img = Image.open(image_path)
37
  for r in results:
38
  for box in r.boxes:
39
+ if int(box.cls) == 39: # COCO index for bottle
40
  x1, y1, x2, y2 = box.xyxy[0].tolist()
41
+ # Add a fixed 20px margin (clamped to image bounds) to help the vision model see context
42
+ w, h = original_img.size
43
+ x1, y1 = max(0, x1 - 20), max(0, y1 - 20)
44
+ x2, y2 = min(w, x2 + 20), min(h, y2 + 20)
45
+ found_crops.append(original_img.crop((x1, y1, x2, y2)))
46
  del yolo_model
47
  gc.collect()
48
  return found_crops
 
81
  # 1. Vision Scanning
82
  if img_path:
83
  crops = get_bottle_crops(img_path)
84
+ # Use the first crop if available, otherwise the full image
85
  target_img = crops[0] if crops else Image.open(img_path)
86
+
87
+ # SmolVLM prefers this structured prompt format to separate image from instructions
88
+ # We use 'Assistant:' as a trigger for the model to begin its response
89
+ prompt_text = "User: <image>\nIdentify the brand and type of alcohol. Be concise.\nAssistant:"
90
 
91
  try:
92
+ output = vision_pipe(target_img, prompt=prompt_text, generate_kwargs={"max_new_tokens": 50})
93
  raw_label = output[0]['generated_text']
94
+
95
+ # Extract only the AI's new answer
96
+ if "Assistant:" in raw_label:
97
+ inventory = raw_label.split("Assistant:")[-1].strip()
98
+ else:
99
+ inventory = raw_label.replace(prompt_text, "").strip()
100
+
101
+ # Clean up potential leftover markdown or tags
102
+ inventory = re.sub(r'<.*?>', '', inventory).strip()
103
+ # If the model gives a full sentence, try to shorten it
104
+ inventory = inventory.split('.')[0]
105
+
106
  except Exception as e:
107
  print(f"Vision error: {e}")
108
  inventory = "Unknown Spirit"
109
 
110
+ # 2. RAG (Search the recipes)
111
  recipe_context = ""
112
+ # Safeguard: Don't search if we don't have a valid spirit name
113
+ if inventory and inventory not in ["Empty Shelf", "Unknown Spirit", ""]:
114
  try:
115
  if os.path.exists(CHROMA_PATH):
116
  vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
 
121
  print(f"Search error: {e}")
122
 
123
  # 3. Create the Response
124
+ if inventory == "Unknown Spirit":
125
+ response = "I'm having trouble reading that label. Could you tell me what the bottle is, or try taking a clearer photo of just the label?"
126
+ elif recipe_context:
127
+ response = f"I see you have **{inventory}**. Here is a suggestion from your library:\n\n{recipe_context}"
128
  else:
129
+ response = f"I see you have **{inventory}**! I couldn't find a specific match in your uploaded books. Would you like a classic recommendation instead?"
130
 
 
131
  history.append({"role": "user", "content": message})
132
  history.append({"role": "assistant", "content": response})
133
 
134
  return history, inventory
135
 
136
  # --- UI LAYOUT ---
 
137
  with gr.Blocks() as demo:
138
  gr.Markdown("# 🍸 LocalAGI: The AI Sommelier")
139
  inv_state = gr.State("Empty Shelf")
140
 
141
  with gr.Row():
142
  with gr.Column(scale=1):
143
+ file_up = gr.File(label="1. Upload Recipe PDFs/TXTs", file_count="multiple")
144
  ingest_btn = gr.Button("📥 Load into Memory")
145
  status = gr.Textbox(label="System Status", value="Ready")
146
  gr.Markdown("---")
147
  img = gr.Image(type="filepath", label="2. Photo of your Bottle")
148
 
149
  with gr.Column(scale=2):
 
150
  chatbot = gr.Chatbot(height=500, label="Bartender Chat")
151
+ msg = gr.Textbox(label="3. Your Message", placeholder="Suggest a drink for me...")
152
  send_btn = gr.Button("Mix It Up", variant="primary")
153
 
154
+ # Connect UI events
155
  ingest_btn.click(ingest_recipes, file_up, status)
156
  msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
157
  send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
158
 
159
  if __name__ == "__main__":
 
160
  demo.launch(theme=gr.themes.Soft())