skshimada committed on
Commit
c3f6e08
·
verified ·
1 Parent(s): 1cc7f06

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -12
app.py CHANGED
@@ -12,13 +12,11 @@ from langchain_huggingface import HuggingFaceEmbeddings
12
  from ultralytics import YOLO
13
 
14
  # --- CONFIGURATION ---
15
- # We use /tmp because it is the only folder Hugging Face lets us write to
16
  CHROMA_PATH = "/tmp/chroma_db"
17
  VISION_MODEL = "HuggingFaceTB/SmolVLM-Instruct"
18
 
19
  # --- SYSTEM INITIALIZATION ---
20
  print("⚙️ Loading Stable Vision Engine...")
21
- # We use float32 and CPU to ensure the app doesn't crash on the free tier
22
  vision_pipe = pipeline(
23
  "image-text-to-text",
24
  model=VISION_MODEL,
@@ -38,7 +36,7 @@ def get_bottle_crops(image_path):
38
  original_img = Image.open(image_path)
39
  for r in results:
40
  for box in r.boxes:
41
- if int(box.cls) == 39: # 39 is the 'bottle' category
42
  x1, y1, x2, y2 = box.xyxy[0].tolist()
43
  found_crops.append(original_img.crop((x1-5, y1-5, x2+5, y2+5)))
44
  del yolo_model
@@ -67,7 +65,6 @@ def ingest_recipes(files):
67
  if not docs:
68
  return "❌ Could not extract text from files."
69
 
70
- # This creates the searchable 'brain' from your PDFs
71
  vector_store = Chroma.from_documents(
72
  documents=docs,
73
  embedding=embed_model,
@@ -82,15 +79,12 @@ def bartend(message, history, img_path, inventory):
82
  crops = get_bottle_crops(img_path)
83
  target_img = crops[0] if crops else Image.open(img_path)
84
 
85
- # We use a simple prompt string which works best for this pipeline version
86
  prompt_text = "What is the brand and type of alcohol in this image? Answer briefly."
87
 
88
  try:
89
- # Fixing the pipeline call format
90
  output = vision_pipe(target_img, prompt=prompt_text, generate_kwargs={"max_new_tokens": 30})
91
  raw_label = output[0]['generated_text']
92
 
93
- # Clean the output to get just the name
94
  if "Answer:" in raw_label:
95
  inventory = raw_label.split("Answer:")[-1].strip()
96
  else:
@@ -99,7 +93,7 @@ def bartend(message, history, img_path, inventory):
99
  print(f"Vision error: {e}")
100
  inventory = "Unknown Spirit"
101
 
102
- # 2. RAG (Search the PDF recipes)
103
  recipe_context = ""
104
  if inventory and inventory != "Empty Shelf":
105
  try:
@@ -117,7 +111,10 @@ def bartend(message, history, img_path, inventory):
117
  else:
118
  response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the current library. Should I suggest a classic drink?"
119
 
120
- history.append((message, response))
 
 
 
121
  return history, inventory
122
 
123
  # --- UI LAYOUT ---
@@ -134,14 +131,15 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
134
  img = gr.Image(type="filepath", label="2. Photo of your Bottle")
135
 
136
  with gr.Column(scale=2):
137
- chatbot = gr.Chatbot(height=500, label="Bartender Chat")
 
138
  msg = gr.Textbox(label="3. Your Message", placeholder="Ask for a drink suggestion...")
139
  send_btn = gr.Button("Mix It Up", variant="primary")
140
 
141
- # Connect the buttons to the logic
142
  ingest_btn.click(ingest_recipes, file_up, status)
143
 
144
- # Allows pressing 'Enter' in the textbox or clicking the button
145
  msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
146
  send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
147
 
 
12
  from ultralytics import YOLO
13
 
14
  # --- CONFIGURATION ---
 
15
  CHROMA_PATH = "/tmp/chroma_db"
16
  VISION_MODEL = "HuggingFaceTB/SmolVLM-Instruct"
17
 
18
  # --- SYSTEM INITIALIZATION ---
19
  print("⚙️ Loading Stable Vision Engine...")
 
20
  vision_pipe = pipeline(
21
  "image-text-to-text",
22
  model=VISION_MODEL,
 
36
  original_img = Image.open(image_path)
37
  for r in results:
38
  for box in r.boxes:
39
+ if int(box.cls) == 39: # Bottle index
40
  x1, y1, x2, y2 = box.xyxy[0].tolist()
41
  found_crops.append(original_img.crop((x1-5, y1-5, x2+5, y2+5)))
42
  del yolo_model
 
65
  if not docs:
66
  return "❌ Could not extract text from files."
67
 
 
68
  vector_store = Chroma.from_documents(
69
  documents=docs,
70
  embedding=embed_model,
 
79
  crops = get_bottle_crops(img_path)
80
  target_img = crops[0] if crops else Image.open(img_path)
81
 
 
82
  prompt_text = "What is the brand and type of alcohol in this image? Answer briefly."
83
 
84
  try:
 
85
  output = vision_pipe(target_img, prompt=prompt_text, generate_kwargs={"max_new_tokens": 30})
86
  raw_label = output[0]['generated_text']
87
 
 
88
  if "Answer:" in raw_label:
89
  inventory = raw_label.split("Answer:")[-1].strip()
90
  else:
 
93
  print(f"Vision error: {e}")
94
  inventory = "Unknown Spirit"
95
 
96
+ # 2. RAG (Recipe Search)
97
  recipe_context = ""
98
  if inventory and inventory != "Empty Shelf":
99
  try:
 
111
  else:
112
  response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the current library. Should I suggest a classic drink?"
113
 
114
+ # --- UPDATED FOR GRADIO "MESSAGES" FORMAT ---
115
+ history.append({"role": "user", "content": message})
116
+ history.append({"role": "assistant", "content": response})
117
+
118
  return history, inventory
119
 
120
  # --- UI LAYOUT ---
 
131
  img = gr.Image(type="filepath", label="2. Photo of your Bottle")
132
 
133
  with gr.Column(scale=2):
134
+ # FIXED: Added type="messages" to match the new dictionary history format
135
+ chatbot = gr.Chatbot(height=500, label="Bartender Chat", type="messages")
136
  msg = gr.Textbox(label="3. Your Message", placeholder="Ask for a drink suggestion...")
137
  send_btn = gr.Button("Mix It Up", variant="primary")
138
 
139
+ # Connect the buttons
140
  ingest_btn.click(ingest_recipes, file_up, status)
141
 
142
+ # Connect Chat Events
143
  msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
144
  send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])
145