skshimada committed on
Commit
7280e12
·
verified ·
1 Parent(s): 1f0b3b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -91
app.py CHANGED
@@ -5,94 +5,103 @@ import re
5
  import base64
6
  import io
7
  import shutil
 
8
  from PIL import Image
9
  from huggingface_hub import hf_hub_download
10
  from langchain_chroma import Chroma
11
  from langchain_community.document_loaders import PyPDFLoader
12
  from langchain_core.embeddings import Embeddings
13
  from langchain_core.documents import Document
14
- from ultralytics import YOLO
15
 
16
- # --- CONFIGURATION & SPACE PREP ---
17
- RETRIEVAL_K = 15
18
- CHROMA_PATH = "/tmp/chroma_db" # Use /tmp for HF Spaces ephemeral storage
19
 
20
  if os.path.exists(CHROMA_PATH):
21
  shutil.rmtree(CHROMA_PATH)
22
  os.makedirs(CHROMA_PATH, exist_ok=True)
23
 
24
- # --- MODEL DOWNLOADER ---
25
- # Using GGUF models hosted on HF. You can change these repos/filenames.
26
- def download_models():
27
- print("⏳ Downloading models from HF Hub (this may take a minute)...")
28
- # Using MiniCPM-V-2_6 as requested in your original logic
29
- chat_path = hf_hub_download(repo_id="openbmb/MiniCPM-V-2_6-gguf", filename="MiniCPM-V-2_6-Q4_K_M.gguf")
30
- vis_path = hf_hub_download(repo_id="openbmb/MiniCPM-V-2_6-gguf", filename="mmproj-MiniCPM-V-2_6-f16.gguf")
31
- # Using a standard embedding model
32
- emb_path = hf_hub_download(repo_id="bartowski/nomic-embed-text-v1.5-GGUF", filename="nomic-embed-text-v1.5.Q4_K_M.gguf")
33
- return chat_path, vis_path, emb_path
34
-
35
- # --- GLOBAL VARIABLES ---
36
  CHAT_MODEL = None
37
  EMBED_MODEL = None
38
  VECTOR_STORE = None
39
- YOLO_MODEL = YOLO("yolov8n.pt") # Standard YOLOv8 nano
40
 
41
- # --- CLASSES ---
42
  class LocalLlamaEmbeddings(Embeddings):
43
  def __init__(self, model_path):
44
- self.model = Llama(model_path=model_path, embedding=True, verbose=False, n_ctx=2048)
 
45
  def embed_documents(self, texts):
46
  return [self.model.create_embedding(t)['data'][0]['embedding'] for t in texts]
47
  def embed_query(self, text):
48
  return self.model.create_embedding(text)['data'][0]['embedding']
49
 
50
- # --- HELPER FUNCTIONS ---
51
- def encode_image(image_obj):
52
- if not image_obj: return None
53
- if image_obj.mode != 'RGB': image_obj = image_obj.convert('RGB')
54
- image_obj.thumbnail((1024, 1024))
55
- buffered = io.BytesIO()
56
- image_obj.save(buffered, format="JPEG", quality=85)
57
- return base64.b64encode(buffered.getvalue()).decode('utf-8')
58
-
59
  def get_bottle_crops(image_path):
60
- results = YOLO_MODEL(image_path, verbose=False)
 
 
 
61
  found_crops = []
62
  original_img = Image.open(image_path)
63
  for r in results:
64
  for box in r.boxes:
65
- if int(box.cls) == 39 and box.conf > 0.3: # 39 is bottle in COCO
66
  x1, y1, x2, y2 = box.xyxy[0].tolist()
67
- found_crops.append(original_img.crop((x1-10, y1-10, x2+10, y2+10)))
 
 
 
 
68
  return found_crops
69
 
70
- def clean_vision_output(raw_text):
71
- text = re.sub(r'<think>.*?</think>', '', raw_text, flags=re.DOTALL)
72
- return text.strip()
73
-
74
- def clean_final_response(text):
75
- text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
76
- for trigger in ["INSTRUCTION:", "SOURCE RECIPES FOUND:", "User Question:"]:
77
- if trigger in text: text = text.split(trigger)[0]
78
- return text.strip()
79
-
80
- # --- PIPELINE ---
81
  def init_system():
82
  global CHAT_MODEL, EMBED_MODEL, VECTOR_STORE
83
- c_path, v_path, e_path = download_models()
84
 
 
 
 
 
 
 
85
  from llama_cpp.llama_chat_format import Llava16ChatHandler
86
  chat_h = Llava16ChatHandler(clip_model_path=v_path)
87
 
88
- CHAT_MODEL = Llama(model_path=c_path, n_gpu_layers=0, n_ctx=4096, chat_handler=chat_h, verbose=False)
 
 
 
 
 
 
 
 
89
  EMBED_MODEL = LocalLlamaEmbeddings(e_path)
90
  VECTOR_STORE = Chroma(collection_name="docs", embedding_function=EMBED_MODEL, persist_directory=CHROMA_PATH)
91
- return "βœ… Bar is Open! Models Loaded."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
- def ingest(files):
 
94
  global VECTOR_STORE
95
- if not VECTOR_STORE or not files: return "⚠️ Please wait for models to load."
 
 
96
  docs = []
97
  for f in files:
98
  if f.name.endswith(".txt"):
@@ -100,67 +109,90 @@ def ingest(files):
100
  recipes = file.read().split("Recipe:")
101
  docs.extend([Document(page_content=f"Recipe:{r}") for r in recipes if len(r) > 10])
102
  elif f.name.endswith(".pdf"):
103
- docs.extend(PyPDFLoader(f.name).load())
104
- VECTOR_STORE.add_documents(docs)
105
- return f"βœ… Ingested {len(docs)} recipes."
106
-
107
- def chat_handler(message, history, img, sys_prompt, temp, strict, inv_state):
 
 
 
 
 
 
108
  if CHAT_MODEL is None:
109
- yield history, "⚠️ Loading Models...", "", inv_state
110
- init_system()
111
 
112
- # Vision Logic
113
- if img:
114
- crops = get_bottle_crops(img) or [Image.open(img)]
 
115
  detected = []
116
- for crop in crops[:3]: # Limit to 3 bottles for speed
 
 
117
  b64 = encode_image(crop)
118
- v_msg = [{"role":"user", "content":[{"type":"text","text":"Brand and type of alcohol?"},{"type":"image_url","image_url":{"url":f"data:image/jpeg;base64,{b64}"}}]}]
119
  res = CHAT_MODEL.create_chat_completion(messages=v_msg, max_tokens=20)
120
- detected.append(clean_vision_output(res['choices'][0]['message']['content']))
 
 
121
  inv_state = ", ".join(list(set(detected)))
 
122
 
123
- # RAG Logic
124
- context = "No specific recipes found."
125
- if VECTOR_STORE and inv_state:
126
  results = VECTOR_STORE.similarity_search(inv_state, k=5)
127
  context = "\n---\n".join([d.page_content for d in results])
128
 
129
- # Final Response
130
- prompt = f"INVENTORY: {inv_state}\nRECIPES: {context}\nQUESTION: {message}"
131
- messages = [{"role": "system", "content": sys_prompt}, {"role": "user", "content": prompt}]
132
 
133
- response = ""
134
  stream = CHAT_MODEL.create_chat_completion(messages=messages, stream=True, temperature=temp)
 
135
  for chunk in stream:
136
  if "content" in chunk["choices"][0]["delta"]:
137
- response += chunk["choices"][0]["delta"]["content"]
138
- history_copy = history + [[message, clean_final_response(response)]]
139
- yield history_copy, "Active", context, inv_state
140
-
141
- # --- UI ---
142
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
143
- gr.Markdown("# 🍸 LocalAGI: The AI Sommelier (HF Edition)")
144
- h_state = gr.State([])
145
- inv_state = gr.State("")
 
146
 
147
  with gr.Row():
148
  with gr.Column(scale=1):
149
- status = gr.Textbox(label="System Status", value="Click 'Initialize' to start")
150
- init_btn = gr.Button("πŸš€ 1. Initialize Bar")
151
- up = gr.File(file_count="multiple", label="2. Upload Recipes")
152
- ingest_btn = gr.Button("πŸ“₯ Ingest Recipes")
153
-
154
  with gr.Column(scale=2):
155
- chatbot = gr.Chatbot(label="Bartender")
156
- msg = gr.Textbox(label="Ask for a drink...")
157
- img = gr.Image(type="filepath", label="Upload Bottle Photo")
158
- with gr.Accordion("Settings", open=False):
159
- sys_box = gr.Textbox(value="You are a Master Mixologist.", label="System Prompt")
160
- temp = gr.Slider(0, 1, 0.7, label="Creativity")
161
-
 
 
 
 
 
 
162
  init_btn.click(init_system, None, status)
163
- ingest_btn.click(ingest, up, status)
164
- msg.submit(chat_handler, [msg, chatbot, img, sys_box, temp, gr.State(True), inv_state], [chatbot, status, gr.State(), inv_state])
 
 
165
 
166
- demo.launch()
 
 
5
  import base64
6
  import io
7
  import shutil
8
+ import gc
9
  from PIL import Image
10
  from huggingface_hub import hf_hub_download
11
  from langchain_chroma import Chroma
12
  from langchain_community.document_loaders import PyPDFLoader
13
  from langchain_core.embeddings import Embeddings
14
  from langchain_core.documents import Document
 
15
 
16
+ # --- CONFIGURATION ---
17
+ RETRIEVAL_K = 10
18
+ CHROMA_PATH = "/tmp/chroma_db"
19
 
20
  if os.path.exists(CHROMA_PATH):
21
  shutil.rmtree(CHROMA_PATH)
22
  os.makedirs(CHROMA_PATH, exist_ok=True)
23
 
24
+ # --- GLOBAL MODELS ---
 
 
 
 
 
 
 
 
 
 
 
25
  CHAT_MODEL = None
26
  EMBED_MODEL = None
27
  VECTOR_STORE = None
 
28
 
29
+ # --- EMBEDDING CLASS ---
30
  class LocalLlamaEmbeddings(Embeddings):
31
  def __init__(self, model_path):
32
+ # Small context for embeddings to save RAM
33
+ self.model = Llama(model_path=model_path, embedding=True, verbose=False, n_ctx=512)
34
  def embed_documents(self, texts):
35
  return [self.model.create_embedding(t)['data'][0]['embedding'] for t in texts]
36
  def embed_query(self, text):
37
  return self.model.create_embedding(text)['data'][0]['embedding']
38
 
39
+ # --- BOTTLE DETECTION (MEMORY OPTIMIZED) ---
 
 
 
 
 
 
 
 
40
  def get_bottle_crops(image_path):
41
+ from ultralytics import YOLO
42
+ yolo_model = YOLO("yolov8n.pt") # Downloads small weights automatically
43
+ results = yolo_model(image_path, verbose=False)
44
+
45
  found_crops = []
46
  original_img = Image.open(image_path)
47
  for r in results:
48
  for box in r.boxes:
49
+ if int(box.cls) == 39 and box.conf > 0.3: # '39' is the COCO index for bottle
50
  x1, y1, x2, y2 = box.xyxy[0].tolist()
51
+ found_crops.append(original_img.crop((x1-5, y1-5, x2+5, y2+5)))
52
+
53
+ # Cleanup YOLO immediately to free 1GB+ RAM
54
+ del yolo_model
55
+ gc.collect()
56
  return found_crops
57
 
58
+ # --- SYSTEM INITIALIZATION ---
 
 
 
 
 
 
 
 
 
 
59
  def init_system():
60
  global CHAT_MODEL, EMBED_MODEL, VECTOR_STORE
 
61
 
62
+ print("⏳ Downloading models...")
63
+ c_path = hf_hub_download(repo_id="openbmb/MiniCPM-V-2_6-gguf", filename="MiniCPM-V-2_6-Q4_K_M.gguf")
64
+ v_path = hf_hub_download(repo_id="openbmb/MiniCPM-V-2_6-gguf", filename="mmproj-MiniCPM-V-2_6-f16.gguf")
65
+ e_path = hf_hub_download(repo_id="bartowski/nomic-embed-text-v1.5-GGUF", filename="nomic-embed-text-v1.5.Q4_K_M.gguf")
66
+
67
+ print("βš™οΈ Loading Chat & Vision...")
68
  from llama_cpp.llama_chat_format import Llava16ChatHandler
69
  chat_h = Llava16ChatHandler(clip_model_path=v_path)
70
 
71
+ CHAT_MODEL = Llama(
72
+ model_path=c_path,
73
+ n_ctx=2048, # Memory-safe context size
74
+ n_batch=512,
75
+ chat_handler=chat_h,
76
+ verbose=False
77
+ )
78
+
79
+ print("πŸ“š Loading Embeddings...")
80
  EMBED_MODEL = LocalLlamaEmbeddings(e_path)
81
  VECTOR_STORE = Chroma(collection_name="docs", embedding_function=EMBED_MODEL, persist_directory=CHROMA_PATH)
82
+
83
+ return "βœ… Bar is Open! (Models Loaded)"
84
+
85
+ # --- UTILS ---
86
+ def encode_image(image_obj):
87
+ if image_obj.mode != 'RGB': image_obj = image_obj.convert('RGB')
88
+ image_obj.thumbnail((1024, 1024))
89
+ buffered = io.BytesIO()
90
+ image_obj.save(buffered, format="JPEG", quality=85)
91
+ return base64.b64encode(buffered.getvalue()).decode('utf-8')
92
+
93
+ def clean_text(text):
94
+ text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
95
+ for trigger in ["INSTRUCTION:", "SOURCE:", "User Question:"]:
96
+ if trigger in text: text = text.split(trigger)[0]
97
+ return text.strip()
98
 
99
+ # --- CORE LOGIC ---
100
+ def ingest_recipes(files):
101
  global VECTOR_STORE
102
+ if not VECTOR_STORE: return "❌ Load system first!"
103
+ if not files: return "❌ No files uploaded."
104
+
105
  docs = []
106
  for f in files:
107
  if f.name.endswith(".txt"):
 
109
  recipes = file.read().split("Recipe:")
110
  docs.extend([Document(page_content=f"Recipe:{r}") for r in recipes if len(r) > 10])
111
  elif f.name.endswith(".pdf"):
112
+ loader = PyPDFLoader(f.name)
113
+ docs.extend(loader.load())
114
+
115
+ if docs:
116
+ VECTOR_STORE.add_documents(docs)
117
+ return f"βœ… Successfully added {len(docs)} recipes to memory."
118
+ return "❌ No recipes found in files."
119
+
120
+ def bartend(message, history, img_path, sys_prompt, temp, inv_state):
121
+ global CHAT_MODEL, VECTOR_STORE
122
+
123
  if CHAT_MODEL is None:
124
+ yield history, "⚠️ Please click 'Initialize' first!", "", inv_state
125
+ return
126
 
127
+ # 1. Vision Analysis
128
+ if img_path:
129
+ yield history, "πŸ‘οΈ Analyzing your bottles...", "", inv_state
130
+ crops = get_bottle_crops(img_path)
131
  detected = []
132
+
133
+ # Only analyze up to 2 crops to stay under RAM limits
134
+ for crop in (crops[:2] if crops else [Image.open(img_path)]):
135
  b64 = encode_image(crop)
136
+ v_msg = [{"role":"user", "content":[{"type":"text","text":"Exact brand and spirit type?"},{"type":"image_url","image_url":{"url":f"data:image/jpeg;base64,{b64}"}}]}]
137
  res = CHAT_MODEL.create_chat_completion(messages=v_msg, max_tokens=20)
138
+ label = clean_text(res['choices'][0]['message']['content'])
139
+ if label: detected.append(label)
140
+
141
  inv_state = ", ".join(list(set(detected)))
142
+ yield history, "πŸ” Searching recipes...", "", inv_state
143
 
144
+ # 2. RAG Retrieval
145
+ context = "No specific recipe found."
146
+ if inv_state and VECTOR_STORE:
147
  results = VECTOR_STORE.similarity_search(inv_state, k=5)
148
  context = "\n---\n".join([d.page_content for d in results])
149
 
150
+ # 3. Final Answer Generation
151
+ full_prompt = f"INVENTORY: {inv_state}\n\nRECIPE SOURCE:\n{context}\n\nUSER REQUEST: {message}"
152
+ messages = [{"role": "system", "content": sys_prompt}, {"role": "user", "content": full_prompt}]
153
 
154
+ response_text = ""
155
  stream = CHAT_MODEL.create_chat_completion(messages=messages, stream=True, temperature=temp)
156
+
157
  for chunk in stream:
158
  if "content" in chunk["choices"][0]["delta"]:
159
+ response_text += chunk["choices"][0]["delta"]["content"]
160
+ # Update history for Gradio
161
+ new_history = history + [[message, clean_text(response_text)]]
162
+ yield new_history, "βœ… Ready", context, inv_state
163
+
164
+ # --- GRADIO UI ---
165
+ with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
166
+ gr.Markdown("# 🍸 LocalAGI: The AI Sommelier")
167
+
168
+ inventory = gr.State("")
169
 
170
  with gr.Row():
171
  with gr.Column(scale=1):
172
+ status = gr.Textbox(label="Status", value="Ready to initialize")
173
+ init_btn = gr.Button("πŸš€ 1. Initialize Bar", variant="primary")
174
+ file_up = gr.File(label="2. Add Recipe PDFs/Texts", file_count="multiple")
175
+ ingest_btn = gr.Button("πŸ“₯ Load Recipes")
176
+
177
  with gr.Column(scale=2):
178
+ chatbot = gr.Chatbot(label="Bartender", height=450)
179
+ with gr.Row():
180
+ msg_input = gr.Textbox(label="What are we drinking?", placeholder="I want something sour...", scale=4)
181
+ send_btn = gr.Button("Send", variant="primary", scale=1)
182
+
183
+ img_input = gr.Image(type="filepath", label="Upload Bottle Image (Optional)")
184
+
185
+ with gr.Accordion("Debug & Settings", open=False):
186
+ sys_prompt = gr.Textbox(value="You are a professional bartender. Use the provided recipes.", label="System Prompt")
187
+ temp_slider = gr.Slider(0, 1, 0.3, label="Creativity")
188
+ reasoning = gr.TextArea(label="Retrieved Context", interactive=False)
189
+
190
+ # Event Mapping
191
  init_btn.click(init_system, None, status)
192
+ ingest_btn.click(ingest_recipes, file_up, status)
193
+
194
+ msg_submit = msg_input.submit(bartend, [msg_input, chatbot, img_input, sys_prompt, temp_slider, inventory], [chatbot, status, reasoning, inventory])
195
+ btn_submit = send_btn.click(bartend, [msg_input, chatbot, img_input, sys_prompt, temp_slider, inventory], [chatbot, status, reasoning, inventory])
196
 
197
+ if __name__ == "__main__":
198
+ demo.launch()