skshimada committed on
Commit
c20bab5
·
verified ·
1 Parent(s): feb3d1b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -150
app.py CHANGED
@@ -1,198 +1,117 @@
1
  import gradio as gr
2
- from llama_cpp import Llama
3
  import os
4
  import re
5
- import base64
6
- import io
7
- import shutil
8
  import gc
9
  from PIL import Image
10
- from huggingface_hub import hf_hub_download
11
  from langchain_chroma import Chroma
12
- from langchain_community.document_loaders import PyPDFLoader
13
- from langchain_core.embeddings import Embeddings
14
- from langchain_core.documents import Document
 
15
 
16
  # --- CONFIGURATION ---
17
- RETRIEVAL_K = 10
18
  CHROMA_PATH = "/tmp/chroma_db"
 
 
19
 
20
- if os.path.exists(CHROMA_PATH):
21
- shutil.rmtree(CHROMA_PATH)
22
- os.makedirs(CHROMA_PATH, exist_ok=True)
23
-
24
- # --- GLOBAL MODELS ---
25
- CHAT_MODEL = None
26
- EMBED_MODEL = None
27
- VECTOR_STORE = None
28
 
29
- # --- EMBEDDING CLASS ---
30
- class LocalLlamaEmbeddings(Embeddings):
31
- def __init__(self, model_path):
32
- # Small context for embeddings to save RAM
33
- self.model = Llama(model_path=model_path, embedding=True, verbose=False, n_ctx=512)
34
- def embed_documents(self, texts):
35
- return [self.model.create_embedding(t)['data'][0]['embedding'] for t in texts]
36
- def embed_query(self, text):
37
- return self.model.create_embedding(text)['data'][0]['embedding']
38
 
39
- # --- BOTTLE DETECTION (MEMORY OPTIMIZED) ---
40
  def get_bottle_crops(image_path):
41
- from ultralytics import YOLO
42
- yolo_model = YOLO("yolov8n.pt") # Downloads small weights automatically
43
  results = yolo_model(image_path, verbose=False)
44
-
45
  found_crops = []
46
  original_img = Image.open(image_path)
47
  for r in results:
48
  for box in r.boxes:
49
- if int(box.cls) == 39 and box.conf > 0.3: # '39' is the COCO index for bottle
50
  x1, y1, x2, y2 = box.xyxy[0].tolist()
51
  found_crops.append(original_img.crop((x1-5, y1-5, x2+5, y2+5)))
52
-
53
- # Cleanup YOLO immediately to free 1GB+ RAM
54
  del yolo_model
55
- gc.collect()
56
  return found_crops
57
 
58
- # --- SYSTEM INITIALIZATION ---
59
- def init_system():
60
- global CHAT_MODEL, EMBED_MODEL, VECTOR_STORE
61
-
62
- print("⏳ Downloading models...")
63
- c_path = hf_hub_download(repo_id="openbmb/MiniCPM-V-2_6-gguf", filename="MiniCPM-V-2_6-Q4_K_M.gguf")
64
- v_path = hf_hub_download(repo_id="openbmb/MiniCPM-V-2_6-gguf", filename="mmproj-MiniCPM-V-2_6-f16.gguf")
65
- e_path = hf_hub_download(repo_id="bartowski/nomic-embed-text-v1.5-GGUF", filename="nomic-embed-text-v1.5.Q4_K_M.gguf")
66
-
67
- print("⚙️ Loading Chat & Vision...")
68
- from llama_cpp.llama_chat_format import Llava16ChatHandler
69
- chat_h = Llava16ChatHandler(clip_model_path=v_path)
70
-
71
- CHAT_MODEL = Llama(
72
- model_path=c_path,
73
- n_ctx=2048, # Memory-safe context size
74
- n_batch=512,
75
- chat_handler=chat_h,
76
- verbose=False
77
- )
78
-
79
- print("📚 Loading Embeddings...")
80
- EMBED_MODEL = LocalLlamaEmbeddings(e_path)
81
- VECTOR_STORE = Chroma(collection_name="docs", embedding_function=EMBED_MODEL, persist_directory=CHROMA_PATH)
82
-
83
- return "✅ Bar is Open! (Models Loaded)"
84
-
85
- # --- UTILS ---
86
- def encode_image(image_obj):
87
- if image_obj.mode != 'RGB': image_obj = image_obj.convert('RGB')
88
- image_obj.thumbnail((1024, 1024))
89
- buffered = io.BytesIO()
90
- image_obj.save(buffered, format="JPEG", quality=85)
91
- return base64.b64encode(buffered.getvalue()).decode('utf-8')
92
-
93
- def clean_text(text):
94
- text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
95
- for trigger in ["INSTRUCTION:", "SOURCE:", "User Question:"]:
96
- if trigger in text: text = text.split(trigger)[0]
97
- return text.strip()
98
-
99
- # --- CORE LOGIC ---
100
  def ingest_recipes(files):
101
- global VECTOR_STORE
102
- if not VECTOR_STORE: return "❌ Load system first!"
103
  if not files: return "❌ No files uploaded."
104
 
105
  docs = []
106
  for f in files:
107
  if f.name.endswith(".txt"):
108
- with open(f.name, "r") as file:
109
- recipes = file.read().split("Recipe:")
110
- docs.extend([Document(page_content=f"Recipe:{r}") for r in recipes if len(r) > 10])
111
  elif f.name.endswith(".pdf"):
112
  loader = PyPDFLoader(f.name)
113
  docs.extend(loader.load())
114
 
115
- if docs:
116
- VECTOR_STORE.add_documents(docs)
117
- return f"✅ Successfully added {len(docs)} recipes to memory."
118
- return "❌ No recipes found in files."
119
-
120
- def bartend(message, history, img_path, sys_prompt, temp, inv_state):
121
- global CHAT_MODEL, VECTOR_STORE
122
-
123
- if CHAT_MODEL is None:
124
- yield history, "⚠️ Please click 'Initialize' first!", "", inv_state
125
- return
126
 
127
- # 1. Vision Analysis
 
 
128
  if img_path:
129
- yield history, "👁️ Analyzing your bottles...", "", inv_state
130
  crops = get_bottle_crops(img_path)
131
- detected = []
132
-
133
- # Only analyze up to 2 crops to stay under RAM limits
134
- for crop in (crops[:2] if crops else [Image.open(img_path)]):
135
- b64 = encode_image(crop)
136
- v_msg = [{"role":"user", "content":[{"type":"text","text":"Exact brand and spirit type?"},{"type":"image_url","image_url":{"url":f"data:image/jpeg;base64,{b64}"}}]}]
137
- res = CHAT_MODEL.create_chat_completion(messages=v_msg, max_tokens=20)
138
- label = clean_text(res['choices'][0]['message']['content'])
139
- if label: detected.append(label)
140
-
141
- inv_state = ", ".join(list(set(detected)))
142
- yield history, "🔍 Searching recipes...", "", inv_state
143
-
144
- # 2. RAG Retrieval
145
- context = "No specific recipe found."
146
- if inv_state and VECTOR_STORE:
147
- results = VECTOR_STORE.similarity_search(inv_state, k=5)
148
- context = "\n---\n".join([d.page_content for d in results])
149
-
150
- # 3. Final Answer Generation
151
- full_prompt = f"INVENTORY: {inv_state}\n\nRECIPE SOURCE:\n{context}\n\nUSER REQUEST: {message}"
152
- messages = [{"role": "system", "content": sys_prompt}, {"role": "user", "content": full_prompt}]
153
 
154
- response_text = ""
155
- stream = CHAT_MODEL.create_chat_completion(messages=messages, stream=True, temperature=temp)
156
-
157
- for chunk in stream:
158
- if "content" in chunk["choices"][0]["delta"]:
159
- response_text += chunk["choices"][0]["delta"]["content"]
160
- # Update history for Gradio
161
- new_history = history + [[message, clean_text(response_text)]]
162
- yield new_history, "✅ Ready", context, inv_state
163
-
164
- # --- GRADIO UI ---
165
- with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
166
- gr.Markdown("# 🍸 LocalAGI: The AI Sommelier")
167
-
168
- inventory = gr.State("")
169
 
170
  with gr.Row():
171
  with gr.Column(scale=1):
172
- status = gr.Textbox(label="Status", value="Ready to initialize")
173
- init_btn = gr.Button("🚀 1. Initialize Bar", variant="primary")
174
- file_up = gr.File(label="2. Add Recipe PDFs/Texts", file_count="multiple")
175
  ingest_btn = gr.Button("📥 Load Recipes")
 
176
 
177
  with gr.Column(scale=2):
178
- chatbot = gr.Chatbot(label="Bartender", height=450)
179
- with gr.Row():
180
- msg_input = gr.Textbox(label="What are we drinking?", placeholder="I want something sour...", scale=4)
181
- send_btn = gr.Button("Send", variant="primary", scale=1)
182
-
183
- img_input = gr.Image(type="filepath", label="Upload Bottle Image (Optional)")
184
-
185
- with gr.Accordion("Debug & Settings", open=False):
186
- sys_prompt = gr.Textbox(value="You are a professional bartender. Use the provided recipes.", label="System Prompt")
187
- temp_slider = gr.Slider(0, 1, 0.3, label="Creativity")
188
- reasoning = gr.TextArea(label="Retrieved Context", interactive=False)
189
 
190
- # Event Mapping
191
- init_btn.click(init_system, None, status)
192
  ingest_btn.click(ingest_recipes, file_up, status)
193
-
194
- msg_submit = msg_input.submit(bartend, [msg_input, chatbot, img_input, sys_prompt, temp_slider, inventory], [chatbot, status, reasoning, inventory])
195
- btn_submit = send_btn.click(bartend, [msg_input, chatbot, img_input, sys_prompt, temp_slider, inventory], [chatbot, status, reasoning, inventory])
196
 
197
  if __name__ == "__main__":
198
  demo.launch()
 
1
  import gradio as gr
 
2
  import os
3
  import re
4
+ import torch
 
 
5
  import gc
6
  from PIL import Image
7
+ from transformers import pipeline
8
  from langchain_chroma import Chroma
9
+ from langchain_community.document_loaders import PyPDFLoader, TextLoader
10
+ from langchain_core.documents import Document
11
+ from langchain_huggingface import HuggingFaceEmbeddings
12
+ from ultralytics import YOLO
13
 
14
# --- CONFIGURATION ---
CHROMA_PATH = "/tmp/chroma_db"
# Using a native HF Vision model that doesn't need C++ compilation
VISION_MODEL = "HuggingFaceTB/SmolVLM-Instruct"

# --- SYSTEM INITIALIZATION ---
# This uses 'transformers', which is pre-installed on HF Spaces.
print("⚙️ Loading Stable Vision Engine...")
vision_pipe = pipeline(
    "image-to-text",
    model=VISION_MODEL,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

print("📚 Loading Embedding Engine...")
# This replaces the Llama-embeddings to avoid 'Building Wheels'.
embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
28
# --- BOTTLE DETECTION ---
def get_bottle_crops(image_path):
    """Detect bottles in the image at *image_path* and return padded PIL crops.

    Runs a small YOLOv8 model (COCO class index 39 is 'bottle') and crops
    each detection with a 5px margin. The YOLO model is deleted right after
    inference to release its memory.

    Returns a (possibly empty) list of PIL.Image crops.
    """
    yolo_model = YOLO("yolov8n.pt")
    results = yolo_model(image_path, verbose=False)
    found_crops = []
    original_img = Image.open(image_path)
    width, height = original_img.size
    for r in results:
        for box in r.boxes:
            if int(box.cls) == 39:  # Bottle
                x1, y1, x2, y2 = box.xyxy[0].tolist()
                # Pad by 5px but clamp to the image bounds: PIL pads
                # out-of-frame crop regions with black, which would feed
                # artificial borders to the downstream vision model.
                left = max(0, x1 - 5)
                top = max(0, y1 - 5)
                right = min(width, x2 + 5)
                bottom = min(height, y2 + 5)
                found_crops.append(original_img.crop((left, top, right, bottom)))
    # Free the detector immediately (it is large relative to free-tier RAM).
    del yolo_model
    gc.collect()
    return found_crops
42
 
43
# --- RECIPE INGESTION ---
def ingest_recipes(files):
    """Load uploaded .txt/.pdf recipe files into the Chroma vector store.

    Parameters: *files* is the list of Gradio file objects (each with a
    ``.name`` path attribute), or None/empty when nothing was uploaded.
    Returns a human-readable status string for the UI.
    """
    if not files:
        return "❌ No files uploaded."

    docs = []
    for f in files:
        if f.name.endswith(".txt"):
            loader = TextLoader(f.name)
            docs.extend(loader.load())
        elif f.name.endswith(".pdf"):
            loader = PyPDFLoader(f.name)
            docs.extend(loader.load())

    # Guard: Chroma.from_documents raises on an empty document list (e.g.
    # only unsupported file types were uploaded) — return a friendly
    # message instead of crashing the UI callback.
    if not docs:
        return "❌ No recipes found in files."

    Chroma.from_documents(
        documents=docs,
        embedding=embed_model,
        persist_directory=CHROMA_PATH,
    )
    return f"✅ Ingested {len(docs)} pages/recipes."
 
 
 
 
 
62
 
63
# --- BARTENDER LOGIC ---
def bartend(message, history, img_path, inventory):
    """Chat callback: optionally scan a bottle photo, search recipes, reply.

    Parameters: *message* is the user's text, *history* the Chatbot list of
    (user, bot) tuples (mutated in place), *img_path* an optional image file
    path, *inventory* the current detected-bottle state string.
    Returns the updated (history, inventory) pair for Gradio.
    """
    # 1. Vision Scanning
    if img_path:
        crops = get_bottle_crops(img_path)
        target = crops[0] if crops else Image.open(img_path)
        # Use Transformers instead of llama-cpp for the label reading
        output = vision_pipe(target, prompt="What brand of alcohol is this?", generate_kwargs={"max_new_tokens": 30})
        inventory = output[0]['generated_text'].replace("brand", "").strip()

    # 2. RAG (Search your PDFs)
    context = ""
    try:
        vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
        search_query = f"{inventory} cocktail"
        results = vs.similarity_search(search_query, k=3)
        context = "\n".join([d.page_content for d in results])
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # still propagate; any store/search failure falls back gracefully.
        context = "No PDF recipes loaded yet."

    # 3. Template-based response construction for free-tier stability
    # (no LLM text generation on this path).
    if "No PDF" in context:
        response = f"I see you have {inventory}! Since no recipe books are loaded, I recommend a classic pairing. What's your flavor profile?"
    else:
        response = f"I found a recipe in your books for {inventory}!\n\n{context[:500]}..."

    history.append((message, response))
    return history, inventory
95
+
96
# --- UI LAYOUT ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.HTML("<h1 style='text-align:center'>🍸 LocalAGI: The Cloud-Stable Sommelier</h1>")
    # Holds the last detected bottle label across chat turns.
    inv_state = gr.State("Empty Shelf")

    with gr.Row():
        # Left column: recipe ingestion controls.
        with gr.Column(scale=1):
            file_up = gr.File(label="Upload Recipe PDFs", file_count="multiple")
            ingest_btn = gr.Button("📥 Load Recipes")
            status = gr.Textbox(label="System Status", value="Ready")
        # Right column: chat interface and bottle photo upload.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=400)
            msg = gr.Textbox(label="Ask the Bartender")
            img = gr.Image(type="filepath", label="Bottle Photo")
            send_btn = gr.Button("Mix Drink", variant="primary")

    # Event wiring
    ingest_btn.click(ingest_recipes, file_up, status)
    send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])

if __name__ == "__main__":
    demo.launch()