skshimada committed on
Commit
7280e12
·
verified ·
1 Parent(s): 1f0b3b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -91
app.py CHANGED
@@ -5,94 +5,103 @@ import re
5
  import base64
6
  import io
7
  import shutil
 
8
  from PIL import Image
9
  from huggingface_hub import hf_hub_download
10
  from langchain_chroma import Chroma
11
  from langchain_community.document_loaders import PyPDFLoader
12
  from langchain_core.embeddings import Embeddings
13
  from langchain_core.documents import Document
14
- from ultralytics import YOLO
15
 
16
- # --- CONFIGURATION & SPACE PREP ---
17
- RETRIEVAL_K = 15
18
- CHROMA_PATH = "/tmp/chroma_db" # Use /tmp for HF Spaces ephemeral storage
19
 
20
  if os.path.exists(CHROMA_PATH):
21
  shutil.rmtree(CHROMA_PATH)
22
  os.makedirs(CHROMA_PATH, exist_ok=True)
23
 
24
- # --- MODEL DOWNLOADER ---
25
- # Using GGUF models hosted on HF. You can change these repos/filenames.
26
- def download_models():
27
- print("⏳ Downloading models from HF Hub (this may take a minute)...")
28
- # Using MiniCPM-V-2_6 as requested in your original logic
29
- chat_path = hf_hub_download(repo_id="openbmb/MiniCPM-V-2_6-gguf", filename="MiniCPM-V-2_6-Q4_K_M.gguf")
30
- vis_path = hf_hub_download(repo_id="openbmb/MiniCPM-V-2_6-gguf", filename="mmproj-MiniCPM-V-2_6-f16.gguf")
31
- # Using a standard embedding model
32
- emb_path = hf_hub_download(repo_id="bartowski/nomic-embed-text-v1.5-GGUF", filename="nomic-embed-text-v1.5.Q4_K_M.gguf")
33
- return chat_path, vis_path, emb_path
34
-
35
- # --- GLOBAL VARIABLES ---
36
  CHAT_MODEL = None
37
  EMBED_MODEL = None
38
  VECTOR_STORE = None
39
- YOLO_MODEL = YOLO("yolov8n.pt") # Standard YOLOv8 nano
40
 
41
- # --- CLASSES ---
42
  class LocalLlamaEmbeddings(Embeddings):
43
  def __init__(self, model_path):
44
- self.model = Llama(model_path=model_path, embedding=True, verbose=False, n_ctx=2048)
 
45
  def embed_documents(self, texts):
46
  return [self.model.create_embedding(t)['data'][0]['embedding'] for t in texts]
47
  def embed_query(self, text):
48
  return self.model.create_embedding(text)['data'][0]['embedding']
49
 
50
- # --- HELPER FUNCTIONS ---
51
- def encode_image(image_obj):
52
- if not image_obj: return None
53
- if image_obj.mode != 'RGB': image_obj = image_obj.convert('RGB')
54
- image_obj.thumbnail((1024, 1024))
55
- buffered = io.BytesIO()
56
- image_obj.save(buffered, format="JPEG", quality=85)
57
- return base64.b64encode(buffered.getvalue()).decode('utf-8')
58
-
59
  def get_bottle_crops(image_path):
60
- results = YOLO_MODEL(image_path, verbose=False)
 
 
 
61
  found_crops = []
62
  original_img = Image.open(image_path)
63
  for r in results:
64
  for box in r.boxes:
65
- if int(box.cls) == 39 and box.conf > 0.3: # 39 is bottle in COCO
66
  x1, y1, x2, y2 = box.xyxy[0].tolist()
67
- found_crops.append(original_img.crop((x1-10, y1-10, x2+10, y2+10)))
 
 
 
 
68
  return found_crops
69
 
70
- def clean_vision_output(raw_text):
71
- text = re.sub(r'<think>.*?</think>', '', raw_text, flags=re.DOTALL)
72
- return text.strip()
73
-
74
- def clean_final_response(text):
75
- text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
76
- for trigger in ["INSTRUCTION:", "SOURCE RECIPES FOUND:", "User Question:"]:
77
- if trigger in text: text = text.split(trigger)[0]
78
- return text.strip()
79
-
80
- # --- PIPELINE ---
81
  def init_system():
82
  global CHAT_MODEL, EMBED_MODEL, VECTOR_STORE
83
- c_path, v_path, e_path = download_models()
84
 
 
 
 
 
 
 
85
  from llama_cpp.llama_chat_format import Llava16ChatHandler
86
  chat_h = Llava16ChatHandler(clip_model_path=v_path)
87
 
88
- CHAT_MODEL = Llama(model_path=c_path, n_gpu_layers=0, n_ctx=4096, chat_handler=chat_h, verbose=False)
 
 
 
 
 
 
 
 
89
  EMBED_MODEL = LocalLlamaEmbeddings(e_path)
90
  VECTOR_STORE = Chroma(collection_name="docs", embedding_function=EMBED_MODEL, persist_directory=CHROMA_PATH)
91
- return "βœ… Bar is Open! Models Loaded."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
- def ingest(files):
 
94
  global VECTOR_STORE
95
- if not VECTOR_STORE or not files: return "⚠️ Please wait for models to load."
 
 
96
  docs = []
97
  for f in files:
98
  if f.name.endswith(".txt"):
@@ -100,67 +109,90 @@ def ingest(files):
100
  recipes = file.read().split("Recipe:")
101
  docs.extend([Document(page_content=f"Recipe:{r}") for r in recipes if len(r) > 10])
102
  elif f.name.endswith(".pdf"):
103
- docs.extend(PyPDFLoader(f.name).load())
104
- VECTOR_STORE.add_documents(docs)
105
- return f"βœ… Ingested {len(docs)} recipes."
106
-
107
- def chat_handler(message, history, img, sys_prompt, temp, strict, inv_state):
 
 
 
 
 
 
108
  if CHAT_MODEL is None:
109
- yield history, "⚠️ Loading Models...", "", inv_state
110
- init_system()
111
 
112
- # Vision Logic
113
- if img:
114
- crops = get_bottle_crops(img) or [Image.open(img)]
 
115
  detected = []
116
- for crop in crops[:3]: # Limit to 3 bottles for speed
 
 
117
  b64 = encode_image(crop)
118
- v_msg = [{"role":"user", "content":[{"type":"text","text":"Brand and type of alcohol?"},{"type":"image_url","image_url":{"url":f"data:image/jpeg;base64,{b64}"}}]}]
119
  res = CHAT_MODEL.create_chat_completion(messages=v_msg, max_tokens=20)
120
- detected.append(clean_vision_output(res['choices'][0]['message']['content']))
 
 
121
  inv_state = ", ".join(list(set(detected)))
 
122
 
123
- # RAG Logic
124
- context = "No specific recipes found."
125
- if VECTOR_STORE and inv_state:
126
  results = VECTOR_STORE.similarity_search(inv_state, k=5)
127
  context = "\n---\n".join([d.page_content for d in results])
128
 
129
- # Final Response
130
- prompt = f"INVENTORY: {inv_state}\nRECIPES: {context}\nQUESTION: {message}"
131
- messages = [{"role": "system", "content": sys_prompt}, {"role": "user", "content": prompt}]
132
 
133
- response = ""
134
  stream = CHAT_MODEL.create_chat_completion(messages=messages, stream=True, temperature=temp)
 
135
  for chunk in stream:
136
  if "content" in chunk["choices"][0]["delta"]:
137
- response += chunk["choices"][0]["delta"]["content"]
138
- history_copy = history + [[message, clean_final_response(response)]]
139
- yield history_copy, "Active", context, inv_state
140
-
141
- # --- UI ---
142
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
143
- gr.Markdown("# 🍸 LocalAGI: The AI Sommelier (HF Edition)")
144
- h_state = gr.State([])
145
- inv_state = gr.State("")
 
146
 
147
  with gr.Row():
148
  with gr.Column(scale=1):
149
- status = gr.Textbox(label="System Status", value="Click 'Initialize' to start")
150
- init_btn = gr.Button("πŸš€ 1. Initialize Bar")
151
- up = gr.File(file_count="multiple", label="2. Upload Recipes")
152
- ingest_btn = gr.Button("πŸ“₯ Ingest Recipes")
153
-
154
  with gr.Column(scale=2):
155
- chatbot = gr.Chatbot(label="Bartender")
156
- msg = gr.Textbox(label="Ask for a drink...")
157
- img = gr.Image(type="filepath", label="Upload Bottle Photo")
158
- with gr.Accordion("Settings", open=False):
159
- sys_box = gr.Textbox(value="You are a Master Mixologist.", label="System Prompt")
160
- temp = gr.Slider(0, 1, 0.7, label="Creativity")
161
-
 
 
 
 
 
 
162
  init_btn.click(init_system, None, status)
163
- ingest_btn.click(ingest, up, status)
164
- msg.submit(chat_handler, [msg, chatbot, img, sys_box, temp, gr.State(True), inv_state], [chatbot, status, gr.State(), inv_state])
 
 
165
 
166
- demo.launch()
 
 
5
  import base64
6
  import io
7
  import shutil
8
+ import gc
9
  from PIL import Image
10
  from huggingface_hub import hf_hub_download
11
  from langchain_chroma import Chroma
12
  from langchain_community.document_loaders import PyPDFLoader
13
  from langchain_core.embeddings import Embeddings
14
  from langchain_core.documents import Document
 
15
 
16
+ # --- CONFIGURATION ---
17
+ RETRIEVAL_K = 10
18
+ CHROMA_PATH = "/tmp/chroma_db"
19
 
20
  if os.path.exists(CHROMA_PATH):
21
  shutil.rmtree(CHROMA_PATH)
22
  os.makedirs(CHROMA_PATH, exist_ok=True)
23
 
24
+ # --- GLOBAL MODELS ---
 
 
 
 
 
 
 
 
 
 
 
25
  CHAT_MODEL = None
26
  EMBED_MODEL = None
27
  VECTOR_STORE = None
 
28
 
29
+ # --- EMBEDDING CLASS ---
30
  class LocalLlamaEmbeddings(Embeddings):
31
  def __init__(self, model_path):
32
+ # Small context for embeddings to save RAM
33
+ self.model = Llama(model_path=model_path, embedding=True, verbose=False, n_ctx=512)
34
  def embed_documents(self, texts):
35
  return [self.model.create_embedding(t)['data'][0]['embedding'] for t in texts]
36
  def embed_query(self, text):
37
  return self.model.create_embedding(text)['data'][0]['embedding']
38
 
39
+ # --- BOTTLE DETECTION (MEMORY OPTIMIZED) ---
 
 
 
 
 
 
 
 
40
  def get_bottle_crops(image_path):
41
+ from ultralytics import YOLO
42
+ yolo_model = YOLO("yolov8n.pt") # Downloads small weights automatically
43
+ results = yolo_model(image_path, verbose=False)
44
+
45
  found_crops = []
46
  original_img = Image.open(image_path)
47
  for r in results:
48
  for box in r.boxes:
49
+ if int(box.cls) == 39 and box.conf > 0.3: # '39' is the COCO index for bottle
50
  x1, y1, x2, y2 = box.xyxy[0].tolist()
51
+ found_crops.append(original_img.crop((x1-5, y1-5, x2+5, y2+5)))
52
+
53
+ # Cleanup YOLO immediately to free 1GB+ RAM
54
+ del yolo_model
55
+ gc.collect()
56
  return found_crops
57
 
58
+ # --- SYSTEM INITIALIZATION ---
 
 
 
 
 
 
 
 
 
 
59
  def init_system():
60
  global CHAT_MODEL, EMBED_MODEL, VECTOR_STORE
 
61
 
62
+ print("⏳ Downloading models...")
63
+ c_path = hf_hub_download(repo_id="openbmb/MiniCPM-V-2_6-gguf", filename="MiniCPM-V-2_6-Q4_K_M.gguf")
64
+ v_path = hf_hub_download(repo_id="openbmb/MiniCPM-V-2_6-gguf", filename="mmproj-MiniCPM-V-2_6-f16.gguf")
65
+ e_path = hf_hub_download(repo_id="bartowski/nomic-embed-text-v1.5-GGUF", filename="nomic-embed-text-v1.5.Q4_K_M.gguf")
66
+
67
+ print("βš™οΈ Loading Chat & Vision...")
68
  from llama_cpp.llama_chat_format import Llava16ChatHandler
69
  chat_h = Llava16ChatHandler(clip_model_path=v_path)
70
 
71
+ CHAT_MODEL = Llama(
72
+ model_path=c_path,
73
+ n_ctx=2048, # Memory-safe context size
74
+ n_batch=512,
75
+ chat_handler=chat_h,
76
+ verbose=False
77
+ )
78
+
79
+ print("πŸ“š Loading Embeddings...")
80
  EMBED_MODEL = LocalLlamaEmbeddings(e_path)
81
  VECTOR_STORE = Chroma(collection_name="docs", embedding_function=EMBED_MODEL, persist_directory=CHROMA_PATH)
82
+
83
+ return "βœ… Bar is Open! (Models Loaded)"
84
+
85
+ # --- UTILS ---
86
+ def encode_image(image_obj):
87
+ if image_obj.mode != 'RGB': image_obj = image_obj.convert('RGB')
88
+ image_obj.thumbnail((1024, 1024))
89
+ buffered = io.BytesIO()
90
+ image_obj.save(buffered, format="JPEG", quality=85)
91
+ return base64.b64encode(buffered.getvalue()).decode('utf-8')
92
+
93
+ def clean_text(text):
94
+ text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
95
+ for trigger in ["INSTRUCTION:", "SOURCE:", "User Question:"]:
96
+ if trigger in text: text = text.split(trigger)[0]
97
+ return text.strip()
98
 
99
+ # --- CORE LOGIC ---
100
+ def ingest_recipes(files):
101
  global VECTOR_STORE
102
+ if not VECTOR_STORE: return "❌ Load system first!"
103
+ if not files: return "❌ No files uploaded."
104
+
105
  docs = []
106
  for f in files:
107
  if f.name.endswith(".txt"):
 
109
  recipes = file.read().split("Recipe:")
110
  docs.extend([Document(page_content=f"Recipe:{r}") for r in recipes if len(r) > 10])
111
  elif f.name.endswith(".pdf"):
112
+ loader = PyPDFLoader(f.name)
113
+ docs.extend(loader.load())
114
+
115
+ if docs:
116
+ VECTOR_STORE.add_documents(docs)
117
+ return f"βœ… Successfully added {len(docs)} recipes to memory."
118
+ return "❌ No recipes found in files."
119
+
120
+ def bartend(message, history, img_path, sys_prompt, temp, inv_state):
121
+ global CHAT_MODEL, VECTOR_STORE
122
+
123
  if CHAT_MODEL is None:
124
+ yield history, "⚠️ Please click 'Initialize' first!", "", inv_state
125
+ return
126
 
127
+ # 1. Vision Analysis
128
+ if img_path:
129
+ yield history, "πŸ‘οΈ Analyzing your bottles...", "", inv_state
130
+ crops = get_bottle_crops(img_path)
131
  detected = []
132
+
133
+ # Only analyze up to 2 crops to stay under RAM limits
134
+ for crop in (crops[:2] if crops else [Image.open(img_path)]):
135
  b64 = encode_image(crop)
136
+ v_msg = [{"role":"user", "content":[{"type":"text","text":"Exact brand and spirit type?"},{"type":"image_url","image_url":{"url":f"data:image/jpeg;base64,{b64}"}}]}]
137
  res = CHAT_MODEL.create_chat_completion(messages=v_msg, max_tokens=20)
138
+ label = clean_text(res['choices'][0]['message']['content'])
139
+ if label: detected.append(label)
140
+
141
  inv_state = ", ".join(list(set(detected)))
142
+ yield history, "πŸ” Searching recipes...", "", inv_state
143
 
144
+ # 2. RAG Retrieval
145
+ context = "No specific recipe found."
146
+ if inv_state and VECTOR_STORE:
147
  results = VECTOR_STORE.similarity_search(inv_state, k=5)
148
  context = "\n---\n".join([d.page_content for d in results])
149
 
150
+ # 3. Final Answer Generation
151
+ full_prompt = f"INVENTORY: {inv_state}\n\nRECIPE SOURCE:\n{context}\n\nUSER REQUEST: {message}"
152
+ messages = [{"role": "system", "content": sys_prompt}, {"role": "user", "content": full_prompt}]
153
 
154
+ response_text = ""
155
  stream = CHAT_MODEL.create_chat_completion(messages=messages, stream=True, temperature=temp)
156
+
157
  for chunk in stream:
158
  if "content" in chunk["choices"][0]["delta"]:
159
+ response_text += chunk["choices"][0]["delta"]["content"]
160
+ # Update history for Gradio
161
+ new_history = history + [[message, clean_text(response_text)]]
162
+ yield new_history, "βœ… Ready", context, inv_state
163
+
164
+ # --- GRADIO UI ---
165
+ with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
166
+ gr.Markdown("# 🍸 LocalAGI: The AI Sommelier")
167
+
168
+ inventory = gr.State("")
169
 
170
  with gr.Row():
171
  with gr.Column(scale=1):
172
+ status = gr.Textbox(label="Status", value="Ready to initialize")
173
+ init_btn = gr.Button("πŸš€ 1. Initialize Bar", variant="primary")
174
+ file_up = gr.File(label="2. Add Recipe PDFs/Texts", file_count="multiple")
175
+ ingest_btn = gr.Button("πŸ“₯ Load Recipes")
176
+
177
  with gr.Column(scale=2):
178
+ chatbot = gr.Chatbot(label="Bartender", height=450)
179
+ with gr.Row():
180
+ msg_input = gr.Textbox(label="What are we drinking?", placeholder="I want something sour...", scale=4)
181
+ send_btn = gr.Button("Send", variant="primary", scale=1)
182
+
183
+ img_input = gr.Image(type="filepath", label="Upload Bottle Image (Optional)")
184
+
185
+ with gr.Accordion("Debug & Settings", open=False):
186
+ sys_prompt = gr.Textbox(value="You are a professional bartender. Use the provided recipes.", label="System Prompt")
187
+ temp_slider = gr.Slider(0, 1, 0.3, label="Creativity")
188
+ reasoning = gr.TextArea(label="Retrieved Context", interactive=False)
189
+
190
+ # Event Mapping
191
  init_btn.click(init_system, None, status)
192
+ ingest_btn.click(ingest_recipes, file_up, status)
193
+
194
+ msg_submit = msg_input.submit(bartend, [msg_input, chatbot, img_input, sys_prompt, temp_slider, inventory], [chatbot, status, reasoning, inventory])
195
+ btn_submit = send_btn.click(bartend, [msg_input, chatbot, img_input, sys_prompt, temp_slider, inventory], [chatbot, status, reasoning, inventory])
196
 
197
+ if __name__ == "__main__":
198
+ demo.launch()