skshimada committed on
Commit
c20bab5
·
verified ·
1 Parent(s): feb3d1b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -150
app.py CHANGED
@@ -1,198 +1,117 @@
1
  import gradio as gr
2
- from llama_cpp import Llama
3
  import os
4
  import re
5
- import base64
6
- import io
7
- import shutil
8
  import gc
9
  from PIL import Image
10
- from huggingface_hub import hf_hub_download
11
  from langchain_chroma import Chroma
12
- from langchain_community.document_loaders import PyPDFLoader
13
- from langchain_core.embeddings import Embeddings
14
- from langchain_core.documents import Document
 
15
 
16
  # --- CONFIGURATION ---
17
- RETRIEVAL_K = 10
18
  CHROMA_PATH = "/tmp/chroma_db"
 
 
19
 
20
- if os.path.exists(CHROMA_PATH):
21
- shutil.rmtree(CHROMA_PATH)
22
- os.makedirs(CHROMA_PATH, exist_ok=True)
23
-
24
- # --- GLOBAL MODELS ---
25
- CHAT_MODEL = None
26
- EMBED_MODEL = None
27
- VECTOR_STORE = None
28
 
29
- # --- EMBEDDING CLASS ---
30
- class LocalLlamaEmbeddings(Embeddings):
31
- def __init__(self, model_path):
32
- # Small context for embeddings to save RAM
33
- self.model = Llama(model_path=model_path, embedding=True, verbose=False, n_ctx=512)
34
- def embed_documents(self, texts):
35
- return [self.model.create_embedding(t)['data'][0]['embedding'] for t in texts]
36
- def embed_query(self, text):
37
- return self.model.create_embedding(text)['data'][0]['embedding']
38
 
39
- # --- BOTTLE DETECTION (MEMORY OPTIMIZED) ---
40
  def get_bottle_crops(image_path):
41
- from ultralytics import YOLO
42
- yolo_model = YOLO("yolov8n.pt") # Downloads small weights automatically
43
  results = yolo_model(image_path, verbose=False)
44
-
45
  found_crops = []
46
  original_img = Image.open(image_path)
47
  for r in results:
48
  for box in r.boxes:
49
- if int(box.cls) == 39 and box.conf > 0.3: # '39' is the COCO index for bottle
50
  x1, y1, x2, y2 = box.xyxy[0].tolist()
51
  found_crops.append(original_img.crop((x1-5, y1-5, x2+5, y2+5)))
52
-
53
- # Cleanup YOLO immediately to free 1GB+ RAM
54
  del yolo_model
55
- gc.collect()
56
  return found_crops
57
 
58
- # --- SYSTEM INITIALIZATION ---
59
- def init_system():
60
- global CHAT_MODEL, EMBED_MODEL, VECTOR_STORE
61
-
62
- print("⏳ Downloading models...")
63
- c_path = hf_hub_download(repo_id="openbmb/MiniCPM-V-2_6-gguf", filename="MiniCPM-V-2_6-Q4_K_M.gguf")
64
- v_path = hf_hub_download(repo_id="openbmb/MiniCPM-V-2_6-gguf", filename="mmproj-MiniCPM-V-2_6-f16.gguf")
65
- e_path = hf_hub_download(repo_id="bartowski/nomic-embed-text-v1.5-GGUF", filename="nomic-embed-text-v1.5.Q4_K_M.gguf")
66
-
67
- print("⚙️ Loading Chat & Vision...")
68
- from llama_cpp.llama_chat_format import Llava16ChatHandler
69
- chat_h = Llava16ChatHandler(clip_model_path=v_path)
70
-
71
- CHAT_MODEL = Llama(
72
- model_path=c_path,
73
- n_ctx=2048, # Memory-safe context size
74
- n_batch=512,
75
- chat_handler=chat_h,
76
- verbose=False
77
- )
78
-
79
- print("📚 Loading Embeddings...")
80
- EMBED_MODEL = LocalLlamaEmbeddings(e_path)
81
- VECTOR_STORE = Chroma(collection_name="docs", embedding_function=EMBED_MODEL, persist_directory=CHROMA_PATH)
82
-
83
- return "✅ Bar is Open! (Models Loaded)"
84
-
85
- # --- UTILS ---
86
- def encode_image(image_obj):
87
- if image_obj.mode != 'RGB': image_obj = image_obj.convert('RGB')
88
- image_obj.thumbnail((1024, 1024))
89
- buffered = io.BytesIO()
90
- image_obj.save(buffered, format="JPEG", quality=85)
91
- return base64.b64encode(buffered.getvalue()).decode('utf-8')
92
-
93
- def clean_text(text):
94
- text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
95
- for trigger in ["INSTRUCTION:", "SOURCE:", "User Question:"]:
96
- if trigger in text: text = text.split(trigger)[0]
97
- return text.strip()
98
-
99
- # --- CORE LOGIC ---
100
  def ingest_recipes(files):
101
- global VECTOR_STORE
102
- if not VECTOR_STORE: return "❌ Load system first!"
103
  if not files: return "❌ No files uploaded."
104
 
105
  docs = []
106
  for f in files:
107
  if f.name.endswith(".txt"):
108
- with open(f.name, "r") as file:
109
- recipes = file.read().split("Recipe:")
110
- docs.extend([Document(page_content=f"Recipe:{r}") for r in recipes if len(r) > 10])
111
  elif f.name.endswith(".pdf"):
112
  loader = PyPDFLoader(f.name)
113
  docs.extend(loader.load())
114
 
115
- if docs:
116
- VECTOR_STORE.add_documents(docs)
117
- return f"✅ Successfully added {len(docs)} recipes to memory."
118
- return "❌ No recipes found in files."
119
-
120
- def bartend(message, history, img_path, sys_prompt, temp, inv_state):
121
- global CHAT_MODEL, VECTOR_STORE
122
-
123
- if CHAT_MODEL is None:
124
- yield history, "⚠️ Please click 'Initialize' first!", "", inv_state
125
- return
126
 
127
- # 1. Vision Analysis
 
 
128
  if img_path:
129
- yield history, "👁️ Analyzing your bottles...", "", inv_state
130
  crops = get_bottle_crops(img_path)
131
- detected = []
132
-
133
- # Only analyze up to 2 crops to stay under RAM limits
134
- for crop in (crops[:2] if crops else [Image.open(img_path)]):
135
- b64 = encode_image(crop)
136
- v_msg = [{"role":"user", "content":[{"type":"text","text":"Exact brand and spirit type?"},{"type":"image_url","image_url":{"url":f"data:image/jpeg;base64,{b64}"}}]}]
137
- res = CHAT_MODEL.create_chat_completion(messages=v_msg, max_tokens=20)
138
- label = clean_text(res['choices'][0]['message']['content'])
139
- if label: detected.append(label)
140
-
141
- inv_state = ", ".join(list(set(detected)))
142
- yield history, "🔍 Searching recipes...", "", inv_state
143
-
144
- # 2. RAG Retrieval
145
- context = "No specific recipe found."
146
- if inv_state and VECTOR_STORE:
147
- results = VECTOR_STORE.similarity_search(inv_state, k=5)
148
- context = "\n---\n".join([d.page_content for d in results])
149
-
150
- # 3. Final Answer Generation
151
- full_prompt = f"INVENTORY: {inv_state}\n\nRECIPE SOURCE:\n{context}\n\nUSER REQUEST: {message}"
152
- messages = [{"role": "system", "content": sys_prompt}, {"role": "user", "content": full_prompt}]
153
 
154
- response_text = ""
155
- stream = CHAT_MODEL.create_chat_completion(messages=messages, stream=True, temperature=temp)
156
-
157
- for chunk in stream:
158
- if "content" in chunk["choices"][0]["delta"]:
159
- response_text += chunk["choices"][0]["delta"]["content"]
160
- # Update history for Gradio
161
- new_history = history + [[message, clean_text(response_text)]]
162
- yield new_history, "✅ Ready", context, inv_state
163
-
164
- # --- GRADIO UI ---
165
- with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
166
- gr.Markdown("# 🍸 LocalAGI: The AI Sommelier")
167
-
168
- inventory = gr.State("")
169
 
170
  with gr.Row():
171
  with gr.Column(scale=1):
172
- status = gr.Textbox(label="Status", value="Ready to initialize")
173
- init_btn = gr.Button("🚀 1. Initialize Bar", variant="primary")
174
- file_up = gr.File(label="2. Add Recipe PDFs/Texts", file_count="multiple")
175
  ingest_btn = gr.Button("📥 Load Recipes")
 
176
 
177
  with gr.Column(scale=2):
178
- chatbot = gr.Chatbot(label="Bartender", height=450)
179
- with gr.Row():
180
- msg_input = gr.Textbox(label="What are we drinking?", placeholder="I want something sour...", scale=4)
181
- send_btn = gr.Button("Send", variant="primary", scale=1)
182
-
183
- img_input = gr.Image(type="filepath", label="Upload Bottle Image (Optional)")
184
-
185
- with gr.Accordion("Debug & Settings", open=False):
186
- sys_prompt = gr.Textbox(value="You are a professional bartender. Use the provided recipes.", label="System Prompt")
187
- temp_slider = gr.Slider(0, 1, 0.3, label="Creativity")
188
- reasoning = gr.TextArea(label="Retrieved Context", interactive=False)
189
 
190
- # Event Mapping
191
- init_btn.click(init_system, None, status)
192
  ingest_btn.click(ingest_recipes, file_up, status)
193
-
194
- msg_submit = msg_input.submit(bartend, [msg_input, chatbot, img_input, sys_prompt, temp_slider, inventory], [chatbot, status, reasoning, inventory])
195
- btn_submit = send_btn.click(bartend, [msg_input, chatbot, img_input, sys_prompt, temp_slider, inventory], [chatbot, status, reasoning, inventory])
196
 
197
  if __name__ == "__main__":
198
  demo.launch()
 
1
  import gradio as gr
 
2
  import os
3
  import re
4
+ import torch
 
 
5
  import gc
6
  from PIL import Image
7
+ from transformers import pipeline
8
  from langchain_chroma import Chroma
9
+ from langchain_community.document_loaders import PyPDFLoader, TextLoader
10
+ from langchain_core.documents import Document
11
+ from langchain_huggingface import HuggingFaceEmbeddings
12
+ from ultralytics import YOLO
13
 
14
# --- CONFIGURATION ---
CHROMA_PATH = "/tmp/chroma_db"
# Using a native HF Vision model that doesn't need C++ compilation
VISION_MODEL = "HuggingFaceTB/SmolVLM-Instruct"

# --- SYSTEM INITIALIZATION ---
# This uses 'transformers', which is pre-installed on HF Spaces.
print("⚙️ Loading Stable Vision Engine...")
vision_pipe = pipeline(
    "image-to-text",
    model=VISION_MODEL,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

print("📚 Loading Embedding Engine...")
# This replaces the Llama-embeddings to avoid 'Building Wheels'.
embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
28
# --- BOTTLE DETECTION ---
def get_bottle_crops(image_path):
    """Detect bottles in the image at *image_path* and return padded PIL crops.

    Runs a small YOLOv8 model (COCO class index 39 is 'bottle') and crops
    each detection with a 5px margin. The YOLO model is deleted right after
    inference to release its memory.

    Returns a (possibly empty) list of PIL.Image crops.
    """
    yolo_model = YOLO("yolov8n.pt")
    results = yolo_model(image_path, verbose=False)
    found_crops = []
    original_img = Image.open(image_path)
    width, height = original_img.size
    for r in results:
        for box in r.boxes:
            if int(box.cls) == 39:  # Bottle
                x1, y1, x2, y2 = box.xyxy[0].tolist()
                # Pad by 5px but clamp to the image bounds: PIL pads
                # out-of-frame crop regions with black, which would feed
                # artificial borders to the downstream vision model.
                left = max(0, x1 - 5)
                top = max(0, y1 - 5)
                right = min(width, x2 + 5)
                bottom = min(height, y2 + 5)
                found_crops.append(original_img.crop((left, top, right, bottom)))
    # Free the detector immediately (it is large relative to free-tier RAM).
    del yolo_model
    gc.collect()
    return found_crops
42
 
43
# --- RECIPE INGESTION ---
def ingest_recipes(files):
    """Load uploaded .txt/.pdf recipe files into the Chroma vector store.

    Parameters: *files* is the list of Gradio file objects (each with a
    ``.name`` path attribute), or None/empty when nothing was uploaded.
    Returns a human-readable status string for the UI.
    """
    if not files:
        return "❌ No files uploaded."

    docs = []
    for f in files:
        if f.name.endswith(".txt"):
            loader = TextLoader(f.name)
            docs.extend(loader.load())
        elif f.name.endswith(".pdf"):
            loader = PyPDFLoader(f.name)
            docs.extend(loader.load())

    # Guard: Chroma.from_documents raises on an empty document list (e.g.
    # only unsupported file types were uploaded) — return a friendly
    # message instead of crashing the UI callback.
    if not docs:
        return "❌ No recipes found in files."

    Chroma.from_documents(
        documents=docs,
        embedding=embed_model,
        persist_directory=CHROMA_PATH,
    )
    return f"✅ Ingested {len(docs)} pages/recipes."
 
 
 
 
 
62
 
63
# --- BARTENDER LOGIC ---
def bartend(message, history, img_path, inventory):
    """Chat callback: optionally scan a bottle photo, search recipes, reply.

    Parameters: *message* is the user's text, *history* the Chatbot list of
    (user, bot) tuples (mutated in place), *img_path* an optional image file
    path, *inventory* the current detected-bottle state string.
    Returns the updated (history, inventory) pair for Gradio.
    """
    # 1. Vision Scanning
    if img_path:
        crops = get_bottle_crops(img_path)
        target = crops[0] if crops else Image.open(img_path)
        # Use Transformers instead of llama-cpp for the label reading
        output = vision_pipe(target, prompt="What brand of alcohol is this?", generate_kwargs={"max_new_tokens": 30})
        inventory = output[0]['generated_text'].replace("brand", "").strip()

    # 2. RAG (Search your PDFs)
    context = ""
    try:
        vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
        search_query = f"{inventory} cocktail"
        results = vs.similarity_search(search_query, k=3)
        context = "\n".join([d.page_content for d in results])
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # still propagate; any store/search failure falls back gracefully.
        context = "No PDF recipes loaded yet."

    # 3. Template-based response construction for free-tier stability
    # (no LLM text generation on this path).
    if "No PDF" in context:
        response = f"I see you have {inventory}! Since no recipe books are loaded, I recommend a classic pairing. What's your flavor profile?"
    else:
        response = f"I found a recipe in your books for {inventory}!\n\n{context[:500]}..."

    history.append((message, response))
    return history, inventory
95
+
96
# --- UI LAYOUT ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.HTML("<h1 style='text-align:center'>🍸 LocalAGI: The Cloud-Stable Sommelier</h1>")
    # Holds the last detected bottle label across chat turns.
    inv_state = gr.State("Empty Shelf")

    with gr.Row():
        # Left column: recipe ingestion controls.
        with gr.Column(scale=1):
            file_up = gr.File(label="Upload Recipe PDFs", file_count="multiple")
            ingest_btn = gr.Button("📥 Load Recipes")
            status = gr.Textbox(label="System Status", value="Ready")
        # Right column: chat interface and bottle photo upload.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=400)
            msg = gr.Textbox(label="Ask the Bartender")
            img = gr.Image(type="filepath", label="Bottle Photo")
            send_btn = gr.Button("Mix Drink", variant="primary")

    # Event wiring
    ingest_btn.click(ingest_recipes, file_up, status)
    send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state])

if __name__ == "__main__":
    demo.launch()