import gradio as gr
import os
import re
import torch
import gc
from PIL import Image
from transformers import pipeline
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
from ultralytics import YOLO
# --- CONFIGURATION ---
CHROMA_PATH = "/tmp/chroma_db"  # on-disk Chroma vector store location
VISION_MODEL = "HuggingFaceTB/SmolVLM-Instruct"  # small VLM chosen for CPU inference
# --- SYSTEM INITIALIZATION ---
# Models are loaded once at import time so every request reuses the same pipelines.
print("โ๏ธ Loading Stable Vision Engine...")
vision_pipe = pipeline(
    "image-text-to-text",
    model=VISION_MODEL,
    # float32 keeps CPU inference numerically safe (no half-precision kernels needed).
    # NOTE(review): "dtype" in model_kwargs is forwarded to from_pretrained; older
    # transformers versions expect "torch_dtype" instead — confirm installed version.
    model_kwargs={"dtype": torch.float32},
    device="cpu"
)
print("๐ Loading Embedding Engine...")
# Sentence-transformer embeddings used both at ingest time and at query time.
embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# --- BOTTLE DETECTION ---
def get_bottle_crops(image_path):
    """Detect drink containers in the image with YOLOv8 and return padded crops.

    Keeps COCO classes 39/40/41 (bottle, wine glass, cup) and pads each box by
    25% so the label is not cut off. Returns the crops as PIL images, the whole
    image if nothing was detected, or an empty list on any error.
    """
    print(f"๐ DEBUG: Starting YOLO on {image_path}")
    found_crops = []
    try:
        original_img = Image.open(image_path).convert("RGB")
        img_w, img_h = original_img.size
        yolo_model = YOLO("yolov8n.pt")
        # conf=0.1 keeps low-confidence boxes: over-cropping beats missing a bottle.
        results = yolo_model(image_path, verbose=False, conf=0.1)
        for r in results:
            for box in r.boxes:
                if int(box.cls) in (39, 40, 41):  # COCO ids: bottle, wine glass, cup
                    x1, y1, x2, y2 = box.xyxy[0].tolist()
                    # 25% padding to ensure the label isn't cut off.
                    pad_x = int((x2 - x1) * 0.25)
                    pad_y = int((y2 - y1) * 0.25)
                    # FIX: YOLO returns float coords; cast to ints so the crop box
                    # is an explicit pixel rectangle instead of relying on PIL's
                    # implicit float rounding.
                    left = max(0, int(x1 - pad_x))
                    top = max(0, int(y1 - pad_y))
                    right = min(img_w, int(x2 + pad_x))
                    bottom = min(img_h, int(y2 + pad_y))
                    found_crops.append(original_img.crop((left, top, right, bottom)))
        # Free the detector immediately; it is re-created per call.
        del yolo_model
        gc.collect()
        return found_crops if found_crops else [original_img]
    except Exception as e:
        print(f"โ YOLO Error: {e}")
        return []
# --- RECIPE INGESTION ---
def ingest_recipes(files):
    """Load uploaded .txt/.pdf recipe files, split them per recipe, and index in Chroma.

    Splits strictly at each line beginning with "Recipe:" so one chunk == one
    recipe. Returns a human-readable status string for the UI.
    """
    if not files:
        return "โ No files uploaded."
    docs = []
    for f in files:
        try:
            # FIX: compare the extension case-insensitively so ".PDF"/".TXT"
            # uploads are no longer silently ignored.
            lowered = f.name.lower()
            if lowered.endswith(".txt"):
                docs.extend(TextLoader(f.name).load())
            elif lowered.endswith(".pdf"):
                docs.extend(PyPDFLoader(f.name).load())
        except Exception as e:
            print(f"Error: {e}")
    if not docs:
        return "โ Could not extract text."
    full_text = "\n".join(d.page_content for d in docs)
    # Multiline lookahead: split right before every "Recipe:" heading.
    raw_chunks = re.split(r'(?m)^(?=Recipe:)', full_text)
    split_docs = []
    for chunk in raw_chunks:
        # Strip decorative separator runs; drop near-empty fragments.
        clean_chunk = re.sub(r'โธป+', '', chunk).strip()
        if len(clean_chunk) > 20:
            split_docs.append(Document(page_content=clean_chunk))
    # FIX: guard the empty case — Chroma.from_documents raises on an empty list.
    if not split_docs:
        return "โ Could not extract text."
    try:
        Chroma.from_documents(split_docs, embed_model, persist_directory=CHROMA_PATH)
        return f"โ Bar library updated. Strictly split into {len(split_docs)} individual recipes."
    except Exception as e:
        return f"โ Database Error: {e}"
# --- BARTENDER LOGIC ---
def bartend(message, history, img_path, inventory):
    """Chat handler: optionally identify a bottle from a photo, then answer with recipes.

    Args:
        message: user's chat message.
        history: chat history as a list of {"role", "content"} dicts (mutated in place).
        img_path: optional filepath of an uploaded bottle photo.
        inventory: current identified spirit carried in gr.State.

    Returns:
        (updated history, updated inventory, debug crop images for the gallery)
    """
    debug_images = []
    # FIX: only run the vision pipeline when a photo was actually provided.
    # Previously a text-only turn hit an undefined `target_img`, and the broad
    # except silently reset the state inventory to "Unknown Spirit".
    if img_path:
        crops = get_bottle_crops(img_path)
        debug_images = crops
        # SPEED FIX 1: We return to using the tight crop, discarding the heavy background!
        target_img = crops[0] if crops else Image.open(img_path).convert("RGB")

        def identify_spirit(image_input):
            # SPEED FIX 2: Aggressive squishing.
            # We copy the image so we don't blur the gallery debug version
            fast_img = image_input.copy()
            if fast_img.mode != "RGB":
                fast_img = fast_img.convert("RGB")
            # Shrink down to a max of 384x384. This makes CPU math practically instant.
            fast_img.thumbnail((384, 384))
            prompt = "User: <image>\nRead the label. What is the specific brand and type of alcohol? Be precise.\nAssistant:"
            # Keep token limit at 15. The 'brain' (Chroma) handles the long text,
            # the 'eyes' just need to read the brand name.
            out = vision_pipe(fast_img, prompt, generate_kwargs={"max_new_tokens": 15})
            text = out[0]['generated_text']
            if "Assistant:" in text:
                return text.split("Assistant:")[-1].strip()
            return text.replace("User: <image>", "").strip()

        try:
            inventory = identify_spirit(target_img)
            # Drop any residual markup and keep only the first sentence.
            inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
            print(f"๐ Pass 1 Result: {inventory}")
            generic_terms = ["vodka", "gin", "rum", "tequila", "whiskey", "whisky", "bourbon", "brandy", "alcohol", "liquor", "spirit", "bottle", "drink"]
            # ONLY fallback to the heavy full image if the crop failed us
            if inventory.lower() in generic_terms or len(inventory) < 4:
                print("โ ๏ธ Result too generic. Trying FULL IMAGE...")
                full_img_result = identify_spirit(Image.open(img_path))
                full_img_result = re.sub(r'<.*?>', '', full_img_result).strip().split('.')[0]
                if len(full_img_result) > len(inventory):
                    inventory = full_img_result
                    print(f"โ Pass 2 Result: {inventory}")
        except Exception as e:
            print(f"โ Vision Failed: {e}")
            inventory = "Unknown Spirit"
    recipe_context = ""
    if inventory and inventory not in ["Empty Shelf", "Unknown Spirit", ""]:
        try:
            if os.path.exists(CHROMA_PATH):
                vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
                search_query = f"Cocktail recipe using {inventory}"
                # Fetch top 4 distinct recipes
                results = vs.similarity_search(search_query, k=4)
                recipe_context = "\n\n---\n\n".join([d.page_content for d in results])
        except Exception as e:
            print(f"Search error: {e}")
    if inventory == "Unknown Spirit":
        response = "I'm having trouble reading that label. Check the 'Vision Debug' gallery belowโis the crop clear?"
    elif recipe_context:
        response = f"I see you have **{inventory}**. Here are a few options from your collection:\n\n{recipe_context}"
    else:
        response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the current library. Should I suggest a classic drink?"
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": response})
    return history, inventory, debug_images
# --- UI LAYOUT ---
# FIX: `theme=` is a gr.Blocks() constructor argument, not a launch() argument
# (passing it to launch() raises TypeError in Gradio 4.x).
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ๐ธ LocalAGI: The AI Sommelier")
    inv_state = gr.State("Empty Shelf")  # carries the last identified spirit between turns
    with gr.Row():
        with gr.Column(scale=1):
            file_up = gr.File(label="1. Upload Recipe PDFs/TXTs", file_count="multiple")
            ingest_btn = gr.Button("๐ฅ Load Recipes into Memory")
            status = gr.Textbox(label="System Status", value="Ready")
            gr.Markdown("---")
            img = gr.Image(type="filepath", label="2. Photo of your Bottle")
            with gr.Accordion("๐ Vision Debug", open=False):
                debug_gallery = gr.Gallery(label="YOLO Crops", columns=2, height="auto")
        with gr.Column(scale=2):
            # FIX: bartend() appends {"role", "content"} dicts, which requires
            # the openai-style "messages" format rather than the legacy tuples.
            chatbot = gr.Chatbot(height=500, label="Bartender Chat", type="messages")
            msg = gr.Textbox(label="3. Your Message", placeholder="Ask for a drink suggestion...")
            send_btn = gr.Button("Mix It Up", variant="primary")
    ingest_btn.click(ingest_recipes, file_up, status)
    # Both Enter-submit and the button trigger the same handler.
    msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])
    send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])
if __name__ == "__main__":
    demo.launch()