import gradio as gr
import os
import re
import torch
import gc
from PIL import Image
from transformers import pipeline
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
from ultralytics import YOLO

# --- CONFIGURATION ---
CHROMA_PATH = "/tmp/chroma_db"
VISION_MODEL = "HuggingFaceTB/SmolVLM-Instruct"

# --- SYSTEM INITIALIZATION ---
print("⚙️ Loading Stable Vision Engine...")
vision_pipe = pipeline(
    "image-text-to-text", 
    model=VISION_MODEL, 
    model_kwargs={"dtype": torch.float32}, 
    device="cpu"
)

print("📚 Loading Embedding Engine...")
embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# --- BOTTLE DETECTION ---
def get_bottle_crops(image_path):
    print(f"🔍 DEBUG: Starting YOLO on {image_path}")
    found_crops = []
    
    try:
        original_img = Image.open(image_path).convert("RGB")
        img_w, img_h = original_img.size
        
        yolo_model = YOLO("yolov8n.pt")
        results = yolo_model(image_path, verbose=False, conf=0.1)
        
        for r in results:
            for box in r.boxes:
                if int(box.cls) in [39, 40, 41]: 
                    x1, y1, x2, y2 = box.xyxy[0].tolist()
                    
                    # 25% Padding to ensure the label isn't cut off
                    box_w, box_h = x2 - x1, y2 - y1
                    pad_x, pad_y = int(box_w * 0.25), int(box_h * 0.25)
                    
                    x1, y1 = max(0, x1 - pad_x), max(0, y1 - pad_y)
                    x2, y2 = min(img_w, x2 + pad_x), min(img_h, y2 + pad_y)
                    
                    found_crops.append(original_img.crop((x1, y1, x2, y2)))

        del yolo_model
        gc.collect()
        return found_crops if found_crops else [original_img]
    except Exception as e:
        print(f"❌ YOLO Error: {e}")
        return []

# --- RECIPE INGESTION ---
def ingest_recipes(files):
    if not files: return "❌ No files uploaded."
    
    docs = []
    for f in files:
        try:
            if f.name.endswith(".txt"): docs.extend(TextLoader(f.name).load())
            elif f.name.endswith(".pdf"): docs.extend(PyPDFLoader(f.name).load())
        except Exception as e: print(f"Error: {e}")
            
    if not docs: return "❌ Could not extract text."

    full_text = "\n".join([d.page_content for d in docs])
    raw_chunks = re.split(r'(?m)^(?=Recipe:)', full_text)
    
    split_docs = []
    for chunk in raw_chunks:
        clean_chunk = re.sub(r'⸻+', '', chunk).strip()
        if len(clean_chunk) > 20: 
            split_docs.append(Document(page_content=clean_chunk))

    try:
        Chroma.from_documents(split_docs, embed_model, persist_directory=CHROMA_PATH)
        return f"✅ Bar library updated. Strictly split into {len(split_docs)} individual recipes."
    except Exception as e:
        return f"❌ Database Error: {e}"

# --- BARTENDER LOGIC ---
def bartend(message, history, img_path, inventory):
    debug_images = []
    
    if img_path:
        crops = get_bottle_crops(img_path)
        debug_images = crops 
        
        # SPEED FIX 1: We return to using the tight crop, discarding the heavy background!
        target_img = crops[0] if crops else Image.open(img_path).convert("RGB")
        
        def identify_spirit(image_input):
            # SPEED FIX 2: Aggressive squishing. 
            # We copy the image so we don't blur the gallery debug version
            fast_img = image_input.copy()
            if fast_img.mode != "RGB": fast_img = fast_img.convert("RGB")
            
            # Shrink down to a max of 384x384. This makes CPU math practically instant.
            fast_img.thumbnail((384, 384))
            
            prompt = "User: <image>\nRead the label. What is the specific brand and type of alcohol? Be precise.\nAssistant:"
            
            # Keep token limit at 15. The 'brain' (Chroma) handles the long text, the 'eyes' just need to read the brand name.
            out = vision_pipe(fast_img, prompt, generate_kwargs={"max_new_tokens": 15})
            text = out[0]['generated_text']
            if "Assistant:" in text: return text.split("Assistant:")[-1].strip()
            return text.replace("User: <image>", "").strip()

        try:
            inventory = identify_spirit(target_img)
            inventory = re.sub(r'<.*?>', '', inventory).strip().split('.')[0]
            print(f"🔍 Pass 1 Result: {inventory}")
            
            generic_terms = ["vodka", "gin", "rum", "tequila", "whiskey", "whisky", "bourbon", "brandy", "alcohol", "liquor", "spirit", "bottle", "drink"]
            
            # ONLY fallback to the heavy full image if the crop failed us
            if inventory.lower() in generic_terms or len(inventory) < 4:
                print("⚠️ Result too generic. Trying FULL IMAGE...")
                full_img_result = identify_spirit(Image.open(img_path))
                full_img_result = re.sub(r'<.*?>', '', full_img_result).strip().split('.')[0]
                if len(full_img_result) > len(inventory):
                    inventory = full_img_result
                    print(f"✅ Pass 2 Result: {inventory}")
                    
        except Exception as e:
            print(f"❌ Vision Failed: {e}")
            inventory = "Unknown Spirit"

    recipe_context = ""
    if inventory and inventory not in ["Empty Shelf", "Unknown Spirit", ""]:
        try:
            if os.path.exists(CHROMA_PATH):
                vs = Chroma(persist_directory=CHROMA_PATH, embedding_function=embed_model)
                search_query = f"Cocktail recipe using {inventory}"
                
                # Fetch top 4 distinct recipes
                results = vs.similarity_search(search_query, k=4)
                recipe_context = "\n\n---\n\n".join([d.page_content for d in results])
        except Exception as e:
            print(f"Search error: {e}")

    if inventory == "Unknown Spirit":
        response = "I'm having trouble reading that label. Check the 'Vision Debug' gallery below—is the crop clear?"
    elif recipe_context:
        response = f"I see you have **{inventory}**. Here are a few options from your collection:\n\n{recipe_context}"
    else:
        response = f"I see you have **{inventory}**! I don't have a specific recipe for that in the current library. Should I suggest a classic drink?"

    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": response})

    return history, inventory, debug_images

# --- UI LAYOUT ---
with gr.Blocks() as demo:
    gr.Markdown("# 🍸 LocalAGI: The AI Sommelier")
    inv_state = gr.State("Empty Shelf")
    
    with gr.Row():
        with gr.Column(scale=1):
            file_up = gr.File(label="1. Upload Recipe PDFs/TXTs", file_count="multiple")
            ingest_btn = gr.Button("📥 Load Recipes into Memory")
            status = gr.Textbox(label="System Status", value="Ready")
            
            gr.Markdown("---")
            img = gr.Image(type="filepath", label="2. Photo of your Bottle")
            
            with gr.Accordion("🔍 Vision Debug", open=False):
                debug_gallery = gr.Gallery(label="YOLO Crops", columns=2, height="auto")
            
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=500, label="Bartender Chat")
            msg = gr.Textbox(label="3. Your Message", placeholder="Ask for a drink suggestion...")
            send_btn = gr.Button("Mix It Up", variant="primary")

    ingest_btn.click(ingest_recipes, file_up, status)
    msg.submit(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])
    send_btn.click(bartend, [msg, chatbot, img, inv_state], [chatbot, inv_state, debug_gallery])

if __name__ == "__main__":
    demo.launch(theme=gr.themes.Soft())