# -*- coding: utf-8 -*-
"""Recipe recommendation system.

Loads and parses the ``corbt/all-recipes`` dataset, indexes it in an
in-memory Chroma vector store (falling back to keyword text search),
and optionally uses a Gemini LLM via LangChain for query expansion,
RAG answering, and RAG-vs-text routing.
"""

import os
import pandas as pd
import time
import logging
import gradio as gr
from typing import Optional, List, Dict  # Keep typing
# from functools import lru_cache  # Keep commented out
import random
import shutil
import re  # Used for parsing recipe directions

# --- LangChain Imports ---
# Core
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
# LLMs (using Google GenAI wrapper)
from langchain_google_genai import ChatGoogleGenerativeAI
# Vector Stores / Embeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

# --- Other Imports ---
from datasets import load_dataset  # Keep specific exception handling removed
import pyarrow  # Keep explicit import

# Attempt to load python-dotenv for easier local API key management (optional)
try:
    from dotenv import load_dotenv
    load_dotenv()  # Load variables from .env file if it exists
    DOTENV_AVAILABLE = True
except ImportError:
    DOTENV_AVAILABLE = False

# ==============================================================================
# Logging Configuration
# ==============================================================================
logging.basicConfig(
    level=logging.INFO,  # INFO level is usually sufficient for running
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger('recipe_system')

# ==============================================================================
# Conditional Imports & Feature Flags
# ==============================================================================

# --- Vector Search Imports Check ---
# NOTE(review): if any of these imports had failed, the module would have
# raised ImportError above; the NameError guard only fires if the import
# lines are later made conditional. Kept as a cheap sanity check.
VECTOR_IMPORTS_AVAILABLE = False
try:
    if HuggingFaceEmbeddings and Chroma and Document and load_dataset and pyarrow:
        VECTOR_IMPORTS_AVAILABLE = True
        logger.info("Vector search dependencies check: OK.")
except NameError:
    logger.error("Import check failed for vector search dependencies.")
    VECTOR_IMPORTS_AVAILABLE = False

# --- LLM (LangChain Google GenAI) Imports Check ---
LANGCHAIN_LLM_AVAILABLE = False
GOOGLE_API_KEY = None
try:
    if ChatGoogleGenerativeAI and PromptTemplate and StrOutputParser:
        GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
        if not GOOGLE_API_KEY:
            logger.warning("GOOGLE_API_KEY environment variable not found.")
            if DOTENV_AVAILABLE:
                logger.info("Checked environment and .env file (if present).")
            else:
                logger.info("Checked environment variables.")
            LANGCHAIN_LLM_AVAILABLE = False
        else:
            logger.info("GOOGLE_API_KEY found. LangChain LLM dependencies appear available.")
            LANGCHAIN_LLM_AVAILABLE = True
except NameError:
    logger.error("Import check failed for LangChain LLM (Gemini) components.")
    logger.error("<<<<< Please ensure 'langchain-google-genai' is installed (in requirements.txt) >>>>>")
    LANGCHAIN_LLM_AVAILABLE = False

if not VECTOR_IMPORTS_AVAILABLE:
    logger.warning("Vector database imports failed - vector search disabled.")
if not LANGCHAIN_LLM_AVAILABLE:
    logger.warning("LangChain LLM setup incomplete - LLM features disabled.")
# --- End Import Check ---

# ==============================================================================
# Constants
# ==============================================================================
VECTOR_DB_PATH = "./recipe_vectordb"  # Example path for persistence (not implemented yet)
DATASET_NAME = "corbt/all-recipes"
RECIPES_CSV_PATH = "recipes_data.csv"
GEMINI_MODEL_NAME = "models/gemini-1.5-flash-latest"


# ==============================================================================
# Recipe Recommendation System Class (Includes Agentic Routing)
# ==============================================================================
class RecipeRecommendationSystem:
    """
    Manages recipe data loading, parsing, indexing (vector or text), searching,
    and optional LLM query expansion & RAG using LangChain.
    Includes enhanced logging and minimal agentic routing.
    """

    def __init__(self):
        # Runtime state; populated by initialize()/_create_new_db().
        self.is_initialized = False
        self.initialization_error = None   # last human-readable init failure, if any
        self.embeddings = None             # HuggingFaceEmbeddings, created lazily
        self.vector_db = None              # in-memory Chroma DB (vector mode only)
        self.recipes_df = None             # parsed recipes: title/ingredients/instructions
        self.sample_size = 1000
        self.backup_recipes = self._get_backup_recipes()
        self.lc_llm: Optional[ChatGoogleGenerativeAI] = None
        self.use_vector_search = VECTOR_IMPORTS_AVAILABLE
        self.use_llm = LANGCHAIN_LLM_AVAILABLE
        logger.info(f"System instance created. Vector search: {self.use_vector_search}, LLM (LangChain Gemini): {self.use_llm}")

    def _load_llm(self):
        """Configure the LangChain Gemini wrapper once; returns True on success.

        On failure, disables LLM features and appends the error to
        ``self.initialization_error``.
        """
        if not self.use_llm:
            logger.info("LLM features disabled or dependencies missing.")
            return False
        if self.lc_llm:
            logger.info("LangChain LLM wrapper already configured.")
            return True
        try:
            logger.info(f"Configuring LangChain Gemini LLM wrapper for model: {GEMINI_MODEL_NAME}...")
            self.lc_llm = ChatGoogleGenerativeAI(
                model=GEMINI_MODEL_NAME,
                google_api_key=GOOGLE_API_KEY,
                temperature=0.7
            )
            logger.info("LangChain Gemini LLM wrapper configured successfully.")
            return True
        except Exception as e:
            logger.exception(f"Error configuring LangChain Gemini LLM wrapper: {e}")
            self.lc_llm = None
            self.use_llm = False
            self.initialization_error = (self.initialization_error or "") + f" | LangChain LLM Config Failed: {e}"
            return False

    def initialize(self, force_reload=False, sample_size=1000):
        """(Re)build the system: LLM wrapper, vector DB or text-search fallback.

        Args:
            force_reload: rebuild even if already initialized (also resets the LLM).
            sample_size: number of dataset rows to index (<=0 means all).

        Returns:
            True when at least a fallback search mode is ready; False otherwise.
        """
        start_time = time.time()
        logger.info(f"Initialize called. Force reload: {force_reload}, Sample size: {sample_size}")

        # Fast path: skip work if already initialized with the same config.
        llm_ready = not self.use_llm or (self.lc_llm is not None)
        if (self.is_initialized and not force_reload and self.sample_size == sample_size
                and self.recipes_df is not None and not self.recipes_df.empty and llm_ready):
            # The active search mode must match its backing store.
            search_mode_ok = (self.use_vector_search and self.vector_db is not None) or \
                             (not self.use_vector_search and self.vector_db is None)
            if search_mode_ok:
                logger.info(f"System already initialized ({'Vector' if self.use_vector_search else 'Text'} Search, LLM: {llm_ready}). Skipping.")
                return True

        self.sample_size = sample_size
        logger.info(f"{'Reloading' if self.is_initialized or force_reload else 'Initializing'} system...")
        self.is_initialized = False
        self.initialization_error = None
        self.vector_db = None   # Reset DB on initialize/reload
        self.recipes_df = None  # Reset DF
        if force_reload:
            self.lc_llm = None  # Reset LLM wrapper too if forcing

        llm_load_success = self._load_llm()
        if not llm_load_success:
            logger.warning("LLM configuration failed. LLM features will be disabled.")

        should_attempt_vector = VECTOR_IMPORTS_AVAILABLE
        init_success = False
        if should_attempt_vector:
            logger.info("Attempting vector search initialization...")
            # Note: Persistence logic would go here - check if VECTOR_DB_PATH exists and load if !force_reload
            create_success = self._create_new_db()  # Currently always creates new
            if create_success:
                logger.info("Vector DB creation successful.")
                self.use_vector_search = True
                init_success = True
            else:
                error_msg = self.initialization_error or "DB creation failed"
                logger.error(f"{error_msg}. Falling back to text search.")
                # FIX: drop=True avoids injecting a spurious 'index' column.
                self.recipes_df = pd.DataFrame(self.backup_recipes).reset_index(drop=True)
                self.use_vector_search = False
                self.vector_db = None
                if self.recipes_df is not None and not self.recipes_df.empty:
                    logger.info(f"Loaded {len(self.recipes_df)} backup recipes for fallback.")
                    init_success = True
                else:
                    logger.error("Failed to load backup recipes during fallback.")
        else:
            # Fallback if vector imports missing
            logger.info("Vector dependencies unavailable. Initializing with text search fallback.")
            self.recipes_df = pd.DataFrame(self.backup_recipes).reset_index(drop=True)
            self.use_vector_search = False
            self.vector_db = None
            if self.recipes_df is not None and not self.recipes_df.empty:
                logger.info(f"Loaded {len(self.recipes_df)} backup recipes.")
                init_success = True
            else:
                logger.error("Failed to load backup recipes.")

        elapsed = time.time() - start_time
        if init_success and self.recipes_df is not None and not self.recipes_df.empty:
            self.is_initialized = True
            search_type = "vector" if self.use_vector_search else "text (fallback)"
            llm_status = "active" if self.use_llm and self.lc_llm else "inactive"
            logger.info(f"Init finished in {elapsed:.2f}s. Search: {search_type}. LLM: {llm_status}. Recipes: {len(self.recipes_df)}.")
            return True
        else:
            # Handle overall init failure
            if not self.initialization_error:
                self.initialization_error = "Init failed (unknown reason)"
            logger.error(f"Initialization failed: {self.initialization_error}")
            self.is_initialized = False
            return False

    def _create_new_db(self):
        """Creates vector DB and populates self.recipes_df. Includes enhanced logging."""
        try:
            # --- 1. Load Raw Data ---
            logger.info(f"Loading dataset '{DATASET_NAME}' from Hugging Face...")
            try:
                # Consider adding cache_dir argument if needed: cache_dir="./hf_cache"
                dataset = load_dataset(DATASET_NAME, split='train')
                recipes_raw_df = dataset.to_pandas()
                logger.info(f"Loaded and converted {len(recipes_raw_df)} recipes.")
                # FIX: explicit check instead of `assert` (asserts vanish under -O).
                if 'input' not in recipes_raw_df.columns:
                    raise ValueError("Missing 'input' column")
            except Exception as e:
                logger.exception(f"Dataset load failed: {e}")
                self.initialization_error = f"Dataset load failed: {e}"
                return False

            # --- 2. Sample Data ---
            logger.debug("Checking sample size...")
            if 0 < self.sample_size < len(recipes_raw_df):  # Ensure sample_size is positive
                logger.info(f"Sampling {self.sample_size} recipes...")
                recipes_sampled_df = recipes_raw_df.sample(
                    self.sample_size, random_state=42
                ).reset_index(drop=True).copy()
            else:
                logger.info(f"Using all {len(recipes_raw_df)} loaded recipes (or invalid sample size).")
                recipes_sampled_df = recipes_raw_df.reset_index(drop=True).copy()
            logger.debug(f"DataFrame shape for processing: {recipes_sampled_df.shape}")

            # --- 3. Initialize Embeddings ---
            if not self.embeddings:
                logger.info("Initializing embeddings model (sentence-transformers/all-MiniLM-L6-v2)...")
                # Consider adding cache_folder argument if needed
                self.embeddings = HuggingFaceEmbeddings(
                    model_name="sentence-transformers/all-MiniLM-L6-v2"
                )
                logger.info("Embeddings model initialized.")
            else:
                logger.info("Embeddings model already initialized.")

            # --- 4. Parse 'input' Column & Create LangChain Documents ---
            # Expected raw layout (per visible parsing logic): first line is the
            # title, then 'ingredients:' / 'directions:' section markers.
            logger.info(f"Starting parsing loop for {len(recipes_sampled_df)} recipes...")
            documents: List[Document] = []
            processed_data = []
            skipped = 0
            log_interval = max(1, len(recipes_sampled_df) // 10)  # Log more frequently if needed
            for idx, row in recipes_sampled_df.iterrows():
                if (idx + 1) % log_interval == 0:
                    logger.debug(f"Parsing progress: {idx + 1}/{len(recipes_sampled_df)}")
                # FIX: reset per-row so the except-log cannot report a stale
                # title left over in scope from a previous iteration.
                title = 'N/A'
                try:
                    inp = row.get('input', '')
                    lines = [ln.strip() for ln in inp.splitlines()] if isinstance(inp, str) else []
                    if not lines:
                        skipped += 1
                        continue
                    title = lines[0] if lines else f'Untitled Recipe {idx}'
                    ingreds = []
                    directs = []
                    in_i = False
                    in_d = False  # Reset flags for each recipe
                    for line in lines[1:]:
                        line_strip = line.strip()
                        line_lower = line_strip.lower()
                        # State machine for parsing sections
                        if line_lower == 'ingredients:':
                            in_i = True; in_d = False; continue
                        elif line_lower == 'directions:':
                            in_d = True; in_i = False; continue
                        # If inside a section, append
                        if in_i:
                            ingreds.append(line_strip.lstrip('- '))
                        elif in_d:
                            directs.append(re.sub(r"^\s*[\d\W]+\.?\s*", "", line_strip))  # Clean step numbers/bullets
                        # Don't reset flags on empty lines within sections
                    i_str = "\n".join(ingreds).strip()
                    d_str = "\n".join(directs).strip()
                    if not title or not i_str or not d_str:
                        skipped += 1
                        continue  # Skip if essential parts missing
                    processed_data.append({
                        'title': title, 'ingredients': i_str, 'instructions': d_str,
                        'description': '', 'rating': None  # Add placeholders
                    })
                    meta = {
                        "doc_id": int(idx), "title": title,
                        "ingredients": i_str, "instructions": d_str
                    }
                    # Create document content combining key fields
                    doc_content = f"Title: {title}\n\nIngredients:\n{i_str}\n\nInstructions:\n{d_str}"
                    documents.append(Document(page_content=doc_content, metadata=meta))
                except Exception as e:
                    logger.warning(f"Error parsing row index {idx}: {e}. Title: '{title}'. Skipping.", exc_info=False)
                    skipped += 1
            logger.info(f"Parsing complete. Docs created: {len(documents)}, Data rows: {len(processed_data)}, Skipped: {skipped}")
            if not documents:
                self.initialization_error = "No valid documents were created after parsing."
                return False

            # --- 5. Store Parsed DataFrame & Save CSV ---
            self.recipes_df = pd.DataFrame(processed_data)
            if self.recipes_df.empty:
                self.initialization_error = "Parsed DataFrame is empty after processing."
                return False
            try:
                logger.info(f"Saving {len(self.recipes_df)} parsed recipes to CSV: {RECIPES_CSV_PATH}...")
                self.recipes_df.to_csv(RECIPES_CSV_PATH, index=False)
                logger.info("CSV saved.")
            except Exception as e:
                # Best-effort only: a failed CSV dump must not abort initialization.
                logger.warning(f"Could not save parsed recipes CSV: {e}")

            # --- 6. Create IN-MEMORY Chroma DB ---
            logger.info(f"Creating Chroma DB with {len(documents)} documents...")
            try:
                # Persistence logic would involve using persist_directory and Chroma(persist_directory=...) on reload
                self.vector_db = Chroma.from_documents(
                    documents=documents,
                    embedding=self.embeddings
                )
                logger.info("Chroma DB created successfully.")
                if self.recipes_df is None or self.recipes_df.empty:  # Sanity check
                    raise RuntimeError("Critical Error: recipes_df lost after DB creation")
                return True
            except Exception as e:
                logger.exception(f"Chroma DB creation failed: {e}")
                self.initialization_error = f"Chroma DB creation failed: {e}"
                self.vector_db = None
                return False
        except Exception as e:
            # Catch any other unexpected error
            logger.exception(f"Outer error in _create_new_db: {e}")
            self.initialization_error = f"Outer DB creation error: {str(e)}"
            self.recipes_df = None
            self.vector_db = None
            return False

    def _expand_query_with_llm(self, query: str) -> Optional[str]:
        """Uses LCEL chain with Gemini to expand search query.

        Returns the expanded query string, or None when the LLM is off,
        the expansion fails, or the expansion is empty/identical.
        """
        if not self.use_llm or not self.lc_llm:
            return None
        start_time = time.time()
        logger.info(f"LCEL Chain: Expanding query: '{query}'")
        try:
            template = "Expand this recipe search query with related terms: {query}"
            prompt = PromptTemplate.from_template(template)
            output_parser = StrOutputParser()
            expansion_chain = prompt | self.lc_llm | output_parser
            expanded_query = expansion_chain.invoke({"query": query})
            elapsed = time.time() - start_time
            logger.info(f"LCEL Chain: Original: '{query}' -> Expanded: '{expanded_query}' ({elapsed:.2f}s)")
            if not expanded_query or expanded_query.lower().strip() == query.lower().strip():
                logger.info("LCEL expansion resulted in empty or identical query.")
                return None
            return expanded_query.strip()
        except Exception as e:
            logger.exception(f"LCEL expansion error: {e}")
            return None

    def _get_routing_decision(self, query: str) -> str:
        """Uses the LLM to decide whether a query is better for RAG or Text Search.

        Returns 'RAG' or 'TEXT_SEARCH'; defaults to 'RAG' on any failure.
        """
        if not self.use_llm or not self.lc_llm:
            logger.warning("Router: LLM off. Defaulting to RAG.")
            return "RAG"
        logger.info(f"Router: Getting decision for query: '{query}'")
        start_time = time.time()
        routing_template = """You are a request router for a recipe system. Determine the best approach: 1. 'RAG': For specific questions about recipes (ingredients, instructions, properties like "is it vegetarian?"). 2. 'TEXT_SEARCH': For general searches by name or keywords (e.g., "chocolate chip cookies", "tomato soup"). Respond ONLY 'RAG' or 'TEXT_SEARCH'. Query: {query} Approach:"""
        # FIX: prompt/parser construction moved inside the try so that any
        # failure here also falls back to RAG instead of propagating.
        try:
            routing_prompt = PromptTemplate.from_template(routing_template)
            output_parser = StrOutputParser()
            routing_chain = routing_prompt | self.lc_llm | output_parser
            decision = routing_chain.invoke({"query": query}).strip().upper()
            elapsed = time.time() - start_time
            if decision in ["RAG", "TEXT_SEARCH"]:
                logger.info(f"Router: Decision '{decision}' ({elapsed:.2f}s).")
                return decision
            else:
                logger.warning(f"Router: Bad response '{decision}'. Defaulting RAG.")
                return "RAG"
        except Exception as e:
            logger.exception(f"Router error: {e}. Defaulting RAG.")
            return "RAG"

    def search_recipes(self, query, num_results=3):
        """Searches recipes using LLM-routed approach.

        Routes the query to RAG (vector retrieval + Gemini answer) or plain
        text search, falling back to text search whenever RAG cannot run.
        Returns a Markdown string with a trailing DEBUG footer.
        """
        log_prefix = f"Search(Q='{query}', N={num_results})"
        logger.info(f"{log_prefix}: Called. Init: {self.is_initialized}...")
        if not self.is_initialized:
            return "System not initialized."
        if self.recipes_df is None or self.recipes_df.empty:
            return "No recipe data."

        original_query = query
        search_query = query
        expanded_query_used = False
        llm_expansion_note = ""
        # Optional Expansion
        if self.use_llm:
            expanded_query = self._expand_query_with_llm(original_query)
            if expanded_query:
                search_query = expanded_query
                expanded_query_used = True
                llm_expansion_note = f" (LLM expanded to: \"{search_query}\")"
                logger.info(f"{log_prefix}: Using expanded query '{search_query}'")
            else:
                logger.info(f"{log_prefix}: Using original query '{original_query}'")
        else:
            logger.info(f"{log_prefix}: LLM expansion off. Using original query.")

        search_start = time.time()
        final_result = ""
        search_method_used = "unknown"
        # Routing
        routing_decision = self._get_routing_decision(original_query)
        logger.info(f"{log_prefix}: Router path: {routing_decision}")
        try:
            # --- RAG Path ---
            if routing_decision == "RAG":
                search_method_used = "vector (RAG chosen)"
                if self.use_vector_search and self.vector_db is not None:
                    try:
                        # Attempt RAG
                        logger.info(f"{log_prefix}: Retrieving docs (Q: '{search_query}')")
                        retriever = self.vector_db.as_retriever(search_kwargs={'k': num_results})
                        retrieved_docs: List[Document] = retriever.invoke(search_query)
                        logger.info(f"{log_prefix}: Found {len(retrieved_docs)} docs.")
                        if retrieved_docs and self.lc_llm:
                            logger.info(f"{log_prefix}: Running RAG chain.")

                            def format_docs(docs):
                                return "\n\n---\n\n".join([f"Doc {i+1} (Title: {doc.metadata.get('title','N/A')}):\n{doc.page_content}" for i, doc in enumerate(docs)])

                            context_string = format_docs(retrieved_docs)
                            # Refined RAG prompt for better instructions
                            rag_template_qa = """You are a helpful Recipe Assistant. Your goal is to answer the user's query based *only* on the provided recipe Context. Be factual and concise. Follow these specific instructions: 1. **Analyze the Query:** Is it a specific question about a recipe (e.g., "how long to bake", "ingredients for X", "is Y vegetarian?") or a general search term (e.g., "chicken soup", "easy dessert")? 2. **Answer Based ONLY on Context:** * If the query is a specific question AND the Context contains a clear answer, provide that answer directly. * If the query is a specific question BUT the Context contains relevant recipes but NOT the specific answer, state what information IS available in the context related to the question (e.g., "The context includes a recipe for Chocolate Chip Cookies, but doesn't specify the exact baking temperature needed."). DO NOT GUESS or add external knowledge. * If the query is a specific question BUT the retrieved Context seems completely irrelevant, state that you couldn't find relevant information *in the provided documents* to answer the question. * If the query seems like a general search term AND the Context contains relevant recipes, present the recipes found clearly. For each recipe, include: Title, Ingredients, and Instructions. Format them nicely using Markdown. * If the query is a general search term BUT no relevant recipes are found in the Context, state that no matching recipes were found in the provided documents. 3. **Formatting:** Use Markdown for readability (like bullet points for ingredients, numbered steps for instructions). Context: {context} Query: {query} Answer:"""
                            rag_prompt = PromptTemplate.from_template(rag_template_qa)
                            # Setup RAG chain
                            rag_chain = (
                                {"context": lambda x: context_string, "query": RunnablePassthrough()}
                                | rag_prompt
                                | self.lc_llm
                                | StrOutputParser()
                            )
                            logger.info(f"{log_prefix}: Invoking RAG chain with original query: '{original_query}'")
                            final_result = rag_chain.invoke(original_query)  # Use original query as the question for the LLM
                            search_method_used = "vector (RAG executed)"
                        elif not retrieved_docs:
                            logger.info(f"{log_prefix}: 0 docs found for RAG. Falling back to text search.")
                            final_result = ""  # Trigger fallback
                        else:
                            # Docs found, but LLM is inactive
                            logger.warning(f"{log_prefix}: Docs found, but LLM inactive. Cannot RAG. Falling back to text search.")
                            final_result = ""  # Trigger fallback
                    except Exception as rag_error:
                        logger.exception(f"{log_prefix}: Vector retrieval or RAG chain error: {rag_error}")
                        final_result = ""  # Trigger fallback on error
                else:
                    # RAG path chosen, but vector search is disabled or DB not available
                    logger.warning(f"{log_prefix}: RAG path chosen, but vector search is disabled or DB failed. Falling back to text search.")
                    final_result = ""  # Trigger fallback

                # Fallback within RAG path if RAG failed or produced no result
                if not final_result:
                    logger.info(f"{log_prefix}: Falling back to text search (RAG path failed or yielded no result).")
                    search_method_used = "text (RAG fallback)"
                    final_result = self._execute_text_search_and_format(original_query, search_query, num_results, llm_expansion_note, is_fallback=True)

            # --- Text Search Path (Chosen by Router) ---
            elif routing_decision == "TEXT_SEARCH":
                search_method_used = "text (router chosen)"
                logger.info(f"{log_prefix}: Executing text search directly based on router decision.")
                final_result = self._execute_text_search_and_format(original_query, search_query, num_results, llm_expansion_note, is_fallback=False)

            # --- Handle unexpected router decision ---
            else:
                logger.error(f"{log_prefix}: Invalid router decision '{routing_decision}'. Critical error.")
                final_result = f"❌ Internal Error: Invalid routing decision '{routing_decision}'."

            # --- Final Logging and Return ---
            search_elapsed = time.time() - search_start
            logger.info(f"{log_prefix}: Completed via '{search_method_used}' path in {search_elapsed:.2f}s.")

            # Prepare the main response string
            final_output_string = final_result if final_result else f"😕 No results found for \"{original_query}\"."
            # Create the debug string (add extra newlines for separation)
            # Use markdown code block for clarity
            debug_info = f"\n\n---\n`DEBUG: Router={routing_decision}, Method={search_method_used}`"
            # Append debug info to the main response
            return final_output_string + debug_info

        except Exception as e:
            # Catch unexpected outer errors
            logger.exception(f"{log_prefix}: Unexpected outer error: {e}")
            # Also add debug info to error messages if possible (or default)
            error_debug_info = f"\n\n---\n`DEBUG: Router={routing_decision}, Method=ErrorBeforeCompletion`"
            return f"❌ An unexpected critical error occurred: {str(e)}" + error_debug_info

    # --- Helper for Text Search Execution and Formatting ---
    def _execute_text_search_and_format(self, original_query, search_query, num_results, llm_expansion_note, is_fallback=False):
        """
        Helper to run text search and format results for display.
        Includes debug info about the execution method in the returned string.
        """
        log_prefix = f"Search(Q='{original_query}', N={num_results})"  # Re-establish prefix for logging clarity
        logger.info(f"{log_prefix}: Executing text search logic (Fallback={is_fallback}). Query='{search_query}'")
        if self.recipes_df is None or self.recipes_df.empty:
            logger.error(f"{log_prefix}: Text search error: recipes_df missing.")
            # Add debug info even to error messages if possible
            method = "text (RAG fallback)" if is_fallback else "text (router chosen)"
            debug_info = f"\n\n---\n`DEBUG: Method={method}`"
            return f"❌ Error: Recipe data frame is missing." + debug_info

        text_indices = self._text_search(search_query, num_results)  # Use potentially expanded query
        logger.info(f"{log_prefix}: Text search found indices: {text_indices}")
        text_results_data = []
        processed_indices = set()
        for recipe_id in text_indices:
            # Validate index before attempting iloc
            if isinstance(recipe_id, int) and 0 <= recipe_id < len(self.recipes_df) and recipe_id not in processed_indices:
                try:
                    recipe_data = self.recipes_df.iloc[recipe_id]
                    # Ensure necessary keys exist, provide defaults if not
                    title = recipe_data.get('title', f'Recipe {recipe_id}')
                    ingredients = str(recipe_data.get('ingredients', 'N/A'))
                    instructions = str(recipe_data.get('instructions', 'N/A'))
                    text_results_data.append({'title': title, 'ingredients': ingredients, 'instructions': instructions})
                    processed_indices.add(recipe_id)
                except Exception as df_error:
                    logger.warning(f"Text search DF access error for index {recipe_id}: {df_error}")
            else:
                logger.warning(f"Invalid or already processed text index skipped: {recipe_id}")

        # Determine the method string for notes and debug info
        method = "text (RAG fallback)" if is_fallback else "text (router chosen)"
        search_note = "(using _text search fallback_)" if is_fallback else "(using _text search_)"
        debug_info = f"\n\n---\n`DEBUG: Method={method}`"  # Debug info based on how this function was called

        if text_results_data:
            logger.info(f"{log_prefix}: Formatting {len(text_results_data)} text results.")
            # Start formatted output
            formatted_output = f"Found {len(text_results_data)} recipe(s) for \"**{original_query}**\"{llm_expansion_note} {search_note}:\n\n---\n\n"
            # Loop through collected data
            for i, recipe in enumerate(text_results_data):
                try:
                    title = recipe.get('title', 'Untitled Recipe')  # Use data from list
                    formatted_output += f"### {i+1}. {title}\n\n"
                    ing = recipe.get('ingredients')
                    inst = recipe.get('instructions')
                    # Format ingredients if present
                    if ing and ing != 'N/A':
                        ing_list = [f"- {line.strip()}" for line in ing.strip().split('\n') if line.strip()]
                        if ing_list:
                            formatted_output += "**Ingredients:**\n" + "\n".join(ing_list) + "\n\n"
                    # Format instructions if present
                    if inst and inst != 'N/A':
                        inst_list = [f"{num}. {line.strip()}" for num, line in enumerate(inst.strip().split('\n'), 1) if line.strip()]
                        if inst_list:
                            formatted_output += "**Instructions:**\n" + "\n".join(inst_list) + "\n\n"
                except Exception as fmt_e:
                    logger.warning(f"Error formatting text result #{i+1} (Title: '{recipe.get('title', 'N/A')}'): {fmt_e}")
                    formatted_output += f"*Error formatting recipe {i+1}*\n\n"  # Add error note in output
                # Add separator between recipes
                if i < len(text_results_data) - 1:
                    formatted_output += "---\n\n"
            # Append debug info before returning
            return formatted_output.strip() + debug_info
        else:
            # Handle case where text search yields no results
            logger.info(f"{log_prefix}: Text search (Fallback={is_fallback}) found 0 results after index processing.")
            # Append debug info before returning
            return f"😕 No recipes found matching: \"{original_query}\"." + debug_info

    def _text_search(self, query, num_results=3):
        """Performs keyword search on self.recipes_df.

        Scores rows by exact-phrase match, keyword overlap, and a boosted
        title-keyword overlap; returns up to ``num_results`` row positions
        (plain ints) sorted by descending score.
        """
        if self.recipes_df is None or self.recipes_df.empty:
            return []
        try:
            query_lower = query.lower()
            # Improved keyword extraction (handles more cases)
            query_words = set(re.findall(r'\b\w{3,}\b', query_lower))
            if not query_words:
                logger.warning(f"Text Search: No valid keywords found in '{query}'.")
                return []
            scored_recipes = []
            # Ensure columns exist and handle potential NaN before string operations
            titles = self.recipes_df.get('title', pd.Series(dtype=str)).fillna('').str.lower()
            ingredients_col = self.recipes_df.get('ingredients', pd.Series(dtype=str)).fillna('').astype(str).str.lower()
            # FIX: removed unused instructions_col (computed but never searched).
            search_texts = titles + " " + ingredients_col  # Combine relevant text fields
            for idx, text_content in search_texts.items():
                score = 0
                try:
                    # Basic scoring logic
                    if query_lower in text_content:
                        score += 20  # Boost exact phrase match
                    # Word overlap scoring
                    text_words = set(word for word in re.findall(r'\b\w{3,}\b', text_content))
                    score += len(query_words.intersection(text_words)) * 5  # Keyword overlap
                    # Title overlap boost
                    title_words = set(word for word in re.findall(r'\b\w{3,}\b', titles.get(idx, '')))
                    score += len(query_words.intersection(title_words)) * 10  # Title keyword overlap boost
                except Exception as score_err:
                    # Log scoring errors but continue
                    logger.warning(f"Scoring error for index {idx}: {score_err}", exc_info=False)
                if score > 0:
                    scored_recipes.append((idx, score))
            # Sort by score descending
            scored_recipes.sort(key=lambda x: x[1], reverse=True)
            # Return top N indices.
            # FIX: coerce to plain int so the downstream isinstance(..., int)
            # validation cannot silently drop numpy integer indices.
            return [int(idx) for idx, score in scored_recipes[:num_results]]
        except Exception as e:
            # Log unexpected errors during the search process
            logger.exception(f"Unexpected error during text search for '{query}': {e}")
            return []

    @staticmethod
    def _get_backup_recipes():
        """
        Provides a small, hardcoded list of recipes as a fallback.
        """
        return [
            {"title": "Spaghetti Carbonara", "description": "", "ingredients": "Spaghetti\nEggs\nPancetta or Guanciale\nPecorino Romano cheese\nBlack pepper", "instructions": "Cook spaghetti.\nFry pancetta.\nWhisk eggs and cheese.\nCombine pasta, pancetta fat, egg mixture off heat.\nAdd pasta water if needed.\nServe with pepper.", "rating": None},
            {"title": "Chocolate Chip Cookies", "description": "", "ingredients": "Butter\nSugar\nBrown Sugar\nEggs\nVanilla Extract\nFlour\nBaking Soda\nSalt\nChocolate Chips", "instructions": "Cream butter and sugars.\nBeat in eggs and vanilla.\nCombine dry ingredients.\nMix wet and dry.\nStir in chocolate chips.\nDrop onto baking sheets.\nBake until golden brown.", "rating": None},
            {"title": "Chicken Stir Fry", "description": "", "ingredients": "Chicken breast\nBroccoli\nBell peppers\nCarrots\nSoy sauce\nGinger\nGarlic\nSesame oil\nRice", "instructions": "Cut chicken and vegetables.\nStir-fry chicken until cooked.\nAdd vegetables and stir-fry until tender-crisp.\nMix sauce ingredients.\nPour sauce over stir-fry.\nServe with rice.", "rating": None},
            {"title": "Greek Salad", "description": "", "ingredients": "Cucumber\nTomatoes\nRed onion\nKalamata olives\nFeta cheese\nOlive oil\nRed wine vinegar\nOregano", "instructions": "Chop vegetables.\nCombine vegetables and olives in a bowl.\nCrumble feta cheese over salad.\nWhisk olive oil, vinegar, and oregano for dressing.\nDrizzle dressing over salad.", "rating": None},
            {"title": "Easy Banana Bread", "description": "", "ingredients": "Ripe bananas\nButter\nSugar\nEgg\nVanilla extract\nFlour\nBaking soda\nSalt", "instructions": "Mash bananas.\nMelt butter.\nMix melted butter, sugar, egg, and vanilla.\nCombine dry ingredients.\nMix wet and dry ingredients until just combined.\nPour into loaf pan.\nBake until a toothpick comes out clean.", "rating": None}
        ]


# ==============================================================================
# Gradio Interface Creation (Stateful Chatbot UI - 
Corrected Outputs/Yields) # ============================================================================== def create_interface(): """Sets up and defines the Gradio web interface using a stateful gr.Chatbot.""" recipe_system = RecipeRecommendationSystem() logger.info("Creating Gradio interface with Stateful Chatbot...") # --- UI Helper Functions (Corrected outputs for ALL buttons/inputs) --- def ui_init_system(sample_size_value, progress=gr.Progress(track_tqdm=True)): logger.info(f"UI: Init clicked. Sample size: {sample_size_value}") status_msg = "Initializing..." # Outputs: Status, Init Btn, Reload Btn, Send Btn, Msg Input # Yield status + 4 updates (for the 4 components in outputs list below) yield status_msg, gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=False) try: success = recipe_system.initialize(force_reload=False, sample_size=int(sample_size_value)) if success and recipe_system.is_initialized: num = len(recipe_system.recipes_df) if recipe_system.recipes_df is not None else 0; db = "vector" if recipe_system.use_vector_search else "text"; llm = "active" if recipe_system.use_llm and recipe_system.lc_llm else "inactive"; status_msg = f"βœ… Initialized ({num} recipes, {db} search, LLM {llm}). Ready." # Enable all relevant controls -> Yield Status + 4 True updates yield status_msg, gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True) else: status_msg = f"❌ Init failed: {recipe_system.initialization_error}. May use backups." 
ok = recipe_system.recipes_df is not None and not recipe_system.recipes_df.empty # Enable Init Btn, enable others based on fallback 'ok' -> Yield Status + 1 True + 3 'ok' updates yield status_msg, gr.update(interactive=True), gr.update(interactive=ok), gr.update(interactive=ok), gr.update(interactive=ok) except Exception as e: logger.exception(f"UI initialization error: {e}") # Enable all controls on error to allow retry -> Yield Status + 4 True updates yield f"❌ UI Error: {e}", gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True) def ui_reload_system(sample_size_value, progress=gr.Progress(track_tqdm=True)): logger.info(f"UI: Reload clicked. Sample size: {sample_size_value}") status_msg = "Reloading..." # Outputs: Status, Init Btn, Reload Btn, Send Btn, Msg Input # Yield status + 4 updates yield status_msg, gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=False) try: success = recipe_system.initialize(force_reload=True, sample_size=int(sample_size_value)) if success and recipe_system.is_initialized: num = len(recipe_system.recipes_df) if recipe_system.recipes_df is not None else 0; db = "vector" if recipe_system.use_vector_search else "text"; llm = "active" if recipe_system.use_llm and recipe_system.lc_llm else "inactive"; status_msg = f"βœ… Reloaded ({num} recipes, {db} search, LLM {llm}). Ready." # Enable all -> Yield Status + 4 True updates yield status_msg, gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True) else: status_msg = f"❌ Reload failed: {recipe_system.initialization_error}. May use backups." 
ok = recipe_system.recipes_df is not None and not recipe_system.recipes_df.empty # Enable Init Btn, enable others based on fallback 'ok' -> Yield Status + 1 True + 3 'ok' updates yield status_msg, gr.update(interactive=True), gr.update(interactive=ok), gr.update(interactive=ok), gr.update(interactive=ok) except Exception as e: logger.exception(f"UI reload error: {e}") # Enable all controls on error -> Yield Status + 4 True updates yield f"❌ UI Error: {e}", gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True) # --- Stateful Chat Interaction Function (Includes fix for ValidationError) --- def respond(message, chat_history_list, num_results_value): """ Handles user message, appends to history, calls backend, updates history. Uses 'messages' format (list of dicts with 'role' and 'content'). Uses '...' as placeholder instead of None for content. """ logger.info(f"UI Chat: Msg='{message}', History Len={len(chat_history_list)}, N={num_results_value}") # Input Validation & Initialization Check if not message or not message.strip(): logger.warning("Respond function called with empty message.") chat_history_list.append({"role": "user", "content": message}) chat_history_list.append({"role": "assistant", "content": "⚠️ Please enter a message."}) return chat_history_list, gr.update(value="") # Return history and clear update if not recipe_system.is_initialized and (recipe_system.recipes_df is None or recipe_system.recipes_df.empty): logger.warning("Respond function called but system not initialized.") chat_history_list.append({"role": "user", "content": message}) chat_history_list.append({"role": "assistant", "content": "⚠️ System not initialized or no data loaded. 
Please Initialize/Reload."}) return chat_history_list, gr.update(value="") # Return history and clear update # Append user message and placeholder for bot - yield for immediate display chat_history_list.append({"role": "user", "content": message}) chat_history_list.append({"role": "assistant", "content": "..."}) # Placeholder # Yield history to display user message & placeholder, yield empty string "" to clear input yield chat_history_list, "" # Call Backend bot_response_content = "Error generating response." # Default try: logger.info("Calling recipe_system.search_recipes...") bot_response_content = recipe_system.search_recipes(message, int(num_results_value)) if not bot_response_content: # Handle empty returns bot_response_content = "πŸ˜• No specific information found." logger.info("Backend search successful.") except Exception as e: logger.exception(f"Error during backend search call from chat: {e}") bot_response_content = f"❌ Error calling backend: {e}" # Update the placeholder in history with the actual response chat_history_list[-1]["content"] = bot_response_content # Yield final history state (input box already cleared) yield chat_history_list, "" # --- UI Layout --- with gr.Blocks( title="Recipe Chat Agent", theme=gr.themes.Soft(primary_hue=gr.themes.colors.amber, secondary_hue=gr.themes.colors.lime), css=".gradio-container {max-width: 800px !important}" ) as demo: gr.Markdown("# 🍲 Recipe Chat Agent πŸŽ‰") gr.Markdown("### Ask questions or search for recipes conversationally!") # Define ALL UI Components FIRST with gr.Row(): with gr.Column(scale=1): status_display = gr.Textbox("Status: Not initialized.", label="System Status", interactive=False, lines=2) with gr.Column(scale=2): with gr.Accordion("βš™οΈ Settings & Initialization", open=False): sample_slider = gr.Slider(minimum=100, maximum=5000, value=1000, step=100, label="Recipes to Load/Sample", info="Affects init time/memory.") results_slider = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="# 
Results/Context Docs", info="For RAG context or # Text Results") with gr.Row(): init_button = gr.Button("πŸš€ Initialize System", variant="secondary", size="sm") # Interactive state set by load reload_button = gr.Button("πŸ”„ Reload Data", variant="stop", size="sm") # Interactive state set by load with gr.Group(visible=True) as chat_interface_group: # Keep visible chatbot = gr.Chatbot(label="Conversation", bubble_full_width=False, height=500, type='messages') # Use 'messages' type chat_history = gr.State([]) # Initialize state for history list with gr.Row(): msg_input = gr.Textbox(label="Your Message:", placeholder="Type your message here...", lines=1, scale=4, container=False) # Interactive state set by load send_button = gr.Button("βœ‰οΈ Send", variant="primary", scale=1, min_width=100) # Interactive state set by load gr.Examples( examples=[ ["easy weeknight dinner"], ["healthy vegetarian soup"], ["how long does the banana bread take to bake?"], ["does the carbonara recipe use cream?"], ["супа со ΠΏΠ΅Ρ‡ΡƒΡ€ΠΊΠΈ"], ["find recipes with feta and olives"] ], inputs=msg_input, label="Example Messages" ) # --- Define ALL Event Listeners AFTER components --- init_button.click( fn=ui_init_system, inputs=[sample_slider], # Outputs: Status, Init Btn, Reload Btn, Send Btn, Msg Input (5 total) outputs=[status_display, init_button, reload_button, send_button, msg_input] # CORRECTED ) reload_button.click( fn=ui_reload_system, inputs=[sample_slider], # Outputs: Status, Init Btn, Reload Btn, Send Btn, Msg Input (5 total) outputs=[status_display, init_button, reload_button, send_button, msg_input] # CORRECTED ) # Connect chat interactions send_button.click( fn=respond, inputs=[msg_input, chat_history, results_slider], outputs=[chatbot, msg_input] # Respond updates chatbot and clears input ) msg_input.submit( fn=respond, inputs=[msg_input, chat_history, results_slider], outputs=[chatbot, msg_input] # Respond updates chatbot and clears input ) # Initial setup on load: Enable ONLY 
init_button def setup_load_state(): # Return updates for: Init, Reload, Send, MsgInput (4 total) # Enable Init, disable others return gr.update(interactive=True), gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=False) # CORRECTED demo.load( fn=setup_load_state, inputs=None, # Components to update: Init, Reload, Send, MsgInput (4 total) outputs=[init_button, reload_button, send_button, msg_input] # CORRECTED ) logger.info("Gradio Interface definition complete.") return demo # ============================================================================== # Main Execution Block (Keep as before) # ============================================================================== # ... (rest of the script including if __name__ == "__main__":) ... # --- UI Layout --- with gr.Blocks( title="Recipe Chat Agent", theme=gr.themes.Soft(primary_hue=gr.themes.colors.amber, secondary_hue=gr.themes.colors.lime), css=".gradio-container {max-width: 800px !important}" ) as demo: gr.Markdown("# 🍲 Recipe Chat Agent πŸŽ‰") gr.Markdown("### Ask questions or search for recipes conversationally!") # Define ALL UI Components FIRST with gr.Row(): with gr.Column(scale=1): status_display = gr.Textbox("Status: Not initialized.", label="System Status", interactive=False, lines=2) with gr.Column(scale=2): with gr.Accordion("βš™οΈ Settings & Initialization", open=False): sample_slider = gr.Slider(minimum=100, maximum=5000, value=1000, step=100, label="Recipes to Load/Sample", info="Affects init time/memory.") results_slider = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="# Results/Context Docs", info="For RAG context or # Text Results") with gr.Row(): init_button = gr.Button("πŸš€ Initialize System", variant="secondary", size="sm") # Interactive state set by load reload_button = gr.Button("πŸ”„ Reload Data", variant="stop", size="sm") # Interactive state set by load with gr.Group(visible=True) as chat_interface_group: # Keep visible chatbot = 
gr.Chatbot(label="Conversation", height=500, type='messages') # Use 'messages' type chat_history = gr.State([]) # Initialize state for history list with gr.Row(): msg_input = gr.Textbox(label="Your Message:", placeholder="Type your message here...", lines=1, scale=4, container=False) # Interactive state set by load send_button = gr.Button("βœ‰οΈ Send", variant="primary", scale=1, min_width=100) # Interactive state set by load gr.Examples( examples=[ ["easy weeknight dinner"], ["healthy vegetarian soup"], ["how long does the banana bread take to bake?"], ["does the carbonara recipe use cream?"], ["супа со ΠΏΠ΅Ρ‡ΡƒΡ€ΠΊΠΈ"], ["find recipes with feta and olives"] ], inputs=msg_input, label="Example Messages" ) # --- Define ALL Event Listeners AFTER components --- init_button.click( fn=ui_init_system, inputs=[sample_slider], outputs=[status_display, init_button, reload_button, send_button, msg_input] ) reload_button.click( fn=ui_reload_system, inputs=[sample_slider], outputs=[status_display, init_button, reload_button, send_button, msg_input] ) # Connect chat interactions # Use .then() to clear input AFTER respond finishes and updates chatbot # Clears input textbox clear_input = msg_input.submit( fn=respond, inputs=[msg_input, chat_history, results_slider], outputs=[chatbot, msg_input] # Respond updates chatbot and clears input ) # Send button also uses respond and clears input send_button.click( fn=respond, inputs=[msg_input, chat_history, results_slider], outputs=[chatbot, msg_input] # Respond updates chatbot and clears input ) # Initial setup on load: Enable ONLY init_button def setup_load_state(): # Return updates for: Init, Reload, Send, MsgInput return gr.update(interactive=True), gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=False) demo.load( fn=setup_load_state, inputs=None, outputs=[init_button, reload_button, send_button, msg_input] ) logger.info("Gradio Interface definition complete.") return demo # 
============================================================================== # Main Execution Block # ============================================================================== if __name__ == "__main__": logger.info("Application starting...") if not LANGCHAIN_LLM_AVAILABLE: logger.warning("!"*20 + "\nLangChain LLM (Gemini) setup INCOMPLETE...\n" + "!"*20) else: logger.info("LangChain LLM dependencies and API key found.") if not VECTOR_IMPORTS_AVAILABLE: logger.warning("!"*20 + "\nVector search dependencies NOT FOUND...\n" + "!"*20) else: logger.info("Vector search dependencies found.") logger.info("Creating Gradio interface...") interface = create_interface() logger.info("Launching Gradio interface...") interface.launch(share=False) # Share=False for local testing logger.info("Gradio interface closed.")