# agent.py — AGENTE SEMÁNTICO CON SÍNTESIS INTELIGENTE v1.0
"""Semantic prompt-enhancement agent.

Loads a precomputed FAISS index of example captions plus a
sentence-transformer embedding model, and uses nearest-neighbour
captions to enrich user prompts for image generation.
"""
import json
import logging
import os
import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
from typing import Optional, Tuple, List, Dict, Any

import numpy as np
import faiss
import spacy
from sentence_transformers import CrossEncoder, SentenceTransformer
from spacy.lang.en import English

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load the small English spaCy pipeline, downloading it on first use.
# If anything else goes wrong, fall back to a bare tokenizer with a
# sentencizer so NLP(...) still works (without POS tags / noun chunks).
try:
    NLP = spacy.load("en_core_web_sm")
    logger.info("✅ spaCy 'en_core_web_sm' cargado.")
except OSError:
    logger.info("📥 Descargando 'en_core_web_sm'...")
    from spacy.cli import download
    download("en_core_web_sm")
    NLP = spacy.load("en_core_web_sm")
    logger.info("✅ spaCy 'en_core_web_sm' descargado y cargado.")
except Exception as e:
    logger.warning(f"⚠️ Error con spaCy: {e}. Usando tokenizer básico.")
    NLP = English()
    NLP.add_pipe("sentencizer")


class ImprovedSemanticAgent:
    """Prompt enhancer backed by a FAISS index of example captions.

    The heavy index/metadata load is deferred to the first call of
    :meth:`enhance_prompt` (lazy init with a timeout). A cross-encoder
    reranker is instantiated for parity with the original design but is
    not used by the current enhancement path.
    """

    def __init__(self):
        logger.info("🚀 Cargando modelo de embeddings (bge-small-en-v1.5)...")
        self.embedding_model = SentenceTransformer('BAAI/bge-small-en-v1.5')
        # NOTE(review): loaded but currently unused by any method below.
        self.reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2', max_length=512)
        self.index = None               # faiss index, populated lazily
        self.indexed_examples = []      # list of {'caption': str, ...} dicts
        self.is_ready = False
        self.total_indexed = 0
        self.searches_performed = 0
        logger.info("✅ Agente semántico inicializado")

    def _lazy_init(self) -> str:
        """Load the precomputed index on first use, bounded by a 60 s timeout.

        Returns a human-readable status string; sets ``self.is_ready`` on
        success (inside :meth:`_load_precomputed_index`).
        """
        if self.is_ready:
            return "✅ Agente ya inicializado"
        try:
            # Deliberately NOT a `with` block: the executor's __exit__ joins
            # the worker thread, which would block past the timeout and make
            # it useless. shutdown(wait=False) lets us return immediately.
            executor = ThreadPoolExecutor(max_workers=1)
            future = executor.submit(self._load_precomputed_index)
            try:
                return future.result(timeout=60)
            except FutureTimeoutError:
                return "❌ Timeout inicializando agente"
            finally:
                executor.shutdown(wait=False)
        except Exception as e:
            return f"❌ Error: {str(e)}"

    def _load_precomputed_index(self) -> str:
        """Read ``faiss_index.bin`` + ``metadata.json`` from the CWD.

        Returns a status string; on success flips ``self.is_ready``.
        """
        if not os.path.exists("faiss_index.bin") or not os.path.exists("metadata.json"):
            return "❌ Archivos de índice no encontrados"
        self.index = faiss.read_index("faiss_index.bin")
        with open("metadata.json", 'r', encoding='utf-8') as f:
            self.indexed_examples = json.load(f)
        self.total_indexed = len(self.indexed_examples)
        self.is_ready = True
        return f"✅ ¡Listo! {self.total_indexed:,} ejemplos cargados"

    def _extract_core_entities(self, text: str) -> set:
        """Extract salient lemmas / noun chunks plus a few keyword flags.

        Currently not called by the enhancement path; kept as a utility.
        """
        if not text.strip():
            return set()
        doc = NLP(text.lower())
        entities = set()
        # Content words: nouns, proper nouns and adjectives, 3+ chars, non-stop.
        for token in doc:
            if token.pos_ in ("NOUN", "PROPN", "ADJ") and len(token.text) >= 3 and not token.is_stop:
                entities.add(token.lemma_)
        # Multi-word noun chunks, joined with underscores.
        for chunk in doc.noun_chunks:
            if len(chunk.text) > 2 and not all(t.is_stop for t in chunk):
                entities.add(chunk.lemma_.replace(" ", "_"))
        # Hand-rolled keyword triggers for common visual attributes.
        text_lower = text.lower()
        if "fire" in text_lower or "flame" in text_lower:
            entities.add("on_fire")
        if "ice" in text_lower or "frozen" in text_lower:
            entities.add("frozen")
        if "gold" in text_lower or "golden" in text_lower:
            entities.add("golden")
        return entities

    def enhance_prompt(self, user_prompt: str, category: str = "auto") -> Tuple[str, str]:
        """Return ``(enhanced_prompt, status_info)``; lazily initializes first."""
        if not self.is_ready:
            init_status = self._lazy_init()
            if not self.is_ready:
                # Initialization failed/timed out: return the prompt untouched.
                return user_prompt, f"⚠️ {init_status}"
        start_time = time.time()
        self.searches_performed += 1
        enhanced, search_info = self._do_enhancement(user_prompt, category)
        elapsed = time.time() - start_time
        return enhanced, f"{search_info} (Tiempo: {elapsed:.2f}s)"

    def _do_enhancement(self, user_prompt: str, category: str) -> Tuple[str, str]:
        """Append up to 6 novel caption fragments from the 5 nearest examples."""
        try:
            logger.info(f"🔍 Analizando: '{user_prompt}'")
            query_embedding = self.embedding_model.encode(
                [user_prompt], convert_to_numpy=True, normalize_embeddings=True
            )[0]
            query_embedding = query_embedding.astype('float32').reshape(1, -1)
            distances, indices = self.index.search(query_embedding, 5)

            candidates = []
            for idx in indices[0]:
                # FAISS pads with -1 when fewer than k neighbours exist; a bare
                # `idx < len(...)` would wrongly accept -1 (the *last* example).
                if 0 <= idx < len(self.indexed_examples):
                    candidates.append(self.indexed_examples[idx]['caption'])
            if not candidates:
                return self._structural_fallback(user_prompt, category), "🔧 Fallback estructural"

            # Keep comma-separated fragments of reasonable length that add at
            # least 2 words not already present in the user's prompt.
            user_words = set(user_prompt.lower().split())
            all_parts = []
            for caption in candidates:
                parts = [p.strip() for p in caption.split(',') if 8 <= len(p) <= 120]
                for part in parts:
                    part_lower = part.lower()
                    if len(set(part_lower.split()) - user_words) >= 2:
                        all_parts.append(part)

            # De-duplicate while preserving first-seen order.
            seen = set()
            unique_parts = []
            for p in all_parts:
                if p not in seen:
                    unique_parts.append(p)
                    seen.add(p)

            selected = unique_parts[:6]
            if selected:
                additions = ", ".join(selected)
                enhanced = f"{user_prompt}, {additions}"
                return enhanced, f"✨ Prompt sintetizado con {len(candidates)} ejemplos"
            else:
                return self._structural_fallback(user_prompt, category), "🔧 Fallback estructural (sin frases útiles)"
        except Exception as e:
            logger.error(f"❌ Error en _do_enhancement: {e}")
            return user_prompt, f"❌ Error: {str(e)}"

    def _structural_fallback(self, prompt: str, category: str) -> str:
        """Append a fixed quality-boost suffix chosen by category."""
        enhancements = {
            "entity": ", highly detailed, sharp focus, professional photography, 8k resolution",
            "composition": ", cinematic composition, atmospheric perspective, golden hour, ultra-detailed",
            "style": ", artistic rendering, masterpiece, vibrant colors, museum quality",
            "imaginative": ", fantasy art, dreamlike atmosphere, magical lighting, intricate details",
            "text": ", typography design, clear lettering, high contrast, professional layout",
        }
        return prompt + enhancements.get(category, ", high quality, detailed, professional, 8k resolution")

    def get_semantic_example(self, category: str, user_prompt: str = "") -> Optional[str]:
        """Return a nearby example caption not yet handed out this session.

        Tracks previously returned indices in ``self._used_indices`` and
        resets that set once all k nearest neighbours have been used.
        """
        try:
            if not self.is_ready:
                return "⚠️ Agente no inicializado"
            search_text = user_prompt if user_prompt.strip() else "detailed professional artwork"
            search_embedding = self.embedding_model.encode(
                [search_text], convert_to_numpy=True, normalize_embeddings=True
            )[0]
            search_embedding = search_embedding.astype('float32').reshape(1, -1)

            k = min(20, len(self.indexed_examples))
            if k == 0:
                # Empty index: nothing to search.
                return "🔍 No encontrado"
            distances, indices = self.index.search(search_embedding, k)

            used_indices = getattr(self, '_used_indices', set())
            for idx in indices[0]:
                # Guard against FAISS's -1 padding as well as the upper bound.
                if 0 <= idx < len(self.indexed_examples) and idx not in used_indices:
                    used_indices.add(idx)
                    self._used_indices = used_indices
                    return self.indexed_examples[idx]['caption']

            # All k neighbours already used: reset and reuse the best match.
            self._used_indices = set()
            if indices[0].size > 0 and 0 <= indices[0][0] < len(self.indexed_examples):
                idx = indices[0][0]
                self._used_indices.add(idx)
                return self.indexed_examples[idx]['caption']
            return "🔍 No encontrado"
        except Exception as e:
            return f"❌ Error: {str(e)}"

    def get_stats(self) -> Dict:
        """Return a nested dict of simple usage counters."""
        return {
            "agente": {
                "total_indexado": self.total_indexed,
                "búsquedas_realizadas": self.searches_performed,
                "listo": self.is_ready,
            }
        }