File size: 8,219 Bytes
ebe3b8c
67930d0
 
 
 
 
 
 
 
4c3d4d3
 
67930d0
 
 
 
4c3d4d3
 
 
41afdf3
e1fc349
41afdf3
 
 
 
4c3d4d3
e1fc349
4c3d4d3
 
 
67930d0
 
4c3d4d3
67930d0
 
 
 
 
 
 
4c3d4d3
67930d0
 
 
 
 
 
 
 
4c3d4d3
67930d0
e1fc349
67930d0
 
 
 
953684a
 
 
 
 
 
 
 
 
67930d0
4c3d4d3
 
 
 
 
 
 
 
 
 
 
 
e1fc349
4c3d4d3
 
 
 
 
 
 
67930d0
 
 
 
 
 
 
 
 
 
 
 
 
953684a
67930d0
 
e24ddbd
ebe3b8c
e24ddbd
2333643
e24ddbd
 
ebe3b8c
e24ddbd
 
ebe3b8c
e24ddbd
 
ebe3b8c
e24ddbd
 
531b6d6
 
e24ddbd
 
ebe3b8c
953684a
e24ddbd
 
953684a
e24ddbd
953684a
ebe3b8c
e24ddbd
953684a
 
 
e24ddbd
953684a
e24ddbd
ebe3b8c
67930d0
953684a
e24ddbd
67930d0
953684a
67930d0
953684a
 
 
 
 
67930d0
953684a
67930d0
 
 
 
953684a
 
67930d0
 
953684a
67930d0
4c3d4d3
67930d0
4c3d4d3
 
 
953684a
 
 
 
 
 
 
ebe3b8c
67930d0
 
 
 
 
953684a
 
 
 
4c3d4d3
953684a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# agent.py — SEMANTIC AGENT WITH INTELLIGENT SYNTHESIS v1.0
import os
import time
import logging
import numpy as np
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
from typing import Optional, Tuple, List, Dict, Any
from sentence_transformers import CrossEncoder, SentenceTransformer
import faiss
import spacy
from spacy.lang.en import English

# Configure root logging once at import time; module-scoped logger gives
# namespaced output ("agent" / "__main__") in the shared log stream.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load the English spaCy pipeline used for lemma / noun-chunk extraction.
# Fallback chain: installed model -> download it on first run -> bare English
# tokenizer with only a sentencizer. NOTE(review): the last fallback has no
# dependency parser, so parser-dependent features (e.g. doc.noun_chunks) will
# raise if used downstream.
try:
    NLP = spacy.load("en_core_web_sm")
    logger.info("✅ spaCy 'en_core_web_sm' cargado.")
except OSError:
    # Model not installed — fetch it once, then load (network side effect).
    logger.info("📥 Descargando 'en_core_web_sm'...")
    from spacy.cli import download
    download("en_core_web_sm")
    NLP = spacy.load("en_core_web_sm")
    logger.info("✅ spaCy 'en_core_web_sm' descargado y cargado.")
except Exception as e:
    # Any other failure (e.g. download error): degrade to a plain tokenizer.
    logger.warning(f"⚠️ Error con spaCy: {e}. Usando tokenizer básico.")
    NLP = English()
    NLP.add_pipe("sentencizer")

class ImprovedSemanticAgent:
    """Prompt-enhancement agent backed by a precomputed FAISS index of captions.

    Embeds the user's prompt with a SentenceTransformer, retrieves similar
    example captions from the FAISS index ("faiss_index.bin" + "metadata.json"
    in the working directory), and splices their most informative comma-separated
    fragments onto the prompt. Falls back to fixed per-category suffixes when
    retrieval yields nothing usable.
    """

    def __init__(self):
        logger.info("🚀 Cargando modelo de embeddings (bge-small-en-v1.5)...")
        self.embedding_model = SentenceTransformer('BAAI/bge-small-en-v1.5')
        # NOTE(review): the reranker is loaded but never used by any method in
        # this class — either wire it into _do_enhancement or drop it to save
        # memory and startup time.
        self.reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2', max_length=512)
        self.index = None                 # faiss index, set by _load_precomputed_index
        self.indexed_examples: List[Dict[str, Any]] = []  # metadata rows; each has a 'caption' key
        self.is_ready = False             # flips True once the index is loaded
        self.total_indexed = 0
        self.searches_performed = 0
        logger.info("✅ Agente semántico inicializado")

    def _lazy_init(self) -> str:
        """Load the precomputed index on first use, bounded by a 60 s timeout.

        Returns a human-readable status string; sets ``self.is_ready`` on success.
        """
        if self.is_ready:
            return "✅ Agente ya inicializado"
        # Fix: the original ``with ThreadPoolExecutor()`` called
        # shutdown(wait=True) on exit, which blocked until the worker finished
        # and made the 60 s timeout ineffective. Shut down without waiting on
        # the timeout path instead; the orphaned load finishes in background.
        executor = ThreadPoolExecutor(max_workers=1)
        try:
            future = executor.submit(self._load_precomputed_index)
            try:
                result = future.result(timeout=60)
                executor.shutdown(wait=True)
                return result
            except FutureTimeoutError:
                executor.shutdown(wait=False)
                return "❌ Timeout inicializando agente"
        except Exception as e:
            executor.shutdown(wait=False)
            return f"❌ Error: {str(e)}"

    def _load_precomputed_index(self) -> str:
        """Read 'faiss_index.bin' and 'metadata.json' from the working directory.

        Returns a status string; on success populates ``self.index``,
        ``self.indexed_examples`` and marks the agent ready.
        """
        import json  # local import: only needed on this (rare) code path
        if not os.path.exists("faiss_index.bin") or not os.path.exists("metadata.json"):
            return "❌ Archivos de índice no encontrados"
        self.index = faiss.read_index("faiss_index.bin")
        with open("metadata.json", 'r', encoding='utf-8') as f:
            self.indexed_examples = json.load(f)
        self.total_indexed = len(self.indexed_examples)
        self.is_ready = True
        return f"✅ ¡Listo! {self.total_indexed:,} ejemplos cargados"

    def _extract_core_entities(self, text: str) -> set:
        """Extract lowercase content-word lemmas, noun-chunk lemmas, and a few
        hand-tuned visual concepts (fire/ice/gold) from *text*."""
        if not text.strip():
            return set()
        doc = NLP(text.lower())
        entities = set()
        for token in doc:
            # Content words only: nouns / proper nouns / adjectives, at least
            # 3 characters, and not stopwords.
            if token.pos_ in ("NOUN", "PROPN", "ADJ") and len(token.text) >= 3 and not token.is_stop:
                entities.add(token.lemma_)
        try:
            for chunk in doc.noun_chunks:
                if len(chunk.text) > 2 and not all(t.is_stop for t in chunk):
                    entities.add(chunk.lemma_.replace(" ", "_"))
        except ValueError:
            # Fix: doc.noun_chunks requires a dependency parser; the bare
            # English()+sentencizer fallback pipeline raises ValueError here.
            pass
        text_lower = text.lower()
        if "fire" in text_lower or "flame" in text_lower:
            entities.add("on_fire")
        if "ice" in text_lower or "frozen" in text_lower:
            entities.add("frozen")
        if "gold" in text_lower or "golden" in text_lower:
            entities.add("golden")
        return entities

    def enhance_prompt(self, user_prompt: str, category: str = "auto") -> Tuple[str, str]:
        """Public entry point: return ``(enhanced_prompt, status_message)``.

        Lazily initializes the index; on init failure returns the prompt
        unchanged with a warning status.
        """
        if not self.is_ready:
            init_status = self._lazy_init()
            if not self.is_ready:
                return user_prompt, f"⚠️ {init_status}"
        start_time = time.time()
        self.searches_performed += 1
        enhanced, search_info = self._do_enhancement(user_prompt, category)
        elapsed = time.time() - start_time
        return enhanced, f"{search_info} (Tiempo: {elapsed:.2f}s)"

    def _do_enhancement(self, user_prompt: str, category: str) -> Tuple[str, str]:
        """Retrieve 5 similar captions and append up to 6 novel fragments.

        A fragment qualifies if it is 8–120 chars long and contributes at
        least 2 words not already present in the user's prompt. Falls back to
        :meth:`_structural_fallback` when nothing qualifies; on any exception
        returns the prompt unchanged with an error status.
        """
        try:
            logger.info(f"🔍 Analizando: '{user_prompt}'")
            query_embedding = self.embedding_model.encode([user_prompt], convert_to_numpy=True, normalize_embeddings=True)[0]
            query_embedding = query_embedding.astype('float32').reshape(1, -1)
            distances, indices = self.index.search(query_embedding, 5)

            candidates = []
            for idx in indices[0]:
                # Fix: FAISS pads missing neighbours with -1, and Python's
                # negative indexing made the original silently reuse the LAST
                # example. Guard both bounds.
                if 0 <= idx < len(self.indexed_examples):
                    candidates.append(self.indexed_examples[idx]['caption'])

            if not candidates:
                return self._structural_fallback(user_prompt, category), "🔧 Fallback estructural"

            user_words = set(user_prompt.lower().split())
            all_parts = []
            for caption in candidates:
                parts = [p.strip() for p in caption.split(',') if 8 <= len(p) <= 120]
                for part in parts:
                    part_lower = part.lower()
                    # Keep only fragments adding >= 2 new words vs. the prompt.
                    if len(set(part_lower.split()) - user_words) >= 2:
                        all_parts.append(part)

            # Deduplicate while preserving retrieval order.
            seen = set()
            unique_parts = []
            for p in all_parts:
                if p not in seen:
                    unique_parts.append(p)
                    seen.add(p)

            selected = unique_parts[:6]
            if selected:
                additions = ", ".join(selected)
                enhanced = f"{user_prompt}, {additions}"
                return enhanced, f"✨ Prompt sintetizado con {len(candidates)} ejemplos"
            else:
                return self._structural_fallback(user_prompt, category), "🔧 Fallback estructural (sin frases útiles)"

        except Exception as e:
            logger.error(f"❌ Error en _do_enhancement: {e}")
            return user_prompt, f"❌ Error: {str(e)}"

    def _structural_fallback(self, prompt: str, category: str) -> str:
        """Append a fixed quality suffix chosen by *category* (generic default)."""
        enhancements = {
            "entity": ", highly detailed, sharp focus, professional photography, 8k resolution",
            "composition": ", cinematic composition, atmospheric perspective, golden hour, ultra-detailed",
            "style": ", artistic rendering, masterpiece, vibrant colors, museum quality",
            "imaginative": ", fantasy art, dreamlike atmosphere, magical lighting, intricate details",
            "text": ", typography design, clear lettering, high contrast, professional layout"
        }
        return prompt + enhancements.get(category, ", high quality, detailed, professional, 8k resolution")

    def get_semantic_example(self, category: str, user_prompt: str = "") -> Optional[str]:
        """Return one similar example caption not yet served this session.

        Tracks served indices in ``self._used_indices`` so repeated calls cycle
        through distinct results; resets and reuses the top hit once exhausted.
        ``category`` is currently unused (kept for interface compatibility).
        """
        try:
            if not self.is_ready:
                return "⚠️ Agente no inicializado"
            search_text = user_prompt if user_prompt.strip() else "detailed professional artwork"
            search_embedding = self.embedding_model.encode([search_text], convert_to_numpy=True, normalize_embeddings=True)[0]
            search_embedding = search_embedding.astype('float32').reshape(1, -1)
            k = min(20, len(self.indexed_examples))
            distances, indices = self.index.search(search_embedding, k)
            used_indices = getattr(self, '_used_indices', set())
            for idx in indices[0]:
                # Fix: guard against FAISS's -1 padding (see _do_enhancement).
                if 0 <= idx < len(self.indexed_examples) and idx not in used_indices:
                    used_indices.add(idx)
                    self._used_indices = used_indices
                    return self.indexed_examples[idx]['caption']
            # All hits already served: reset and return the best valid hit.
            self._used_indices = set()
            if indices[0].size > 0 and 0 <= indices[0][0] < len(self.indexed_examples):
                idx = indices[0][0]
                self._used_indices.add(idx)
                return self.indexed_examples[idx]['caption']
            return "🔍 No encontrado"

        except Exception as e:
            return f"❌ Error: {str(e)}"

    def get_stats(self) -> Dict:
        """Return a nested dict of usage counters for UI/diagnostics."""
        return {
            "agente": {
                "total_indexado": self.total_indexed,
                "búsquedas_realizadas": self.searches_performed,
                "listo": self.is_ready
            }
        }