# agent.py — AGENTE SEMÁNTICO CON SÍNTESIS INTELIGENTE v1.0
import json
import logging
import os
import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
from typing import Any, Dict, List, Optional, Tuple

import faiss
import numpy as np
import spacy
from sentence_transformers import CrossEncoder, SentenceTransformer
from spacy.lang.en import English
# Module-wide logging: INFO level, logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Load the spaCy English pipeline, downloading it on first run. If spaCy is
# unusable at all, fall back to a blank English tokenizer with a sentencizer.
#
# BUGFIX: in the original, a failure inside the `except OSError` handler
# (e.g. the download itself failing, offline environment) escaped uncaught —
# sibling `except` clauses of one `try` do NOT cover exceptions raised by
# each other's handlers — so the English() fallback never ran. The download
# path now has its own guard.
try:
    NLP = spacy.load("en_core_web_sm")
    logger.info("✅ spaCy 'en_core_web_sm' cargado.")
except OSError:
    # Model not installed: try to download it, with its own fallback guard.
    try:
        logger.info("📥 Descargando 'en_core_web_sm'...")
        from spacy.cli import download
        download("en_core_web_sm")
        NLP = spacy.load("en_core_web_sm")
        logger.info("✅ spaCy 'en_core_web_sm' descargado y cargado.")
    except Exception as e:
        logger.warning(f"⚠️ Error con spaCy: {e}. Usando tokenizer básico.")
        NLP = English()
        NLP.add_pipe("sentencizer")
except Exception as e:
    logger.warning(f"⚠️ Error con spaCy: {e}. Usando tokenizer básico.")
    NLP = English()
    NLP.add_pipe("sentencizer")
class ImprovedSemanticAgent:
    """Semantic prompt-enhancement agent backed by a precomputed FAISS index.

    Embeds the user prompt with a SentenceTransformer, retrieves the nearest
    indexed caption examples from FAISS, and synthesizes an enhanced prompt
    from their comma-separated fragments. Falls back to fixed per-category
    suffixes when retrieval yields nothing usable.
    """

    def __init__(self):
        # Models load eagerly; the FAISS index + metadata load lazily on
        # first use (see _lazy_init / _load_precomputed_index).
        logger.info("🚀 Cargando modelo de embeddings (bge-small-en-v1.5)...")
        self.embedding_model = SentenceTransformer('BAAI/bge-small-en-v1.5')
        self.reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2', max_length=512)
        self.index = None              # faiss index, set by _load_precomputed_index
        self.indexed_examples = []     # list of dicts with at least a 'caption' key
        self.is_ready = False
        self.total_indexed = 0
        self.searches_performed = 0
        # Captions already served by get_semantic_example (was previously
        # created lazily via getattr; initializing here is clearer).
        self._used_indices = set()
        logger.info("✅ Agente semántico inicializado")

    def _lazy_init(self) -> str:
        """Load the precomputed index on first use.

        Returns a human-readable status string (success, timeout, or error).

        BUGFIX: the original used `with ThreadPoolExecutor()`, whose __exit__
        calls shutdown(wait=True) — so after a 60 s timeout the method still
        blocked until the load finished, defeating the timeout. We now shut
        down without waiting; a timed-out load finishes in the background.
        """
        if self.is_ready:
            return "✅ Agente ya inicializado"
        executor = ThreadPoolExecutor(max_workers=1)
        try:
            future = executor.submit(self._load_precomputed_index)
            try:
                return future.result(timeout=60)
            except FutureTimeoutError:
                return "❌ Timeout inicializando agente"
        except Exception as e:
            return f"❌ Error: {str(e)}"
        finally:
            # Do not wait for a stuck/slow worker thread here.
            executor.shutdown(wait=False)

    def _load_precomputed_index(self) -> str:
        """Read faiss_index.bin and metadata.json from the working directory.

        Sets index / indexed_examples / total_indexed / is_ready on success;
        returns a status string either way.
        """
        if not os.path.exists("faiss_index.bin") or not os.path.exists("metadata.json"):
            return "❌ Archivos de índice no encontrados"
        self.index = faiss.read_index("faiss_index.bin")
        with open("metadata.json", 'r', encoding='utf-8') as f:
            self.indexed_examples = json.load(f)
        self.total_indexed = len(self.indexed_examples)
        self.is_ready = True
        return f"✅ ¡Listo! {self.total_indexed:,} ejemplos cargados"

    def _extract_core_entities(self, text: str) -> set:
        """Extract lemmatized content words, noun chunks, and a few keyword-
        driven visual tags ("on_fire", "frozen", "golden") from *text*.

        NOTE(review): when NLP is the blank English() fallback, `token.pos_`
        is empty and `doc.noun_chunks` requires a parser — confirm this
        method is only reached with the full pipeline loaded.
        """
        if not text.strip():
            return set()
        doc = NLP(text.lower())
        entities = set()
        # Content words only: nouns / proper nouns / adjectives, >= 3 chars,
        # excluding stopwords.
        for token in doc:
            if token.pos_ in ("NOUN", "PROPN", "ADJ") and len(token.text) >= 3 and not token.is_stop:
                entities.add(token.lemma_)
        # Multi-word concepts joined with underscores.
        for chunk in doc.noun_chunks:
            if len(chunk.text) > 2 and not all(t.is_stop for t in chunk):
                entities.add(chunk.lemma_.replace(" ", "_"))
        # Hand-picked visual-concept tags keyed on substrings.
        text_lower = text.lower()
        if "fire" in text_lower or "flame" in text_lower:
            entities.add("on_fire")
        if "ice" in text_lower or "frozen" in text_lower:
            entities.add("frozen")
        if "gold" in text_lower or "golden" in text_lower:
            entities.add("golden")
        return entities

    def enhance_prompt(self, user_prompt: str, category: str = "auto") -> Tuple[str, str]:
        """Public entry point.

        Returns (enhanced_prompt, status_message). If the index cannot be
        loaded, the original prompt is returned with a warning status.
        """
        if not self.is_ready:
            init_status = self._lazy_init()
            if not self.is_ready:
                return user_prompt, f"⚠️ {init_status}"
        start_time = time.time()
        self.searches_performed += 1
        enhanced, search_info = self._do_enhancement(user_prompt, category)
        elapsed = time.time() - start_time
        return enhanced, f"{search_info} (Tiempo: {elapsed:.2f}s)"

    def _do_enhancement(self, user_prompt: str, category: str) -> Tuple[str, str]:
        """Retrieve 5 nearest captions and splice their novel fragments onto
        the prompt; fall back to _structural_fallback when nothing fits."""
        try:
            logger.info(f"🔍 Analizando: '{user_prompt}'")
            query_embedding = self.embedding_model.encode(
                [user_prompt], convert_to_numpy=True, normalize_embeddings=True
            )[0]
            query_embedding = query_embedding.astype('float32').reshape(1, -1)
            distances, indices = self.index.search(query_embedding, 5)
            # BUGFIX: FAISS pads `indices` with -1 when fewer than k results
            # exist; the original `idx < len(...)` check let -1 through and
            # negative indexing silently grabbed the LAST example.
            candidates = [
                self.indexed_examples[idx]['caption']
                for idx in indices[0]
                if 0 <= idx < len(self.indexed_examples)
            ]
            if not candidates:
                return self._structural_fallback(user_prompt, category), "🔧 Fallback estructural"
            user_words = set(user_prompt.lower().split())
            all_parts = []
            for caption in candidates:
                parts = [p.strip() for p in caption.split(',') if 8 <= len(p) <= 120]
                for part in parts:
                    # Keep fragments that add at least 2 words not already
                    # present in the user prompt.
                    if len(set(part.lower().split()) - user_words) >= 2:
                        all_parts.append(part)
            # Order-preserving dedup, capped at 6 additions.
            selected = list(dict.fromkeys(all_parts))[:6]
            if selected:
                enhanced = f"{user_prompt}, {', '.join(selected)}"
                return enhanced, f"✨ Prompt sintetizado con {len(candidates)} ejemplos"
            return self._structural_fallback(user_prompt, category), "🔧 Fallback estructural (sin frases útiles)"
        except Exception as e:
            logger.error(f"❌ Error en _do_enhancement: {e}")
            return user_prompt, f"❌ Error: {str(e)}"

    def _structural_fallback(self, prompt: str, category: str) -> str:
        """Append a fixed quality suffix chosen by *category* (generic suffix
        for unknown categories, including "auto")."""
        enhancements = {
            "entity": ", highly detailed, sharp focus, professional photography, 8k resolution",
            "composition": ", cinematic composition, atmospheric perspective, golden hour, ultra-detailed",
            "style": ", artistic rendering, masterpiece, vibrant colors, museum quality",
            "imaginative": ", fantasy art, dreamlike atmosphere, magical lighting, intricate details",
            "text": ", typography design, clear lettering, high contrast, professional layout"
        }
        return prompt + enhancements.get(category, ", high quality, detailed, professional, 8k resolution")

    def get_semantic_example(self, category: str, user_prompt: str = "") -> Optional[str]:
        """Return one not-yet-served caption near *user_prompt* (or a generic
        query when empty). When all k neighbors have been served, the memory
        resets and the closest neighbor is served again."""
        try:
            if not self.is_ready:
                return "⚠️ Agente no inicializado"
            search_text = user_prompt if user_prompt.strip() else "detailed professional artwork"
            search_embedding = self.embedding_model.encode(
                [search_text], convert_to_numpy=True, normalize_embeddings=True
            )[0]
            search_embedding = search_embedding.astype('float32').reshape(1, -1)
            k = min(20, len(self.indexed_examples))
            distances, indices = self.index.search(search_embedding, k)
            used_indices = getattr(self, '_used_indices', set())
            for idx in indices[0]:
                # BUGFIX: `0 <= idx` guards against FAISS's -1 padding, which
                # negative indexing would otherwise map to the last example.
                if 0 <= idx < len(self.indexed_examples) and idx not in used_indices:
                    used_indices.add(idx)
                    self._used_indices = used_indices
                    return self.indexed_examples[idx]['caption']
            # All neighbors exhausted: reset and serve the best match again.
            self._used_indices = set()
            if indices[0].size > 0 and 0 <= indices[0][0] < len(self.indexed_examples):
                idx = indices[0][0]
                self._used_indices.add(idx)
                return self.indexed_examples[idx]['caption']
            return "🔍 No encontrado"
        except Exception as e:
            return f"❌ Error: {str(e)}"

    def get_stats(self) -> Dict:
        """Return a nested dict of simple usage counters."""
        return {
            "agente": {
                "total_indexado": self.total_indexed,
                "búsquedas_realizadas": self.searches_performed,
                "listo": self.is_ready
            }
        }