Spaces:
Sleeping
Sleeping
| # agent.py — AGENTE SEMÁNTICO CON SÍNTESIS INTELIGENTE v1.0 | |
import json
import logging
import os
import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
from typing import Any, Dict, List, Optional, Tuple

import faiss
import numpy as np
import spacy
from sentence_transformers import CrossEncoder, SentenceTransformer
from spacy.lang.en import English
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _build_nlp():
    """Build the module-level spaCy pipeline.

    Tries 'en_core_web_sm'; if it is not installed, downloads it once and
    retries. Any failure (including a failed download, which the original
    code did NOT catch because it occurred inside the OSError handler)
    falls back to a blank English pipeline with only a sentencizer.
    """
    try:
        nlp = spacy.load("en_core_web_sm")
        logger.info("✅ spaCy 'en_core_web_sm' cargado.")
        return nlp
    except OSError:
        # Model missing on disk: attempt a one-time download, but guard it —
        # exceptions raised here are not covered by the outer handlers.
        try:
            logger.info("📥 Descargando 'en_core_web_sm'...")
            from spacy.cli import download
            download("en_core_web_sm")
            nlp = spacy.load("en_core_web_sm")
            logger.info("✅ spaCy 'en_core_web_sm' descargado y cargado.")
            return nlp
        except Exception as e:
            logger.warning(f"⚠️ Error con spaCy: {e}. Usando tokenizer básico.")
    except Exception as e:
        logger.warning(f"⚠️ Error con spaCy: {e}. Usando tokenizer básico.")
    # Last resort: tokenizer-only pipeline (no POS tags, no parser).
    nlp = English()
    nlp.add_pipe("sentencizer")
    return nlp


NLP = _build_nlp()
class ImprovedSemanticAgent:
    """Semantic prompt-enhancement agent over a precomputed FAISS index.

    Embeds a user prompt, retrieves the nearest indexed captions and splices
    their most informative fragments into the prompt. Falls back to fixed
    per-category suffixes when retrieval yields nothing useful.
    """

    def __init__(self):
        logger.info("🚀 Cargando modelo de embeddings (bge-small-en-v1.5)...")
        self.embedding_model = SentenceTransformer('BAAI/bge-small-en-v1.5')
        # NOTE(review): the reranker is loaded but never used by any method
        # in this class — confirm whether it can be dropped to save memory.
        self.reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2', max_length=512)
        self.index = None            # FAISS index, set by _load_precomputed_index
        self.indexed_examples = []   # metadata dicts; each must carry a 'caption' key
        self.is_ready = False
        self.total_indexed = 0
        self.searches_performed = 0
        logger.info("✅ Agente semántico inicializado")

    def _lazy_init(self) -> str:
        """Load the on-disk index once, with a 60 s timeout.

        Returns:
            A human-readable status string (success, timeout or error).
        """
        if self.is_ready:
            return "✅ Agente ya inicializado"
        try:
            with ThreadPoolExecutor() as executor:
                future = executor.submit(self._load_precomputed_index)
                try:
                    return future.result(timeout=60)
                except FutureTimeoutError:
                    return "❌ Timeout inicializando agente"
        except Exception as e:
            return f"❌ Error: {str(e)}"

    def _load_precomputed_index(self) -> str:
        """Read 'faiss_index.bin' + 'metadata.json' from the CWD and mark the agent ready."""
        if not os.path.exists("faiss_index.bin") or not os.path.exists("metadata.json"):
            return "❌ Archivos de índice no encontrados"
        self.index = faiss.read_index("faiss_index.bin")
        with open("metadata.json", 'r', encoding='utf-8') as f:
            self.indexed_examples = json.load(f)
        self.total_indexed = len(self.indexed_examples)
        self.is_ready = True
        return f"✅ ¡Listo! {self.total_indexed:,} ejemplos cargados"

    def _extract_core_entities(self, text: str) -> set:
        """Extract lowercase lemmas of content words, noun chunks and a few
        hand-picked visual-attribute tags from *text*.

        Returns an empty set for blank input.
        """
        if not text.strip():
            return set()
        doc = NLP(text.lower())
        entities = set()
        for token in doc:
            if token.pos_ in ("NOUN", "PROPN", "ADJ") and len(token.text) >= 3 and not token.is_stop:
                entities.add(token.lemma_)
        # noun_chunks requires a dependency parse; the blank-English fallback
        # pipeline has no parser and raises ValueError, so degrade gracefully
        # instead of crashing the whole extraction.
        try:
            for chunk in doc.noun_chunks:
                if len(chunk.text) > 2 and not all(t.is_stop for t in chunk):
                    entities.add(chunk.lemma_.replace(" ", "_"))
        except ValueError:
            pass
        text_lower = text.lower()
        if "fire" in text_lower or "flame" in text_lower:
            entities.add("on_fire")
        if "ice" in text_lower or "frozen" in text_lower:
            entities.add("frozen")
        if "gold" in text_lower or "golden" in text_lower:
            entities.add("golden")
        return entities

    def enhance_prompt(self, user_prompt: str, category: str = "auto") -> Tuple[str, str]:
        """Public entry point: enhance *user_prompt* via semantic retrieval.

        Lazily initializes the index on first use.

        Returns:
            (enhanced_prompt, status_message). On initialization failure the
            prompt is returned unchanged with a warning status.
        """
        if not self.is_ready:
            init_status = self._lazy_init()
            if not self.is_ready:
                return user_prompt, f"⚠️ {init_status}"
        start_time = time.time()
        self.searches_performed += 1
        enhanced, search_info = self._do_enhancement(user_prompt, category)
        elapsed = time.time() - start_time
        return enhanced, f"{search_info} (Tiempo: {elapsed:.2f}s)"

    def _do_enhancement(self, user_prompt: str, category: str) -> Tuple[str, str]:
        """Retrieve the 5 nearest captions and splice their novel fragments
        into the prompt; fall back to structural suffixes otherwise."""
        try:
            logger.info(f"🔍 Analizando: '{user_prompt}'")
            query_embedding = self.embedding_model.encode(
                [user_prompt], convert_to_numpy=True, normalize_embeddings=True
            )[0]
            query_embedding = query_embedding.astype('float32').reshape(1, -1)
            distances, indices = self.index.search(query_embedding, 5)
            candidates = []
            for idx in indices[0]:
                # FAISS pads with -1 when fewer than k neighbors exist; a bare
                # `idx < len(...)` would wrongly accept that as list index -1.
                if 0 <= idx < len(self.indexed_examples):
                    candidates.append(self.indexed_examples[idx]['caption'])
            if not candidates:
                return self._structural_fallback(user_prompt, category), "🔧 Fallback estructural"
            # Keep only caption fragments that add at least two words the
            # user did not already type.
            user_words = set(user_prompt.lower().split())
            all_parts = []
            for caption in candidates:
                parts = [p.strip() for p in caption.split(',') if 8 <= len(p) <= 120]
                for part in parts:
                    part_lower = part.lower()
                    if len(set(part_lower.split()) - user_words) >= 2:
                        all_parts.append(part)
            # Deduplicate while preserving retrieval order.
            seen = set()
            unique_parts = []
            for p in all_parts:
                if p not in seen:
                    unique_parts.append(p)
                    seen.add(p)
            selected = unique_parts[:6]
            if selected:
                additions = ", ".join(selected)
                enhanced = f"{user_prompt}, {additions}"
                return enhanced, f"✨ Prompt sintetizado con {len(candidates)} ejemplos"
            else:
                return self._structural_fallback(user_prompt, category), "🔧 Fallback estructural (sin frases útiles)"
        except Exception as e:
            logger.error(f"❌ Error en _do_enhancement: {e}")
            return user_prompt, f"❌ Error: {str(e)}"

    def _structural_fallback(self, prompt: str, category: str) -> str:
        """Append a fixed per-category quality suffix to *prompt*.

        Unknown categories get a generic high-quality suffix.
        """
        enhancements = {
            "entity": ", highly detailed, sharp focus, professional photography, 8k resolution",
            "composition": ", cinematic composition, atmospheric perspective, golden hour, ultra-detailed",
            "style": ", artistic rendering, masterpiece, vibrant colors, museum quality",
            "imaginative": ", fantasy art, dreamlike atmosphere, magical lighting, intricate details",
            "text": ", typography design, clear lettering, high contrast, professional layout"
        }
        return prompt + enhancements.get(category, ", high quality, detailed, professional, 8k resolution")

    def get_semantic_example(self, category: str, user_prompt: str = "") -> Optional[str]:
        """Return one indexed caption similar to *user_prompt*, cycling through
        the top-k across calls so repeated requests yield different examples."""
        try:
            if not self.is_ready:
                return "⚠️ Agente no inicializado"
            search_text = user_prompt if user_prompt.strip() else "detailed professional artwork"
            search_embedding = self.embedding_model.encode(
                [search_text], convert_to_numpy=True, normalize_embeddings=True
            )[0]
            search_embedding = search_embedding.astype('float32').reshape(1, -1)
            k = min(20, len(self.indexed_examples))
            distances, indices = self.index.search(search_embedding, k)
            used_indices = getattr(self, '_used_indices', set())
            for idx in indices[0]:
                # Skip FAISS's -1 padding and anything already served.
                if 0 <= idx < len(self.indexed_examples) and idx not in used_indices:
                    used_indices.add(idx)
                    self._used_indices = used_indices
                    return self.indexed_examples[idx]['caption']
            # Every top-k hit was already served: reset the cycle and reuse
            # the best (first) valid hit.
            self._used_indices = set()
            if indices[0].size > 0 and 0 <= indices[0][0] < len(self.indexed_examples):
                idx = indices[0][0]
                self._used_indices.add(idx)
                return self.indexed_examples[idx]['caption']
            return "🔍 No encontrado"
        except Exception as e:
            return f"❌ Error: {str(e)}"

    def get_stats(self) -> Dict:
        """Return a nested dict of usage counters for display/monitoring."""
        return {
            "agente": {
                "total_indexado": self.total_indexed,
                "búsquedas_realizadas": self.searches_performed,
                "listo": self.is_ready
            }
        }