# app.py — PromptCraft: Refinamiento Estructural de Prompts (versión equilibrada)
#
# Gradio app that:
#   1. translates a Spanish prompt idea to English with a local MarianMT model,
#   2. retrieves semantically similar example prompts from the agent's FAISS
#      index (with a cross-encoder rerank),
#   3. asks Llama-3.2-3B (HF router → Together, with fallbacks) to rewrite the
#      user's concept in the descriptive grammar of those examples, and
#   4. optionally renders the refined prompt with SDXL.

import gradio as gr
import os
import time
import logging
from typing import Optional, Tuple

from PIL import Image  # noqa: F401 — kept: part of the Space's original imports
from agent import ImprovedSemanticAgent
from huggingface_hub import InferenceClient
from transformers import pipeline
# NOTE(review): `Timeout` here is the httpx timeout-configuration class
# re-exported by openai — it is NOT an exception and must never appear in an
# `except` clause (doing so raises TypeError when an exception is caught).
from openai import OpenAI, APIError, Timeout  # noqa: F401

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class LlamaRefiner:
    """Translate → retrieve → LLM-refine pipeline for image-generation prompts."""

    # Curated fallback examples used when the FAISS retrieval yields nothing.
    # Keys mirror the English category codes produced by the UI mapping.
    _FALLBACK_EXAMPLES = {
        "entity": [
            "an elderly maya man weaving a hammock under a ceiba tree, golden hour light filtering through leaves, Antigua Guatemala setting, hyperrealistic style",
            "a young indigenous woman in traditional Kekchi attire by Lake Atitlán, morning mist, volcano backdrop, soft natural light, documentary photography",
        ],
        "style": [
            "oil painting of a forest in autumn, warm amber and crimson tones, impasto brushstrokes, style of Vincent van Gogh",
            "cyberpunk cityscape at night, neon reflections on wet streets, cinematic lighting, style of Blade Runner 2049",
        ],
        "composition": [
            "a lone wolf on a snowy mountain peak, northern lights in the sky, wind blowing snow, rule of thirds composition, photorealistic wildlife",
        ],
        "imaginative": [
            "a floating island with ancient ruins, waterfalls cascading into clouds, golden hour, fantasy concept art, highly detailed",
        ],
        "text": [
            "minimalist typography design, the word 'LIBERTAD' in bold sans-serif, high contrast black on white, professional layout",
        ],
        "auto": [
            "an elderly maya man weaving a hammock under a ceiba tree, golden hour light filtering through leaves, Antigua Guatemala setting, hyperrealistic style, intricate textures of rope and bark",
            "a cyberpunk street at night in Tokyo, neon signs reflecting on wet pavement, rain mist in air, distant flying cars, cinematic wide shot, Blade Runner atmosphere",
            "a library interior with tall oak bookshelves, sunbeams through stained glass windows, dust particles floating, oil painting style, warm amber tones, masterpiece",
            "a lone wolf howling on a snowy mountain peak, northern lights in the sky, wind blowing snow, photorealistic wildlife photography, 8k detailed fur",
            "a steampunk airship floating above Victorian London, copper pipes and brass gears, cloudy sky, detailed machinery, concept art by Jakub Rozalski",
            "a young woman in traditional Kekchi attire standing by Lake Atitlán, morning mist, volcano backdrop, soft natural light, documentary photography style",
        ],
    }

    def __init__(self):
        """Initialize HF client, semantic agent, and the local es→en translator.

        Raises:
            ValueError: if the 'PS' secret (HF token) is not set.
        """
        hf_token = os.getenv("PS")
        if not hf_token:
            raise ValueError("Secret 'PS' (HF_TOKEN) no encontrado.")
        self.hf_client = InferenceClient(api_key=hf_token)

        self.agent = ImprovedSemanticAgent()
        if not self.agent.is_ready:
            # The agent supports lazy initialization; force it now so the first
            # user request does not pay the model-loading cost.
            init_msg = self.agent._lazy_init()
            logger.info(f"Inicialización del agente: {init_msg}")
            if not self.agent.is_ready:
                logger.error("❌ Agente NO está listo.")

        logger.info("🚀 Cargando traductor local (es → en)...")
        self.translator = pipeline(
            "translation_es_to_en",
            model="Helsinki-NLP/opus-mt-es-en",
            device=-1,  # CPU only — HF Spaces free tier has no GPU for this
        )
        logger.info("✅ Traductor local listo.")

    def translate_to_english(self, text: str) -> str:
        """Translate Spanish text to English, then patch known MT weaknesses.

        The Helsinki model tends to soften or drop certain attributes
        ("en llamas" → "fiery", "dorado" lost, etc.), so keyword heuristics on
        the ORIGINAL Spanish re-inject "on fire" / "golden" / "frozen" when the
        translation misses them. Falls back to the original text if the local
        pipeline fails.
        """
        if not text.strip():
            return text

        try:
            result = self.translator(text, max_length=250, clean_up_tokenization_spaces=True)
            raw_translation = result[0]['translation_text'].strip()
        except Exception as e:
            logger.warning(f"Traducción local fallida: {e}. Usando texto original.")
            raw_translation = text

        user_text_lower = text.lower()
        output = raw_translation

        # "on fire": the MT model often renders it as the weaker "fiery".
        if any(kw in user_text_lower for kw in ["llamas", "ardiendo", "quem", "incendi", "fuego"]):
            output = output.replace("fiery", "on fire")
            if not any(term in output.lower() for term in ["on fire", "burning", "in flames", "ablaze", "aflame"]):
                output = output + " on fire"

        # "golden / made of gold": phrasing depends on whether the subject is an object.
        if any(kw in user_text_lower for kw in ["oro", "dorado"]):
            if "golden" not in output.lower() and "gold" not in output.lower():
                if any(w in output.lower() for w in ["statue", "sculpture", "figure"]):
                    output = output + " made of gold"
                else:
                    output = output + " golden"

        # "frozen / icy"
        if any(kw in user_text_lower for kw in ["congelado", "hielo", "helado", "ice"]):
            if not any(term in output.lower() for term in ["frozen", "ice", "icy"]):
                output = output + " frozen"

        return output.strip()

    def retrieve_similar_examples(self, user_prompt_en: str, category: str = "auto", k: int = 6) -> list:
        """Return up to *k* dataset captions semantically closest to the query.

        Pipeline: embed the query → FAISS search (top 50) → filter by category
        and minimum caption length → cross-encoder rerank → top *k*.
        Returns [] when the agent is not ready or nothing matches.
        """
        if not self.agent.is_ready:
            return []

        indices = None  # pre-bind so the except-path fallback below can test it safely
        try:
            query_embedding = self.agent.embedding_model.encode(
                [user_prompt_en], convert_to_numpy=True, normalize_embeddings=True
            )[0]
            query_embedding = query_embedding.astype('float32').reshape(1, -1)
            distances, indices = self.agent.index.search(query_embedding, 50)

            # Collect valid candidates, honoring the requested category.
            candidates = []
            for idx in indices[0]:
                if 0 <= idx < len(self.agent.indexed_examples):
                    ex = self.agent.indexed_examples[idx]
                    caption = ex.get('caption', '')
                    ex_category = ex.get('category', 'auto')
                    if isinstance(caption, str) and len(caption) > 10:
                        if category == "auto" or ex_category == category:
                            candidates.append((idx, caption, ex_category))

            if not candidates:
                return []
            if len(candidates) <= k:
                return [cap for _, cap, _ in candidates]

            # Rerank with the cross-encoder and keep the k best captions.
            candidate_texts = [cap for _, cap, _ in candidates]
            pairs = [[user_prompt_en, cand] for cand in candidate_texts]
            scores = self.agent.reranker.predict(pairs)
            scored = [(candidates[i][1], scores[i]) for i in range(len(candidates))]
            scored.sort(key=lambda x: x[1], reverse=True)
            return [ex for ex, _ in scored[:k]]
        except Exception as e:
            logger.error(f"Error en recuperación: {e}")
            # Best-effort fallback: raw FAISS order without reranking.
            # `indices` is None when the failure happened before/inside the search.
            if indices is None:
                return []
            try:
                return [
                    self.agent.indexed_examples[idx]['caption']
                    for idx in indices[0][:k]
                    if 0 <= idx < len(self.agent.indexed_examples)
                ]
            except Exception:
                return []

    def _clean_output(self, text: str) -> str:
        """Strip LLM boilerplate prefixes ('Here is…', 'Output:', quotes) from a reply."""
        text = text.strip()
        if text.startswith(("Here is", "Final:", "Output:", '"', "'")):
            text = text.split(":", 1)[-1].strip().strip("\"'")
        return text

    def refine_with_llm(self, user_prompt: str, category: str = "auto") -> Tuple[str, str, list]:
        """Refine a Spanish prompt into an English diffusion-ready prompt.

        Returns (refined_prompt, info_message, examples_used). Tries, in order:
        Llama-3.2-3B via the HF router (Together provider), the plain HF
        Inference client, and finally the agent's local semantic enrichment.
        """
        user_prompt_en = self.translate_to_english(user_prompt)
        examples = self.retrieve_similar_examples(user_prompt_en, category=category, k=6)

        if not examples:
            examples = self._FALLBACK_EXAMPLES.get(category, self._FALLBACK_EXAMPLES["auto"])
            logger.warning(f"No se encontraron ejemplos para categoría '{category}'. Usando fallback.")

        # 🔄 Flexible system message, with emphasis on literal fidelity to the user.
        system_message = (
            "You are a prompt engineering analyst for diffusion models (Midjourney, FLUX, SDXL). "
            "Analyze the DESCRIPTIVE GRAMMAR (word order, phrasing, element sequence) used in the reference prompts below. "
            "Reconstruct the user's concept using that exact same descriptive logic. "
            "Do NOT follow a predefined template (e.g. subject→lighting→style). "
            "Do NOT invent elements not implied by the user. "
            "If the user specifies that something is 'on fire', ensure that the flames are on that object or creature itself — not merely in the background or environment. "
            "Preserve the user's core intent exactly. "
            "Output ONLY the final prompt in English. No explanations, no markdown."
        )

        # 🧠 User message: natural phrasing, with an explicit literality hint when needed.
        core_user_message = f"User concept:\n{user_prompt_en}"
        if any(term in user_prompt_en.lower() for term in ["on fire", "burning", "ablaze", "aflame"]):
            core_user_message += "\n\n⚠️ Note: The subject (e.g., horse, dragon) must be physically on fire with visible flames."
        user_message = (
            core_user_message
            + "\n\nReference prompts (observe their descriptive grammar):\n"
            + "\n".join(examples)
        )
        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_message},
        ]

        # Attempt 1: HF router → Together provider.
        try:
            client = OpenAI(
                base_url="https://router.huggingface.co/v1",
                api_key=os.getenv("PS"),
            )
            completion = client.chat.completions.create(
                model="meta-llama/Llama-3.2-3B-Instruct:together",
                messages=messages,
                max_tokens=250,
                temperature=0.2,
                timeout=30.0,
            )
            refined = self._clean_output(completion.choices[0].message.content)
            info = f"🧠 Refinado con Llama-3.2-3B vía Together (HF Router, {len(examples)} ejemplos, categoría: {category})."
            return refined, info, examples
        except Exception as e1:
            # NOTE: do not put openai.Timeout in an except tuple — it is an
            # httpx config class, not an exception; `Exception` already covers
            # APIError / APITimeoutError and everything else we can recover from.
            logger.error(f"Error con Together (HF Router): {e1}")

        # Attempt 2: plain HF InferenceClient.
        try:
            completion = self.hf_client.chat.completions.create(
                model="meta-llama/Llama-3.2-3B-Instruct",
                messages=messages,
                max_tokens=250,
                temperature=0.2,
            )
            refined = self._clean_output(completion.choices[0].message.content)
            info = f"🧠 Fallback: HF (Hyperbolic, {len(examples)} ejemplos)."
            return refined, info, examples
        except Exception as e2:
            logger.error(f"También falló Hyperbolic: {e2}")

        # Attempt 3: fully local semantic enrichment (no LLM).
        enhanced_prompt, _ = self.agent.enhance_prompt(user_prompt_en, category=category)
        info = f"⚠️ LLMs no disponibles. Usando enriquecimiento semántico (categoría: {category})."
        return enhanced_prompt.strip(), info, examples


class SDXLGenerator:
    """Thin wrapper around the HF Inference API for SDXL text-to-image."""

    def __init__(self):
        """Create the inference client.

        Raises:
            ValueError: if the 'PS' secret (HF token) is not set.
        """
        hf_token = os.getenv("PS")
        if not hf_token:
            raise ValueError("Secret 'PS' (HF_TOKEN) no encontrado.")
        self.client = InferenceClient(api_key=hf_token)

    def generate_image(self, prompt: str, width: int = 1024, height: int = 1024) -> Tuple[Optional[str], str]:
        """Render *prompt* with SDXL and save to /tmp.

        Returns (file_path, message) on success, (None, error_message) on failure.
        """
        try:
            image = self.client.text_to_image(
                prompt=prompt,
                model="stabilityai/stable-diffusion-xl-base-1.0",
                width=width,
                height=height,
            )
            output_path = f"/tmp/image_{int(time.time())}.png"
            image.save(output_path)
            return output_path, "Imagen generada con éxito."
        except Exception as e:
            return None, f"Error en generación: {str(e)}"


def create_interface():
    """Build and return the Gradio Blocks UI wired to the refiner and generator."""
    try:
        refiner = LlamaRefiner()
        generator = SDXLGenerator()
    except Exception as e:
        # Keep the UI alive even if backends fail to initialize; the handlers
        # below degrade gracefully when these are None.
        refiner = None
        generator = None
        logger.error(f"Inicialización fallida: {e}")

    def refine_prompt_only(prompt: str, category_es: str, progress=gr.Progress()):
        """Handler for the 'Refinar prompt' button: returns (refined, examples, status)."""
        if not prompt.strip():
            return "", "", "Prompt vacío."
        if refiner is None:
            return "", "", "Servicios no disponibles."
        progress(0.2, desc="🌍 Traduciendo y mejorando...")
        # Map the Spanish UI labels to the internal English category codes.
        category_map = {
            "Automática": "auto",
            "Entidad": "entity",
            "Composición": "composition",
            "Estilo artístico": "style",
            "Imaginativo": "imaginative",
            "Texto": "text",
        }
        category_en = category_map.get(category_es, "auto")
        refined, info, examples = refiner.refine_with_llm(prompt, category_en)
        examples_text = (
            "\n".join(f"{i+1}. {ex}" for i, ex in enumerate(examples)) if examples else "Ninguno"
        )
        status = f"Prompt refinado: {refined}\n{info}"
        return refined, examples_text, status

    def generate_image_only(refined_prompt: str, aspect_ratio: str, progress=gr.Progress()):
        """Handler for the 'Generar imagen' button: returns (image_path, status)."""
        if not refined_prompt.strip():
            return None, "❌ No hay prompt refinado. Primero haz clic en 'Refinar prompt'."
        if generator is None:
            return None, "❌ Generador no inicializado."
        # SDXL-friendly resolutions per aspect ratio (multiples of 64, ~1MP).
        aspect_ratios = {
            "1:1": (1024, 1024),
            "16:9": (1344, 768),
            "9:16": (768, 1344),
            "4:3": (1152, 896),
            "3:4": (896, 1152),
            "21:9": (1536, 640),
            "9:21": (640, 1536),
        }
        width, height = aspect_ratios.get(aspect_ratio, (1024, 1024))
        progress(0.5, desc="🎨 Generando imagen (puede tardar 10-20s)...")
        try:
            image_path, gen_msg = generator.generate_image(refined_prompt, width, height)
            return image_path, gen_msg
        except Exception as e:
            error_msg = f"❌ Error al generar: {str(e)}"
            logger.error(error_msg)
            return None, error_msg

    CATEGORY_CHOICES_ES = ["Automática", "Entidad", "Composición", "Estilo artístico", "Imaginativo", "Texto"]

    with gr.Blocks(title="PromptCraft: Refinamiento Estructural de Prompts") as demo:
        gr.HTML("""

PromptCraft: Refinamiento Estructural de Prompts

Esta herramienta genera prompts optimizados para modelos de difusión (como Midjourney, Flux o SDXL) mediante el análisis estructural de un dataset de 100.000 prompts.
El usuario introduce su idea en castellano.
El sistema traduce ese texto a inglés.
Recupera los prompts semánticamente más cercanos en el dataset.
Usa un modelo de lenguaje (Llama-3.2-3B) para reconstruir el prompt del usuario a partir de la estructura descriptiva de los prompts del dataset (sujeto → contexto → entorno → iluminación → estilo).
Entrega un prompt en inglés listo para generación de imagen.
No añade elementos no sugeridos por el usuario. Su objetivo es la coherencia estructural, no la invención creativa.

""")
        with gr.Row():
            with gr.Column():
                prompt_input = gr.Textbox(
                    label="Tu idea (en castellano)",
                    lines=3,
                    placeholder="Ej: un caballo en llamas galopando en un bosque...",
                )
                category_es = gr.Dropdown(
                    label="Categoría", choices=CATEGORY_CHOICES_ES, value="Automática"
                )
                aspect = gr.Dropdown(
                    label="Proporción",
                    choices=["1:1", "16:9", "9:16", "4:3", "3:4", "21:9", "9:21"],
                    value="1:1",
                )
                refine_btn = gr.Button("🔄 Refinar prompt", variant="secondary")
                generate_btn = gr.Button("🎨 Generar imagen", variant="primary")
            with gr.Column():
                refined_output = gr.Textbox(label="Prompt refinado (inglés)", interactive=False, lines=3)
                image_out = gr.Image(label="Imagen", type="filepath", height=450)
                examples_out = gr.Textbox(label="Ejemplos del dataset (para análisis)", interactive=False, lines=6)
                status_out = gr.Textbox(label="Estado", interactive=False, lines=4)

        gr.HTML("""
Creado por Angel E. Pariente 🇬🇹 • Sobre una idea de Nacho Ravinovich 🇦🇷
""")

        refine_btn.click(
            fn=refine_prompt_only,
            inputs=[prompt_input, category_es],
            outputs=[refined_output, examples_out, status_out],
            show_progress=True,
        )
        generate_btn.click(
            fn=generate_image_only,
            inputs=[refined_output, aspect],
            outputs=[image_out, status_out],
            show_progress=True,
        )

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False)