Malaji71 committed on
Commit
482876d
·
verified ·
1 Parent(s): 40b615f

Delete optimizer.py

Browse files
Files changed (1) hide show
  1. optimizer.py +0 -443
optimizer.py DELETED
@@ -1,443 +0,0 @@
1
- """
2
- Ultra Supreme Optimizer - Main optimization engine for image analysis
3
- VERSIÓN FLORENCE-2 - Usa Florence-2 en lugar de CLIP Interrogator
4
- """
5
-
6
- # IMPORTANT: spaces must be imported BEFORE torch or any CUDA-using library
7
- import spaces
8
- import gc
9
- import logging
10
- import re
11
- from datetime import datetime
12
- from typing import Tuple, Dict, Any, Optional
13
-
14
- import torch
15
- import numpy as np
16
- from PIL import Image
17
- from transformers import AutoProcessor, AutoModelForCausalLM
18
-
19
- from analyzer import UltraSupremeAnalyzer
20
-
21
- logger = logging.getLogger(__name__)
22
-
23
-
24
- class UltraSupremeOptimizer:
25
- """Main optimizer class for ultra supreme image analysis"""
26
-
27
    def __init__(self):
        """Create an optimizer with no model loaded.

        Florence-2 weights are loaded lazily by ``initialize_model`` so that
        constructing this object stays cheap (no download / GPU work here).
        """
        # Hugging Face processor/model handles; populated by initialize_model().
        self.processor = None
        self.model = None
        # Multi-model analysis helper that consumes the Florence-2 captions.
        self.analyzer = UltraSupremeAnalyzer()
        # Count of prompt generations served by this instance.
        self.usage_count = 0
        # Preferred compute device name: "cuda", "mps", or "cpu".
        self.device = self._get_device()
        # Guards against reloading the model on repeated calls.
        self.is_initialized = False
34
-
35
- @staticmethod
36
- def _get_device() -> str:
37
- """Determine the best available device for computation"""
38
- if torch.cuda.is_available():
39
- return "cuda"
40
- elif torch.backends.mps.is_available():
41
- return "mps"
42
- else:
43
- return "cpu"
44
-
45
- def initialize_model(self) -> bool:
46
- """Initialize Florence-2 model"""
47
- if self.is_initialized:
48
- return True
49
-
50
- try:
51
- logger.info("Loading Florence-2 model...")
52
-
53
- # Load Florence-2 base model (you can also use 'microsoft/Florence-2-large' for better quality)
54
- model_id = "microsoft/Florence-2-base"
55
-
56
- self.processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
57
- self.model = AutoModelForCausalLM.from_pretrained(
58
- model_id,
59
- trust_remote_code=True,
60
- torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
61
- )
62
-
63
- # Keep model on CPU initially
64
- self.model = self.model.to("cpu")
65
- self.model.eval()
66
-
67
- self.is_initialized = True
68
-
69
- # Clean up memory after initialization
70
- gc.collect()
71
-
72
- logger.info("Florence-2 model initialized successfully")
73
- return True
74
-
75
- except Exception as e:
76
- logger.error(f"Model initialization error: {e}")
77
- return False
78
-
79
- def optimize_image(self, image: Any) -> Optional[Image.Image]:
80
- """Optimize image for processing"""
81
- if image is None:
82
- return None
83
-
84
- try:
85
- # Convert to PIL Image if necessary
86
- if isinstance(image, np.ndarray):
87
- image = Image.fromarray(image)
88
- elif not isinstance(image, Image.Image):
89
- image = Image.open(image)
90
-
91
- # Convert to RGB if necessary
92
- if image.mode != 'RGB':
93
- image = image.convert('RGB')
94
-
95
- # Florence-2 handles various sizes well, but let's be reasonable
96
- max_size = 1024
97
- if image.size[0] > max_size or image.size[1] > max_size:
98
- image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
99
-
100
- return image
101
-
102
- except Exception as e:
103
- logger.error(f"Image optimization error: {e}")
104
- return None
105
-
106
- def apply_flux_rules(self, base_prompt: str) -> str:
107
- """Aplica las reglas de Flux a un prompt base"""
108
-
109
- # Limpiar el prompt de elementos no deseados
110
- cleanup_patterns = [
111
- r',\s*trending on artstation',
112
- r',\s*trending on [^,]+',
113
- r',\s*\d+k\s*',
114
- r',\s*\d+k resolution',
115
- r',\s*artstation',
116
- r',\s*concept art',
117
- r',\s*digital art',
118
- r',\s*by greg rutkowski',
119
- ]
120
-
121
- cleaned_prompt = base_prompt
122
- for pattern in cleanup_patterns:
123
- cleaned_prompt = re.sub(pattern, '', cleaned_prompt, flags=re.IGNORECASE)
124
-
125
- # Detectar el tipo de imagen para añadir configuración de cámara apropiada
126
- camera_config = ""
127
- if any(word in base_prompt.lower() for word in ['portrait', 'person', 'man', 'woman', 'face']):
128
- camera_config = ", Shot on Hasselblad X2D 100C, 90mm f/2.5 lens at f/2.8, professional portrait photography"
129
- elif any(word in base_prompt.lower() for word in ['landscape', 'mountain', 'nature', 'outdoor']):
130
- camera_config = ", Shot on Phase One XT, 40mm f/4 lens at f/8, epic landscape photography"
131
- elif any(word in base_prompt.lower() for word in ['street', 'urban', 'city']):
132
- camera_config = ", Shot on Leica M11, 35mm f/1.4 lens at f/2.8, documentary street photography"
133
- else:
134
- camera_config = ", Shot on Phase One XF IQ4, 80mm f/2.8 lens at f/4, professional photography"
135
-
136
- # Añadir mejoras de iluminación si no están presentes
137
- if 'lighting' not in cleaned_prompt.lower():
138
- if 'dramatic' in cleaned_prompt.lower():
139
- cleaned_prompt += ", dramatic cinematic lighting"
140
- elif 'portrait' in cleaned_prompt.lower():
141
- cleaned_prompt += ", professional studio lighting with subtle rim light"
142
- else:
143
- cleaned_prompt += ", masterful natural lighting"
144
-
145
- # Construir el prompt final
146
- final_prompt = cleaned_prompt + camera_config
147
-
148
- # Asegurar que empiece con mayúscula
149
- final_prompt = final_prompt[0].upper() + final_prompt[1:] if final_prompt else final_prompt
150
-
151
- # Limpiar espacios y comas duplicadas
152
- final_prompt = re.sub(r'\s+', ' ', final_prompt)
153
- final_prompt = re.sub(r',\s*,+', ',', final_prompt)
154
-
155
- return final_prompt
156
-
157
- @spaces.GPU(duration=60)
158
- def run_florence_inference(self, image: Image.Image) -> Tuple[str, str, str]:
159
- """Run Florence-2 inference on GPU"""
160
- try:
161
- # Move model to GPU
162
- self.model = self.model.to("cuda")
163
- logger.info("Florence-2 model moved to GPU")
164
-
165
- # Task prompts for different types of analysis
166
- tasks = {
167
- "detailed_caption": "<DETAILED_CAPTION>",
168
- "more_detailed_caption": "<MORE_DETAILED_CAPTION>",
169
- "caption": "<CAPTION>",
170
- "dense_region_caption": "<DENSE_REGION_CAPTION>"
171
- }
172
-
173
- results = {}
174
-
175
- # Run different captioning tasks
176
- for task_name, task_prompt in tasks.items():
177
- try:
178
- inputs = self.processor(text=task_prompt, images=image, return_tensors="pt")
179
- inputs = {k: v.to("cuda") for k, v in inputs.items()}
180
-
181
- with torch.cuda.amp.autocast(dtype=torch.float16):
182
- generated_ids = self.model.generate(
183
- input_ids=inputs["input_ids"],
184
- pixel_values=inputs["pixel_values"],
185
- max_new_tokens=1024,
186
- num_beams=3,
187
- do_sample=False
188
- )
189
-
190
- generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
191
- parsed = self.processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height))
192
-
193
- # Extract the caption from the parsed result
194
- if task_prompt in parsed:
195
- results[task_name] = parsed[task_prompt]
196
- else:
197
- # Sometimes the result is directly in the parsed output
198
- results[task_name] = str(parsed) if parsed else ""
199
-
200
- except Exception as e:
201
- logger.warning(f"Error in {task_name}: {e}")
202
- results[task_name] = ""
203
-
204
- # Extract results
205
- detailed_caption = results.get("detailed_caption", "")
206
- more_detailed = results.get("more_detailed_caption", "")
207
- caption = results.get("caption", "")
208
-
209
- # Combine for a comprehensive description
210
- if more_detailed:
211
- full_prompt = more_detailed
212
- elif detailed_caption:
213
- full_prompt = detailed_caption
214
- else:
215
- full_prompt = caption
216
-
217
- # Use different levels as our three outputs
218
- clip_fast = caption if caption else "A photograph"
219
- clip_classic = detailed_caption if detailed_caption else full_prompt
220
- clip_best = more_detailed if more_detailed else full_prompt
221
-
222
- logger.info(f"Florence-2 captions generated successfully")
223
-
224
- return full_prompt, clip_fast, clip_classic
225
-
226
- except Exception as e:
227
- logger.error(f"Florence-2 inference error: {e}")
228
- # Move model back to CPU to free GPU memory
229
- self.model = self.model.to("cpu")
230
- raise e
231
- finally:
232
- # Always move model back to CPU after inference
233
- self.model = self.model.to("cpu")
234
- torch.cuda.empty_cache()
235
-
236
    def generate_ultra_supreme_prompt(self, image: Any) -> Tuple[str, str, int, Dict[str, int]]:
        """
        Generate an ultra supreme prompt from an image using Florence-2.

        Pipeline: lazy model initialization -> image normalization ->
        Florence-2 captioning (with a plain-text fallback on failure) ->
        multi-model analysis -> Flux rule application -> scoring and
        report generation.

        Returns:
            Tuple of (prompt, analysis_info, score, breakdown). On failure
            the prompt/info strings carry an error message and score is 0.
        """
        try:
            # Lazily load Florence-2 on first use.
            if not self.is_initialized:
                if not self.initialize_model():
                    return "❌ Model initialization failed.", "Please refresh and try again.", 0, {}

            # Validate input
            if image is None:
                return "❌ Please upload an image.", "No image provided.", 0, {}

            self.usage_count += 1

            # Optimize image
            image = self.optimize_image(image)
            if image is None:
                return "❌ Image processing failed.", "Invalid image format.", 0, {}

            start_time = datetime.now()

            logger.info("ULTRA SUPREME ANALYSIS - Starting with Florence-2")

            # Run Florence-2 inference; degrade to generic captions rather
            # than failing the whole request if the GPU call blows up.
            try:
                full_prompt, caption_fast, caption_detailed = self.run_florence_inference(image)
            except Exception as e:
                logger.error(f"Florence-2 failed: {e}")
                # Basic textual fallback
                full_prompt = "A photograph"
                caption_fast = "image"
                caption_detailed = "detailed image"

            logger.info(f"Florence-2 caption: {full_prompt[:100]}...")

            # Run the ultra supreme analysis across multiple models.
            logger.info("Running multi-model ultra supreme analysis...")
            ultra_analysis = self.analyzer.ultra_supreme_analysis(
                image, caption_fast, caption_detailed, full_prompt
            )

            # Build an enriched prompt from the full analysis.
            enhanced_prompt_parts = []

            # The Florence base caption always leads.
            enhanced_prompt_parts.append(full_prompt)

            # Add demographic info when confidence is high enough (>0.7).
            if ultra_analysis["demographic"]["gender"] and ultra_analysis["demographic"]["gender_confidence"] > 0.7:
                gender = ultra_analysis["demographic"]["gender"]
                age_cat = ultra_analysis["demographic"]["age_category"]
                if age_cat:
                    enhanced_prompt_parts.append(f"{age_cat} {gender}")

            # Add the dominant emotional state (>0.6 confidence).
            if ultra_analysis["emotional_state"]["primary_emotion"] and ultra_analysis["emotional_state"]["emotion_confidence"] > 0.6:
                emotion = ultra_analysis["emotional_state"]["primary_emotion"]
                enhanced_prompt_parts.append(f"{emotion} expression")

            # Add pose information when available.
            if ultra_analysis["pose_composition"]["posture"]:
                enhanced_prompt_parts.append(ultra_analysis["pose_composition"]["posture"][0])

            # Combine the parts and apply the Flux rules.
            combined_prompt = ", ".join(enhanced_prompt_parts)
            optimized_prompt = self.apply_flux_rules(combined_prompt)

            # Prefer the analyzer's enriched prompt when it is richer
            # (longer) than our own combination.
            analyzer_prompt = self.analyzer.build_ultra_supreme_prompt(ultra_analysis, [full_prompt])
            if len(analyzer_prompt) > len(optimized_prompt):
                optimized_prompt = self.apply_flux_rules(analyzer_prompt)

            # Score the final prompt via the analyzer.
            score, breakdown = self.analyzer.calculate_ultra_supreme_score(optimized_prompt, ultra_analysis)

            end_time = datetime.now()
            duration = (end_time - start_time).total_seconds()

            # Memory cleanup
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            # Build the human-readable multi-model analysis report.
            analysis_info = self._generate_ultra_analysis_report(
                ultra_analysis, score, breakdown, duration, "Florence-2"
            )

            return optimized_prompt, analysis_info, score, breakdown

        except Exception as e:
            logger.error(f"Ultra supreme generation error: {e}", exc_info=True)
            return f"❌ Error: {str(e)}", "Please try with a different image.", 0, {}
334
-
335
    def _generate_ultra_analysis_report(self, analysis: Dict[str, Any],
                                       score: int, breakdown: Dict[str, int],
                                       duration: float, caption_model: str = "Florence-2") -> str:
        """Generate the human-readable multi-model analysis report.

        Builds a Markdown summary covering processing stats, the score
        breakdown, and each analysis section (demographics, emotion, face,
        pose, environment, intelligence metrics).

        NOTE(review): assumes `analysis` follows the analyzer's schema
        (keys such as "demographic", "emotional_state", "facial_ultra",
        "pose_composition", "environmental", "intelligence_metrics") --
        nested keys are accessed without .get() and raise KeyError when
        missing; confirm against UltraSupremeAnalyzer.
        """

        # NOTE(review): reports current CUDA availability, not necessarily
        # the device the inference actually ran on.
        device_used = "cuda" if torch.cuda.is_available() else "cpu"
        gpu_status = "⚡ ZeroGPU" if device_used == "cuda" else "💻 CPU"

        # Demographic info (only when a face produced an age estimate).
        demo_info = ""
        if analysis["demographic"]["age_category"]:
            age = analysis["demographic"]["age_category"].replace("_", " ").title()
            gender = analysis["demographic"]["gender"] or "person"
            confidence = analysis["demographic"]["age_confidence"]
            demo_info = f"**Detected:** {age} {gender} (confidence: {confidence:.0%})"

        # Emotion info
        emotion_info = ""
        if analysis["emotional_state"]["primary_emotion"]:
            emotion = analysis["emotional_state"]["primary_emotion"]
            confidence = analysis["emotional_state"]["emotion_confidence"]
            emotion_info = f"**Primary Emotion:** {emotion} ({confidence:.0%})"

            # Add the top-3 emotion distribution if available.
            if analysis["emotional_state"]["emotion_distribution"]:
                top_emotions = sorted(
                    analysis["emotional_state"]["emotion_distribution"].items(),
                    key=lambda x: x[1], reverse=True
                )[:3]
                emotion_details = ", ".join([f"{e[0]}: {e[1]:.0%}" for e in top_emotions])
                emotion_info += f"\n**Emotion Distribution:** {emotion_details}"

        # Face analysis info (up to 5 notable facial features).
        face_info = f"**Faces Detected:** {analysis['facial_ultra']['face_count']}"
        if analysis['facial_ultra']['face_count'] > 0:
            features = []
            for feature_type in ['eyes', 'mouth', 'facial_hair', 'skin']:
                if analysis['facial_ultra'].get(feature_type):
                    features.extend(analysis['facial_ultra'][feature_type])
            if features:
                face_info += f"\n**Facial Features:** {', '.join(features[:5])}"

        # Pose info (only when body-pose detection produced confidence > 0).
        pose_info = ""
        if analysis["pose_composition"].get("pose_confidence", 0) > 0:
            confidence = analysis["pose_composition"]["pose_confidence"]
            pose_info = f"**Pose Analysis:** Body detected ({confidence:.0%} confidence)"
            if analysis["pose_composition"]["posture"]:
                pose_info += f"\n**Posture:** {', '.join(analysis['pose_composition']['posture'])}"

        # Environment info (setting type plus lighting notes when present).
        env_info = ""
        if analysis["environmental"]["setting_type"]:
            env_info = f"**Setting:** {analysis['environmental']['setting_type'].replace('_', ' ').title()}"
            if analysis["environmental"]["lighting_analysis"]:
                env_info += f"\n**Lighting:** {', '.join(analysis['environmental']['lighting_analysis'])}"

        # Intelligence metrics
        metrics = analysis["intelligence_metrics"]

        # Caption preview, truncated to 150 characters.
        caption_info = analysis.get("clip_best", "")[:150] + "..." if len(analysis.get("clip_best", "")) > 150 else analysis.get("clip_best", "")

        analysis_info = f"""**🚀 ULTRA SUPREME MULTI-MODEL ANALYSIS COMPLETE**
**Processing:** {gpu_status} • {duration:.1f}s • {caption_model} + Multi-Model Pipeline
**Ultra Score:** {score}/100 • Models: {caption_model} + DeepFace + MediaPipe + Transformers

**📊 BREAKDOWN:**
• Prompt Quality: {breakdown.get('prompt_quality', 0)}/25
• Analysis Depth: {breakdown.get('analysis_depth', 0)}/25
• Model Confidence: {breakdown.get('model_confidence', 0)}/25
• Feature Richness: {breakdown.get('feature_richness', 0)}/25

**📝 VISION-LANGUAGE ANALYSIS:**
**{caption_model} Caption:** {caption_info}

**🧠 DEEP ANALYSIS RESULTS:**

**👤 DEMOGRAPHICS & IDENTITY:**
{demo_info or "No face detected for demographic analysis"}

**😊 EMOTIONAL ANALYSIS:**
{emotion_info or "No emotional data available"}

**👁️ FACIAL ANALYSIS:**
{face_info}

**🚶 POSE & BODY LANGUAGE:**
{pose_info or "No pose data available"}

**🏞️ ENVIRONMENT & SCENE:**
{env_info or "No environmental data detected"}

**📊 INTELLIGENCE METRICS:**
• **Total Features Detected:** {metrics['total_features_detected']}
• **Analysis Depth Score:** {metrics['analysis_depth_score']}/100
• **Model Confidence Average:** {metrics['model_confidence_average']:.0%}
• **Technical Optimization:** {metrics['technical_optimization_score']}/100

**✨ MULTI-MODEL ADVANTAGES:**
✅ {caption_model}: State-of-the-art vision-language understanding
✅ DeepFace: Accurate age, gender, emotion detection
✅ MediaPipe: Body pose and gesture analysis
✅ Transformers: Advanced emotion classification
✅ OpenCV: Robust face detection

**🔬 Powered by Pariente AI Research • Ultra Supreme Intelligence Engine**"""

        return analysis_info