Spaces:

Mapu142
/

ACTIVIDAD_IA

Sleeping

File size: 14,125 Bytes

2bbbf2d

import gradio as gr
from transformers import pipeline, ViTImageProcessor, ViTForImageClassification, YolosImageProcessor, YolosForObjectDetection
from PIL import Image, ImageDraw
import torch
import numpy as np

class UniversalImageClassifier:
    """Image classifier backed by the ``google/vit-base-patch16-224`` checkpoint.

    Wraps a Hugging Face ``image-classification`` pipeline and maps selected
    class labels to emoji-decorated Spanish display names.
    """

    def __init__(self):
        """Load the ViT processor and model and wrap them in a CPU pipeline."""
        print("🔄 Cargando clasificador de imágenes ViT...")
        self.model_name = "google/vit-base-patch16-224"
        self.processor = ViTImageProcessor.from_pretrained(self.model_name)
        self.model = ViTForImageClassification.from_pretrained(self.model_name)

        self.classifier = pipeline(
            "image-classification",
            model=self.model,
            feature_extractor=self.processor,
            device=-1  # CPU
        )

        print("✅ Clasificador ViT cargado!")

        # Keys are normalised label names: lower-case, words joined by "_".
        # See _display_label() for how raw model labels are matched to them.
        self.category_mappings = {
            'egyptian_cat': '🐱 Gato Egipcio',
            'tabby': '🐱 Gato Atigrado',
            'tiger_cat': '🐱 Gato Tiger',
            'golden_retriever': '🐕 Golden Retriever',
            'german_shepherd': '🐕 Pastor Alemán',
            'beagle': '🐕 Beagle',
            'sports_car': '🏎️ Auto Deportivo',
            'convertible': '🚗 Convertible',
            'motorcycle': '🏍️ Motocicleta',
            'bicycle': '🚲 Bicicleta',
            'airplane': '✈️ Avión',
            'pizza': '🍕 Pizza',
            'hamburger': '🍔 Hamburguesa',
            'hot_dog': '🌭 Hot Dog',
            'ice_cream': '🍦 Helado',
            'laptop': '💻 Laptop',
            'cellular_telephone': '📱 Teléfono Móvil',
            'television': '📺 Televisión',
            'daisy': '🌼 Margarita',
            'rose': '🌹 Rosa',
            'sunflower': '🌻 Girasol',
        }

    def _display_label(self, label):
        """Return the display name for a raw model ``label``.

        Every comma-separated synonym in ``label`` is normalised (stripped,
        lower-cased, spaces turned into underscores) and looked up in
        ``category_mappings``; the first hit wins.  Labels that already equal
        a mapping key still match.  Unmapped labels fall back to a title-cased
        version of the raw label.
        """
        # NOTE(review): the checkpoint's labels appear to be human-readable
        # ImageNet names such as "Egyptian cat" or "tabby, tabby cat" rather
        # than snake_case keys - confirm against the model's id2label config.
        for synonym in label.split(','):
            key = synonym.strip().lower().replace(' ', '_')
            if key in self.category_mappings:
                return self.category_mappings[key]
        return f"🔍 {label.replace('_', ' ').title()}"

    def classify_image(self, image):
        """Classify ``image`` and return up to five predictions.

        Args:
            image: The image handed to the pipeline unchanged.

        Returns:
            A list of dicts with keys ``label`` (display name),
            ``original_label`` (raw model label) and ``confidence``
            (score scaled to 0-100).  If anything fails, a single entry whose
            ``label`` starts with ``"Error: "``, whose ``confidence`` is ``0``
            and whose ``original_label`` is ``None`` is returned instead of
            raising.
        """
        try:
            results = self.classifier(image)
            return [
                {
                    'label': self._display_label(result['label']),
                    'original_label': result['label'],
                    'confidence': result['score'] * 100,
                }
                for result in results[:5]
            ]
        except Exception as e:
            # Best-effort boundary: callers detect failure via confidence == 0.
            return [{'label': f'Error: {str(e)}', 'original_label': None, 'confidence': 0}]

class ObjectDetector:
    """Object detector backed by the ``hustvl/yolos-tiny`` checkpoint.

    Produces per-object detections (label, confidence, bounding box) and can
    draw them onto a copy of the input image.
    """

    def __init__(self):
        """Load the YOLOS image processor and detection model."""
        print("🔄 Cargando detector de objetos YOLOS...")
        self.model_name = "hustvl/yolos-tiny"
        self.processor = YolosImageProcessor.from_pretrained(self.model_name)
        self.model = YolosForObjectDetection.from_pretrained(self.model_name)

        print("✅ Detector YOLOS cargado!")

        # Raw model label -> emoji-decorated Spanish display name.
        self.class_mappings = {
            'person': '👤 Persona',
            'bicycle': '🚲 Bicicleta',
            'car': '🚗 Auto',
            'motorcycle': '🏍️ Motocicleta',
            'airplane': '✈️ Avión',
            'bus': '🚌 Autobús',
            'train': '🚂 Tren',
            'truck': '🚛 Camión',
            'boat': '⛵ Barco',
            'traffic light': '🚦 Semáforo',
            'bird': '🐦 Pájaro',
            'cat': '🐱 Gato',
            'dog': '🐕 Perro',
            'horse': '🐎 Caballo',
            'elephant': '🐘 Elefante',
            'backpack': '🎒 Mochila',
            'umbrella': '☂️ Paraguas',
            'bottle': '🍼 Botella',
            'cup': '☕ Taza',
            'banana': '🍌 Plátano',
            'apple': '🍎 Manzana',
            'pizza': '🍕 Pizza',
            'chair': '🪑 Silla',
            'couch': '🛋️ Sofá',
            'bed': '🛏️ Cama',
            'tv': '📺 Televisión',
            'laptop': '💻 Laptop',
            'mouse': '🖱️ Mouse',
            'cell phone': '📱 Teléfono',
            'book': '📚 Libro',
            'clock': '🕐 Reloj'
        }

    def detect_objects(self, image, threshold=0.3):
        """Run detection on ``image`` and return the detections found.

        Args:
            image: Image whose ``size`` attribute is ``(width, height)``; it
                is passed to the processor unchanged.
            threshold: Minimum score (0-1) a detection must exceed to be kept.

        Returns:
            A list of dicts with keys ``label`` (display name),
            ``original_label`` (raw model label), ``confidence`` (0-100) and
            ``box`` (``[xmin, ymin, xmax, ymax]``).  If anything fails, a
            single entry with ``confidence`` 0, a zero box and a ``label``
            starting with ``"Error: "`` is returned instead of raising.
        """
        try:
            inputs = self.processor(images=image, return_tensors="pt")
            # Inference only: no need to record autograd state.
            with torch.no_grad():
                outputs = self.model(**inputs)

            # The post-processor takes (height, width); image.size is reversed to match.
            target_sizes = torch.tensor([image.size[::-1]])
            results = self.processor.post_process_object_detection(
                outputs, target_sizes=target_sizes, threshold=threshold
            )[0]

            detections = []
            for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
                score_value = score.item()
                if score_value > threshold:
                    label_name = self.model.config.id2label[label.item()]
                    display_name = self.class_mappings.get(label_name, f"🔍 {label_name}")

                    detections.append({
                        'label': display_name,
                        'original_label': label_name,
                        'confidence': score_value * 100,
                        'box': box.tolist()
                    })

            return detections
        except Exception as e:
            # Best-effort boundary: callers detect failure via confidence == 0.
            return [{
                'label': f'Error: {str(e)}',
                'original_label': None,
                'confidence': 0,
                'box': [0, 0, 0, 0]
            }]

    @staticmethod
    def _font_safe_text(text):
        """Return ``text`` without characters outside Latin-1 (e.g. emoji).

        NOTE(review): Pillow's built-in default font has no emoji glyphs, and
        older Pillow releases reportedly raise UnicodeEncodeError on
        non-Latin-1 text - confirm against the installed Pillow version.
        Accented Spanish letters are Latin-1 and are kept.
        """
        return text.encode('latin-1', 'ignore').decode('latin-1').strip()

    def draw_detections(self, image, detections):
        """Return a copy of ``image`` with boxes and captions drawn on it.

        Only detections whose ``confidence`` exceeds 30 are drawn.  Captions
        are stripped of characters the default font cannot render, and are
        placed inside the box when there is no room above it.  On any failure
        the original ``image`` is returned unchanged.
        """
        try:
            annotated_image = image.copy()
            draw = ImageDraw.Draw(annotated_image)

            colors = ['red', 'blue', 'green', 'yellow', 'purple', 'orange', 'cyan', 'magenta']

            for i, detection in enumerate(detections):
                confidence = detection['confidence']
                if not confidence > 30:
                    continue

                xmin, ymin, xmax, ymax = detection['box']
                color = colors[i % len(colors)]

                draw.rectangle([(xmin, ymin), (xmax, ymax)], outline=color, width=3)

                text = self._font_safe_text(f"{detection['label']} ({confidence:.1f}%)")
                try:
                    text_bbox = draw.textbbox((0, 0), text)
                    text_width = text_bbox[2] - text_bbox[0]
                    text_height = text_bbox[3] - text_bbox[1]

                    # Put the banner above the box; if that would leave the
                    # canvas, start it at the box's top edge instead.
                    banner_top = ymin - text_height - 4
                    if banner_top < 0:
                        banner_top = ymin

                    draw.rectangle(
                        [(xmin, banner_top), (xmin + text_width + 4, banner_top + text_height + 4)],
                        fill=color
                    )
                    draw.text((xmin + 2, banner_top + 2), text, fill='white')
                except Exception:
                    # Fallback when measuring/drawing the banner fails:
                    # plain coloured text, kept on the canvas.
                    draw.text((xmin, max(ymin - 20, 0)), text, fill=color)

            return annotated_image
        except Exception:
            # Annotation is best-effort; never fail the request because of it.
            return image

# Initialize models.
# Both wrappers are created once at import time; their constructors load the
# pretrained weights. The module-level `classifier` and `detector` instances
# are shared by the request handlers defined below.
print("🚀 Inicializando modelos de IA...")
classifier = UniversalImageClassifier()
detector = ObjectDetector()
print("✅ ¡Todos los modelos listos!")

def classify_image_complete(image):
    """Classify ``image`` and return a Markdown report as a string.

    Returns a prompt message when ``image`` is None, a failure message when
    the module-level ``classifier`` yields no predictions (or a top prediction
    with confidence 0), and an ``"❌ Error: ..."`` message if anything raises.
    Otherwise the report lists the top prediction, every returned prediction
    with a ten-segment bar, and a qualitative confidence level.
    """
    if image is None:
        return "❌ Por favor sube una imagen para clasificar"

    try:
        ranked = classifier.classify_image(image)

        if not ranked or ranked[0]['confidence'] == 0:
            return "❌ No se pudo clasificar la imagen"

        top = ranked[0]

        # Header section; the trailing space after the label is intentional
        # (kept from the original report layout).
        parts = [
            "# 🔍 Clasificación de Imagen\n"
            "## 🎯 Predicción Principal:\n"
            f"### {top['label']} \n"
            f"**Confianza:** {top['confidence']:.1f}%\n"
            "## 📊 Top 5 Predicciones:\n"
        ]

        # One entry per prediction with a bar of ten segments.
        for rank, entry in enumerate(ranked, 1):
            filled = int(entry['confidence'] / 10)
            meter = "█" * filled + "░" * (10 - filled)
            parts.append(f"\n**{rank}.** {entry['label']}\n{meter} {entry['confidence']:.1f}%")

        # Confidence analysis: first threshold strictly exceeded wins.
        top_score = top['confidence']
        level = "🔴 Baja"
        for floor, name in ((80, "🟢 Muy Alta"), (60, "🟡 Alta"), (40, "🟠 Moderada")):
            if top_score > floor:
                level = name
                break

        parts.append(f"\n\n## 🎚️ Confianza: {level}")
        parts.append("\n\n*Clasificación con Vision Transformer (ViT)*")

        return "".join(parts)

    except Exception as e:
        return f"❌ Error: {str(e)}"

def detect_objects_complete(image):
    """Detect objects in ``image`` and return ``(markdown_report, image)``.

    Uses the module-level ``detector``.  The second element is the annotated
    image on success, the unmodified input when nothing was detected, and
    ``None`` when no image was supplied or an exception was raised.

    The report contains a summary (count, distinct types, mean confidence),
    the ten highest-confidence detections with a ten-segment bar each, and a
    per-type count of the five most frequent object types.
    """
    if image is None:
        return "❌ Por favor sube una imagen para detectar objetos", None

    try:
        detections = detector.detect_objects(image)

        # The detector signals failure with a single confidence-0 entry.
        if not detections or detections[0]['confidence'] == 0:
            return "❌ No se detectaron objetos", image

        annotated_image = detector.draw_detections(image, detections)

        # Count detections per raw label, using the same >30 cut-off that
        # draw_detections applies.
        object_counts = {}
        for detection in detections:
            if detection['confidence'] > 30:
                label = detection['original_label']
                object_counts[label] = object_counts.get(label, 0) + 1

        total_objects = len(detections)
        unique_objects = len(object_counts)

        parts = [f"""# 🎯 Detección de Objetos
## 📊 Resumen:
- **Objetos detectados:** {total_objects}
- **Tipos únicos:** {unique_objects}
- **Confianza promedio:** {np.mean([d['confidence'] for d in detections]):.1f}%
## 🔍 Objetos Encontrados:
"""]

        sorted_detections = sorted(detections, key=lambda x: x['confidence'], reverse=True)

        for i, detection in enumerate(sorted_detections[:10], 1):  # Top 10
            confidence = detection['confidence']
            filled = int(confidence / 10)
            bar = "█" * filled + "░" * (10 - filled)
            parts.append(f"\n**{i}.** {detection['label']}\n{bar} {confidence:.1f}%")

        parts.append("\n\n## 📈 Conteo por Tipo:")
        most_frequent = sorted(object_counts.items(), key=lambda x: x[1], reverse=True)[:5]
        for obj_type, count in most_frequent:
            display_name = detector.class_mappings.get(obj_type, obj_type)
            parts.append(f"\n- {display_name}: **{count}**")

        # YOLOS stands for "You Only Look at One Sequence" (not "...Look Once",
        # which is the unrelated YOLO family).
        parts.append("\n\n*Detección con YOLOS (You Only Look at One Sequence)*")

        return "".join(parts), annotated_image

    except Exception as e:
        return f"❌ Error: {str(e)}", None

# Gradio interface: two tabs (classifier, object detector), each with an image
# input, a trigger button and output components wired to the handlers above.
with gr.Blocks(title="🤖 Analizador Visual Universal con IA") as demo:
    
    # NOTE: YOLOS stands for "You Only Look at One Sequence"; the previous
    # text expanded it as "You Only Look Once", which is the YOLO family.
    gr.Markdown("""
    # 🤖 Analizador Visual Universal con IA
    
    **Dos poderosos modelos de IA en una sola aplicación**
    
    🔍 **Clasificador Universal:** Identifica QUÉ ES (1000+ categorías)  
    🎯 **Detector de Objetos:** Encuentra DÓNDE ESTÁN (80+ objetos)
    
    ✨ **Modelos incluidos:**
    - 🧠 Vision Transformer (ViT) de Google
    - 🎯 YOLOS (You Only Look at One Sequence)
    - 💻 100% optimizado para CPU
    """)
    
    with gr.Tabs():
        # Tab 1: classifier
        with gr.Tab("🔍 Clasificador Universal"):
            gr.Markdown("""
            ### Identifica automáticamente el contenido principal de tu imagen
            **Perfecto para:** Catalogar fotos, identificar objetos desconocidos, análisis de contenido
            """)
            
            with gr.Row():
                with gr.Column(scale=1):
                    classify_input = gr.Image(
                        label="📸 Sube tu imagen",
                        type="pil"
                    )
                    classify_btn = gr.Button(
                        "🚀 Clasificar Imagen", 
                        variant="primary",
                        size="lg"
                    )
                
                with gr.Column(scale=2):
                    classify_output = gr.Markdown(
                        label="📋 Resultado de Clasificación"
                    )
            
            # Example gallery, currently disabled.
            # gr.Examples(
            #     examples=[
            #         "https://images.unsplash.com/photo-1574158622682-e40e69881006?w=300",  # Cat
            #         "https://images.unsplash.com/photo-1552053831-71594a27632d?w=300",  # Dog
            #         "https://images.unsplash.com/photo-1565299624946-b28f40a0ca4b?w=300"   # Pizza
            #     ],
            #     inputs=[classify_input],
            #     label="🖼️ Ejemplos para probar"
            # )
        
        # Tab 2: object detector
        with gr.Tab("🎯 Detector de Objetos"):
            gr.Markdown("""
            ### Encuentra y localiza múltiples objetos en tu imagen
            **Perfecto para:** Análisis de escenas, inventarios visuales, seguridad
            """)
            
            with gr.Row():
                with gr.Column(scale=1):
                    detect_input = gr.Image(
                        label="📸 Sube tu imagen",
                        type="pil"
                    )
                    detect_btn = gr.Button(
                        "🎯 Detectar Objetos", 
                        variant="primary",
                        size="lg"
                    )
                
                with gr.Column(scale=1):
                    detect_output = gr.Markdown(
                        label="📋 Objetos Detectados"
                    )
                    detect_image_output = gr.Image(
                        label="🎯 Imagen Anotada",
                        type="pil"
                    )
    
    # Event wiring: each button calls its handler with the tab's image input.
    classify_btn.click(
        classify_image_complete,
        inputs=[classify_input],
        outputs=[classify_output]
    )
    
    # The detection handler returns (report, image), matching the two outputs.
    detect_btn.click(
        detect_objects_complete,
        inputs=[detect_input],
        outputs=[detect_output, detect_image_output]
    )
    
    gr.Markdown("""
    ---
    ### 💡 Consejos para mejores resultados:
    - Usa imágenes claras y bien iluminadas
    - Centra el objeto principal para clasificación
    - Para detección, incluye múltiples objetos en la escena
    - Resolución mínima recomendada: 224x224 píxeles
    
    **🚀 Powered by Hugging Face Transformers**
    """)

# Launch the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()