Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import pipeline, ViTImageProcessor, ViTForImageClassification, YolosImageProcessor, YolosForObjectDetection | |
| from PIL import Image, ImageDraw | |
| import torch | |
| import numpy as np | |
class UniversalImageClassifier:
    """Image classifier built on the ``google/vit-base-patch16-224`` checkpoint.

    Wraps a ``transformers`` image-classification pipeline running on CPU and
    translates a handful of raw model labels into emoji-decorated Spanish
    display names via ``category_mappings``.
    """

    def __init__(self):
        """Load the ViT processor and model and build the CPU pipeline."""
        print("🔄 Cargando clasificador de imágenes ViT...")
        self.model_name = "google/vit-base-patch16-224"
        self.processor = ViTImageProcessor.from_pretrained(self.model_name)
        self.model = ViTForImageClassification.from_pretrained(self.model_name)
        self.classifier = pipeline(
            "image-classification",
            model=self.model,
            feature_extractor=self.processor,
            device=-1,  # CPU
        )
        print("✅ Clasificador ViT cargado!")
        # Keys are snake_case identifiers; raw model labels are normalised to
        # this form by _display_label() before the lookup.
        self.category_mappings = {
            'egyptian_cat': '🐱 Gato Egipcio',
            'tabby': '🐱 Gato Atigrado',
            'tiger_cat': '🐱 Gato Tiger',
            'golden_retriever': '🐕 Golden Retriever',
            'german_shepherd': '🐕 Pastor Alemán',
            'beagle': '🐕 Beagle',
            'sports_car': '🏎️ Auto Deportivo',
            'convertible': '🚗 Convertible',
            'motorcycle': '🏍️ Motocicleta',
            'bicycle': '🚲 Bicicleta',
            'airplane': '✈️ Avión',
            'pizza': '🍕 Pizza',
            'hamburger': '🍔 Hamburguesa',
            'hot_dog': '🌭 Hot Dog',
            'ice_cream': '🍦 Helado',
            'laptop': '💻 Laptop',
            'cellular_telephone': '📱 Teléfono Móvil',
            'television': '📺 Televisión',
            'daisy': '🌼 Margarita',
            'rose': '🌹 Rosa',
            'sunflower': '🌻 Girasol',
        }

    def _display_label(self, label):
        """Return the display name for a raw model label.

        The pipeline reports human-readable labels, often a comma-separated
        list of synonyms (e.g. ``"Egyptian cat"`` or
        ``"hotdog, hot dog, red hot"``), while ``category_mappings`` is keyed
        by snake_case identifiers.  The label itself is tried first, then each
        synonym lower-cased with whitespace collapsed to underscores.  When
        nothing matches, a generic title-cased name is produced.
        """
        candidates = [label]
        candidates.extend(
            "_".join(synonym.lower().split()) for synonym in label.split(",")
        )
        for key in candidates:
            if key in self.category_mappings:
                return self.category_mappings[key]
        return f"🔍 {label.replace('_', ' ').title()}"

    def classify_image(self, image):
        """Classify *image* and return at most five predictions.

        Args:
            image: Input accepted by the underlying pipeline (the app passes
                a PIL image).

        Returns:
            A list of dicts with keys ``label`` (display name),
            ``original_label`` (raw model label) and ``confidence``
            (percentage).  On failure, a single-entry list whose ``label`` is
            ``"Error: <message>"`` and whose ``confidence`` is 0.
        """
        try:
            results = self.classifier(image)
            return [
                {
                    'label': self._display_label(result['label']),
                    'original_label': result['label'],
                    'confidence': result['score'] * 100,
                }
                for result in results[:5]
            ]
        except Exception as e:
            # Best-effort API: callers recognise failure by confidence == 0.
            return [{'label': f'Error: {str(e)}', 'original_label': '', 'confidence': 0}]
class ObjectDetector:
    """Object detector built on the ``hustvl/yolos-tiny`` checkpoint.

    Runs the model on an image, converts the raw outputs into plain dicts and
    can draw the resulting boxes onto a copy of the image.
    """

    def __init__(self):
        """Load the YOLOS image processor and detection model."""
        print("🔄 Cargando detector de objetos YOLOS...")
        self.model_name = "hustvl/yolos-tiny"
        self.processor = YolosImageProcessor.from_pretrained(self.model_name)
        self.model = YolosForObjectDetection.from_pretrained(self.model_name)
        print("✅ Detector YOLOS cargado!")
        # Raw model label -> emoji-decorated Spanish display name.
        self.class_mappings = {
            'person': '👤 Persona',
            'bicycle': '🚲 Bicicleta',
            'car': '🚗 Auto',
            'motorcycle': '🏍️ Motocicleta',
            'airplane': '✈️ Avión',
            'bus': '🚌 Autobús',
            'train': '🚂 Tren',
            'truck': '🚛 Camión',
            'boat': '⛵ Barco',
            'traffic light': '🚦 Semáforo',
            'bird': '🐦 Pájaro',
            'cat': '🐱 Gato',
            'dog': '🐕 Perro',
            'horse': '🐎 Caballo',
            'elephant': '🐘 Elefante',
            'backpack': '🎒 Mochila',
            'umbrella': '☂️ Paraguas',
            'bottle': '🍼 Botella',
            'cup': '☕ Taza',
            'banana': '🍌 Plátano',
            'apple': '🍎 Manzana',
            'pizza': '🍕 Pizza',
            'chair': '🪑 Silla',
            'couch': '🛋️ Sofá',
            'bed': '🛏️ Cama',
            'tv': '📺 Televisión',
            'laptop': '💻 Laptop',
            'mouse': '🖱️ Mouse',
            'cell phone': '📱 Teléfono',
            'book': '📚 Libro',
            'clock': '🕐 Reloj',
        }

    def detect_objects(self, image, threshold=0.3):
        """Detect objects in *image*.

        Args:
            image: PIL image (only ``image.size`` is read here; the image
                itself is handed to the processor).
            threshold: Minimum score (0-1) a detection must exceed.

        Returns:
            A list of dicts with keys ``label`` (display name),
            ``original_label`` (raw model label), ``confidence`` (percentage)
            and ``box`` (``[xmin, ymin, xmax, ymax]`` as returned by the
            processor's post-processing).  On failure, a single-entry list
            whose ``label`` is ``"Error: <message>"``, ``confidence`` is 0 and
            ``box`` is all zeros.
        """
        try:
            inputs = self.processor(images=image, return_tensors="pt")
            # Inference only: skip autograd bookkeeping.
            with torch.no_grad():
                outputs = self.model(**inputs)
            # PIL reports (width, height); reversed to (height, width) for
            # post-processing.
            target_sizes = torch.tensor([image.size[::-1]])
            results = self.processor.post_process_object_detection(
                outputs, target_sizes=target_sizes, threshold=threshold
            )[0]
            id2label = self.model.config.id2label
            detections = []
            for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
                if not score > threshold:
                    continue
                label_name = id2label[label.item()]
                detections.append({
                    'label': self.class_mappings.get(label_name, f"🔍 {label_name}"),
                    'original_label': label_name,
                    'confidence': score.item() * 100,
                    'box': box.tolist(),
                })
            return detections
        except Exception as e:
            # Best-effort API: callers recognise failure by confidence == 0.
            return [{
                'label': f'Error: {str(e)}',
                'original_label': '',
                'confidence': 0,
                'box': [0, 0, 0, 0],
            }]

    @staticmethod
    def _drawable_text(label):
        """Return *label* without characters outside Latin-1 (emoji etc.).

        Pillow's default font cannot render emoji; with the legacy bitmap
        font, drawing such text can raise instead of degrading gracefully.
        Accented Spanish letters are Latin-1 and are kept.
        """
        cleaned = label.encode("latin-1", "ignore").decode("latin-1").strip()
        return cleaned or label

    def draw_detections(self, image, detections):
        """Return a copy of *image* with boxes and captions drawn on it.

        Only detections whose ``confidence`` exceeds 30 (percent) are drawn.
        If drawing fails for any reason the original image is returned
        unchanged.
        """
        try:
            annotated_image = image.copy()
            draw = ImageDraw.Draw(annotated_image)
            colors = ['red', 'blue', 'green', 'yellow', 'purple', 'orange', 'cyan', 'magenta']
            for i, detection in enumerate(detections):
                confidence = detection['confidence']
                if not confidence > 30:
                    continue
                xmin, ymin, xmax, ymax = detection['box']
                color = colors[i % len(colors)]
                draw.rectangle([(xmin, ymin), (xmax, ymax)], outline=color, width=3)
                text = f"{self._drawable_text(detection['label'])} ({confidence:.1f}%)"
                try:
                    text_bbox = draw.textbbox((0, 0), text)
                except (AttributeError, ValueError):
                    # Pillow without textbbox support for the active font:
                    # fall back to an unboxed caption.
                    draw.text((xmin, max(ymin - 20, 0)), text, fill=color)
                    continue
                text_width = text_bbox[2] - text_bbox[0]
                text_height = text_bbox[3] - text_bbox[1]
                # Caption sits just above the box, clamped so it stays inside
                # the image for boxes that touch the top edge.
                caption_top = max(ymin - text_height - 4, 0)
                draw.rectangle(
                    [(xmin, caption_top), (xmin + text_width + 4, caption_top + text_height + 4)],
                    fill=color,
                )
                draw.text((xmin + 2, caption_top + 2), text, fill='white')
            return annotated_image
        except Exception as e:
            # Annotation is best-effort; report the problem and hand back the
            # untouched image rather than failing the whole request.
            print(f"⚠️ No se pudieron dibujar las detecciones: {e}")
            return image
| # Initialize models: both are constructed once, at module import time, | |
| # and shared by the handler functions below. | |
| print("🚀 Inicializando modelos de IA...") | |
| classifier = UniversalImageClassifier() | |
| detector = ObjectDetector() | |
| print("✅ ¡Todos los modelos listos!") | |
def classify_image_complete(image):
    """Classify *image* and return a Markdown report.

    Args:
        image: Image to classify, or ``None`` when nothing was uploaded.

    Returns:
        A Markdown string: the classification report on success, or a
        message starting with ``❌`` when no image was given or the
        classification failed.
    """
    if image is None:
        return "❌ Por favor sube una imagen para clasificar"
    try:
        predictions = classifier.classify_image(image)
        if not predictions:
            return "❌ No se pudo clasificar la imagen"
        dominant = predictions[0]
        if dominant['confidence'] == 0:
            # The classifier signals failure with a zero-confidence entry
            # labelled "Error: <message>"; show that message instead of
            # hiding it behind a generic text.
            if str(dominant['label']).startswith('Error: '):
                return f"❌ {dominant['label']}"
            return "❌ No se pudo clasificar la imagen"

        def confidence_bar(percent):
            """Render a 10-cell bar for a 0-100 percentage."""
            filled = min(10, max(0, int(percent / 10)))
            return "█" * filled + "░" * (10 - filled)

        lines = [
            "# 🔍 Clasificación de Imagen",
            "## 🎯 Predicción Principal:",
            f"### {dominant['label']}",
            f"**Confianza:** {dominant['confidence']:.1f}%",
            "## 📊 Top 5 Predicciones:",
            "",
        ]
        for i, pred in enumerate(predictions, 1):
            lines.append(f"**{i}.** {pred['label']}")
            lines.append(f"{confidence_bar(pred['confidence'])} {pred['confidence']:.1f}%")

        # Confidence analysis of the top prediction.
        confidence = dominant['confidence']
        if confidence > 80:
            level = "🟢 Muy Alta"
        elif confidence > 60:
            level = "🟡 Alta"
        elif confidence > 40:
            level = "🟠 Moderada"
        else:
            level = "🔴 Baja"
        lines += ["", f"## 🎚️ Confianza: {level}"]
        lines += ["", "*Clasificación con Vision Transformer (ViT)*"]
        return "\n".join(lines)
    except Exception as e:
        return f"❌ Error: {str(e)}"
def detect_objects_complete(image):
    """Detect objects in *image* and return a Markdown report plus an image.

    Args:
        image: Image to analyse, or ``None`` when nothing was uploaded.

    Returns:
        A ``(report, image)`` tuple.  ``report`` is a Markdown string; the
        second item is the annotated image on success, the input image when
        nothing was detected or detection failed, and ``None`` when no image
        was given or an unexpected exception occurred.
    """
    if image is None:
        return "❌ Por favor sube una imagen para detectar objetos", None
    try:
        detections = detector.detect_objects(image)
        if not detections:
            return "❌ No se detectaron objetos", image
        if detections[0]['confidence'] == 0:
            # The detector signals failure with a zero-confidence entry
            # labelled "Error: <message>"; do not report that as "nothing
            # detected".
            first_label = str(detections[0]['label'])
            if first_label.startswith('Error: '):
                return f"❌ {first_label}", image
            return "❌ No se detectaron objetos", image

        annotated_image = detector.draw_detections(image, detections)

        # Summarise exactly the detections that get drawn (confidence > 30%),
        # so totals, average and per-type counts agree with the image.
        confident = [d for d in detections if d['confidence'] > 30]
        if not confident:
            return "❌ No se detectaron objetos", image

        object_counts = {}
        for detection in confident:
            label = detection['original_label']
            object_counts[label] = object_counts.get(label, 0) + 1

        def confidence_bar(percent):
            """Render a 10-cell bar for a 0-100 percentage."""
            filled = min(10, max(0, int(percent / 10)))
            return "█" * filled + "░" * (10 - filled)

        average = np.mean([d['confidence'] for d in confident])
        lines = [
            "# 🎯 Detección de Objetos",
            "## 📊 Resumen:",
            f"- **Objetos detectados:** {len(confident)}",
            f"- **Tipos únicos:** {len(object_counts)}",
            f"- **Confianza promedio:** {average:.1f}%",
            "## 🔍 Objetos Encontrados:",
            "",
        ]
        ranked = sorted(confident, key=lambda d: d['confidence'], reverse=True)
        for i, detection in enumerate(ranked[:10], 1):  # Top 10
            lines.append(f"**{i}.** {detection['label']}")
            lines.append(f"{confidence_bar(detection['confidence'])} {detection['confidence']:.1f}%")

        lines += ["", "## 📈 Conteo por Tipo:"]
        top_types = sorted(object_counts.items(), key=lambda item: item[1], reverse=True)[:5]
        for obj_type, count in top_types:
            display_name = detector.class_mappings.get(obj_type, obj_type)
            lines.append(f"- {display_name}: **{count}**")

        # YOLOS stands for "You Only Look at One Sequence".
        lines += ["", "*Detección con YOLOS (You Only Look at One Sequence)*"]
        return "\n".join(lines), annotated_image
    except Exception as e:
        return f"❌ Error: {str(e)}", None
| # Gradio interface: two tabs (classifier / object detector) inside one Blocks app. | |
| with gr.Blocks(title="🤖 Analizador Visual Universal con IA") as demo: | |
| # NOTE(review): the text below expands YOLOS as "You Only Look Once"; the | |
| # model's paper is titled "You Only Look at One Sequence" - consider fixing. | |
| gr.Markdown(""" | |
| # 🤖 Analizador Visual Universal con IA | |
| **Dos poderosos modelos de IA en una sola aplicación** | |
| 🔍 **Clasificador Universal:** Identifica QUÉ ES (1000+ categorías) | |
| 🎯 **Detector de Objetos:** Encuentra DÓNDE ESTÁN (80+ objetos) | |
| ✨ **Modelos incluidos:** | |
| - 🧠 Vision Transformer (ViT) de Google | |
| - 🎯 YOLOS (You Only Look Once) | |
| - 💻 100% optimizado para CPU | |
| """) | |
| with gr.Tabs(): | |
| # Tab 1: Classifier | |
| with gr.Tab("🔍 Clasificador Universal"): | |
| gr.Markdown(""" | |
| ### Identifica automáticamente el contenido principal de tu imagen | |
| **Perfecto para:** Catalogar fotos, identificar objetos desconocidos, análisis de contenido | |
| """) | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| classify_input = gr.Image( | |
| label="📸 Sube tu imagen", | |
| type="pil" | |
| ) | |
| classify_btn = gr.Button( | |
| "🚀 Clasificar Imagen", | |
| variant="primary", | |
| size="lg" | |
| ) | |
| with gr.Column(scale=2): | |
| classify_output = gr.Markdown( | |
| label="📋 Resultado de Clasificación" | |
| ) | |
| # Example gallery, currently commented out: | |
| # gr.Examples( | |
| # examples=[ | |
| # "https://images.unsplash.com/photo-1574158622682-e40e69881006?w=300", # Cat | |
| # "https://images.unsplash.com/photo-1552053831-71594a27632d?w=300", # Dog | |
| # "https://images.unsplash.com/photo-1565299624946-b28f40a0ca4b?w=300" # Pizza | |
| # ], | |
| # inputs=[classify_input], | |
| # label="🖼️ Ejemplos para probar" | |
| # ) | |
| # Tab 2: Object detector | |
| with gr.Tab("🎯 Detector de Objetos"): | |
| gr.Markdown(""" | |
| ### Encuentra y localiza múltiples objetos en tu imagen | |
| **Perfecto para:** Análisis de escenas, inventarios visuales, seguridad | |
| """) | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| detect_input = gr.Image( | |
| label="📸 Sube tu imagen", | |
| type="pil" | |
| ) | |
| detect_btn = gr.Button( | |
| "🎯 Detectar Objetos", | |
| variant="primary", | |
| size="lg" | |
| ) | |
| with gr.Column(scale=1): | |
| detect_output = gr.Markdown( | |
| label="📋 Objetos Detectados" | |
| ) | |
| detect_image_output = gr.Image( | |
| label="🎯 Imagen Anotada", | |
| type="pil" | |
| ) | |
| # Event wiring: each button calls its handler with the tab's input image. | |
| classify_btn.click( | |
| classify_image_complete, | |
| inputs=[classify_input], | |
| outputs=[classify_output] | |
| ) | |
| # The detector handler returns (report, image), matching the two outputs. | |
| detect_btn.click( | |
| detect_objects_complete, | |
| inputs=[detect_input], | |
| outputs=[detect_output, detect_image_output] | |
| ) | |
| gr.Markdown(""" | |
| --- | |
| ### 💡 Consejos para mejores resultados: | |
| - Usa imágenes claras y bien iluminadas | |
| - Centra el objeto principal para clasificación | |
| - Para detección, incluye múltiples objetos en la escena | |
| - Resolución mínima recomendada: 224x224 píxeles | |
| **🚀 Powered by Hugging Face Transformers** | |
| """) | |
| # Start the app only when the file is run as a script. | |
| if __name__ == "__main__": | |
| demo.launch() | |