"""ACTIVIDAD_IA / app.py

Gradio demo combining a ViT image classifier and a YOLOS object detector.
(Originally a Hugging Face Space by Mapu142, commit 2bbbf2d.)
"""
import gradio as gr
from transformers import pipeline, ViTImageProcessor, ViTForImageClassification, YolosImageProcessor, YolosForObjectDetection
from PIL import Image, ImageDraw
import torch
import numpy as np
class UniversalImageClassifier:
    """Image classifier backed by Google's ViT (vit-base-patch16-224).

    Wraps a Hugging Face `image-classification` pipeline and maps a small
    set of well-known ImageNet labels to Spanish display names with emojis.
    """

    def __init__(self):
        print("🔄 Cargando clasificador de imágenes ViT...")
        self.model_name = "google/vit-base-patch16-224"
        self.processor = ViTImageProcessor.from_pretrained(self.model_name)
        self.model = ViTForImageClassification.from_pretrained(self.model_name)
        # device=-1 pins the pipeline to CPU; this demo is sized for CPU inference.
        self.classifier = pipeline(
            "image-classification",
            model=self.model,
            feature_extractor=self.processor,
            device=-1,
        )
        print("✅ Clasificador ViT cargado!")
        # ImageNet class name -> Spanish display name with emoji.
        self.category_mappings = {
            'egyptian_cat': '🐱 Gato Egipcio',
            'tabby': '🐱 Gato Atigrado',
            'tiger_cat': '🐱 Gato Tiger',
            'golden_retriever': '🐕 Golden Retriever',
            'german_shepherd': '🐕 Pastor Alemán',
            'beagle': '🐕 Beagle',
            'sports_car': '🏎️ Auto Deportivo',
            'convertible': '🚗 Convertible',
            'motorcycle': '🏍️ Motocicleta',
            'bicycle': '🚲 Bicicleta',
            'airplane': '✈️ Avión',
            'pizza': '🍕 Pizza',
            'hamburger': '🍔 Hamburguesa',
            'hot_dog': '🌭 Hot Dog',
            'ice_cream': '🍦 Helado',
            'laptop': '💻 Laptop',
            'cellular_telephone': '📱 Teléfono Móvil',
            'television': '📺 Televisión',
            'daisy': '🌼 Margarita',
            'rose': '🌹 Rosa',
            'sunflower': '🌻 Girasol',
        }

    def classify_image(self, image):
        """Classify `image` and return the top-5 predictions.

        Each prediction is a dict with keys 'label' (display name),
        'original_label' (raw model label) and 'confidence' (percentage).
        On failure a single-entry list with an error label and confidence 0
        is returned instead of raising.
        """
        try:
            raw_results = self.classifier(image)
            return [
                {
                    'label': self.category_mappings.get(
                        item['label'],
                        f"🔍 {item['label'].replace('_', ' ').title()}",
                    ),
                    'original_label': item['label'],
                    'confidence': item['score'] * 100,
                }
                for item in raw_results[:5]
            ]
        except Exception as exc:
            return [{'label': f'Error: {str(exc)}', 'confidence': 0}]
class ObjectDetector:
    """Object detector backed by YOLOS-tiny (hustvl/yolos-tiny).

    Detects COCO-class objects, mapping known class names to Spanish display
    names, and can draw annotated bounding boxes on a copy of the image.
    """

    def __init__(self):
        print("🔄 Cargando detector de objetos YOLOS...")
        self.model_name = "hustvl/yolos-tiny"
        self.processor = YolosImageProcessor.from_pretrained(self.model_name)
        self.model = YolosForObjectDetection.from_pretrained(self.model_name)
        print("✅ Detector YOLOS cargado!")
        # COCO class name -> Spanish display name with emoji.
        self.class_mappings = {
            'person': '👤 Persona',
            'bicycle': '🚲 Bicicleta',
            'car': '🚗 Auto',
            'motorcycle': '🏍️ Motocicleta',
            'airplane': '✈️ Avión',
            'bus': '🚌 Autobús',
            'train': '🚂 Tren',
            'truck': '🚛 Camión',
            'boat': '⛵ Barco',
            'traffic light': '🚦 Semáforo',
            'bird': '🐦 Pájaro',
            'cat': '🐱 Gato',
            'dog': '🐕 Perro',
            'horse': '🐎 Caballo',
            'elephant': '🐘 Elefante',
            'backpack': '🎒 Mochila',
            'umbrella': '☂️ Paraguas',
            'bottle': '🍼 Botella',
            'cup': '☕ Taza',
            'banana': '🍌 Plátano',
            'apple': '🍎 Manzana',
            'pizza': '🍕 Pizza',
            'chair': '🪑 Silla',
            'couch': '🛋️ Sofá',
            'bed': '🛏️ Cama',
            'tv': '📺 Televisión',
            'laptop': '💻 Laptop',
            'mouse': '🖱️ Mouse',
            'cell phone': '📱 Teléfono',
            'book': '📚 Libro',
            'clock': '🕐 Reloj'
        }

    def detect_objects(self, image):
        """Run YOLOS detection on a PIL `image`.

        Returns a list of dicts with keys 'label', 'original_label',
        'confidence' (percentage) and 'box' ([xmin, ymin, xmax, ymax]).
        On failure, returns a single error entry with confidence 0.
        """
        try:
            inputs = self.processor(images=image, return_tensors="pt")
            # Inference only: no_grad avoids building the autograd graph.
            with torch.no_grad():
                outputs = self.model(**inputs)
            # PIL size is (width, height); the post-processor wants (height, width).
            target_sizes = torch.tensor([image.size[::-1]])
            # threshold=0.3 already filters low-confidence boxes, so no
            # additional score check is needed below.
            results = self.processor.post_process_object_detection(
                outputs, target_sizes=target_sizes, threshold=0.3
            )[0]
            detections = []
            for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
                label_name = self.model.config.id2label[label.item()]
                detections.append({
                    'label': self.class_mappings.get(label_name, f"🔍 {label_name}"),
                    'original_label': label_name,
                    'confidence': score.item() * 100,
                    'box': box.tolist()
                })
            return detections
        except Exception as e:
            return [{'label': f'Error: {str(e)}', 'confidence': 0, 'box': [0, 0, 0, 0]}]

    def draw_detections(self, image, detections):
        """Return a copy of `image` with boxes and labels drawn for every
        detection above 30% confidence.

        Best-effort: if annotation fails for any reason the original image
        is returned unchanged.
        """
        try:
            annotated_image = image.copy()
            draw = ImageDraw.Draw(annotated_image)
            colors = ['red', 'blue', 'green', 'yellow', 'purple', 'orange', 'cyan', 'magenta']
            for i, detection in enumerate(detections):
                if detection['confidence'] <= 30:
                    continue
                xmin, ymin, xmax, ymax = detection['box']
                label = detection['label']
                confidence = detection['confidence']
                color = colors[i % len(colors)]
                draw.rectangle([(xmin, ymin), (xmax, ymax)], outline=color, width=3)
                text = f"{label} ({confidence:.1f}%)"
                try:
                    # Draw the caption on a filled background so it stays readable.
                    text_bbox = draw.textbbox((0, 0), text)
                    text_width = text_bbox[2] - text_bbox[0]
                    text_height = text_bbox[3] - text_bbox[1]
                    draw.rectangle(
                        [(xmin, ymin - text_height - 4), (xmin + text_width + 4, ymin)],
                        fill=color
                    )
                    draw.text((xmin + 2, ymin - text_height - 2), text, fill='white')
                except Exception:
                    # FIX: was a bare `except:` (would swallow KeyboardInterrupt
                    # and SystemExit). Fallback for Pillow builds without textbbox.
                    draw.text((xmin, ymin - 20), text, fill=color)
            return annotated_image
        except Exception:
            # Never fail the request because annotation failed.
            return image
# Instantiate both models once at import time so the Gradio callbacks reuse them.
print("🚀 Inicializando modelos de IA...")
classifier = UniversalImageClassifier()  # ViT image classifier (CPU pipeline, device=-1)
detector = ObjectDetector()  # YOLOS-tiny object detector
print("✅ ¡Todos los modelos listos!")
def classify_image_complete(image):
    """Build a Markdown report with the top-5 ViT predictions for `image`.

    Returns an error-message string when no image is provided or when
    classification fails.
    """
    if image is None:
        return "❌ Por favor sube una imagen para clasificar"
    try:
        preds = classifier.classify_image(image)
        if not preds or preds[0]['confidence'] == 0:
            return "❌ No se pudo clasificar la imagen"
        top = preds[0]
        report = f"""# 🔍 Clasificación de Imagen
## 🎯 Predicción Principal:
### {top['label']}
**Confianza:** {top['confidence']:.1f}%
## 📊 Top 5 Predicciones:
"""
        for rank, pred in enumerate(preds, 1):
            # 10-segment text bar proportional to the confidence percentage.
            filled = int(pred['confidence'] / 10)
            bar = "█" * filled + "░" * (10 - filled)
            report += f"\n**{rank}.** {pred['label']}\n{bar} {pred['confidence']:.1f}%"
        # Qualitative confidence bucket for the dominant prediction.
        buckets = [(80, "🟢 Muy Alta"), (60, "🟡 Alta"), (40, "🟠 Moderada")]
        level = next(
            (name for cutoff, name in buckets if top['confidence'] > cutoff),
            "🔴 Baja",
        )
        report += f"\n\n## 🎚️ Confianza: {level}"
        report += "\n\n*Clasificación con Vision Transformer (ViT)*"
        return report
    except Exception as exc:
        return f"❌ Error: {str(exc)}"
def detect_objects_complete(image):
    """Run YOLOS detection on `image` and return (markdown_report, annotated_image).

    Returns an error message (and None or the original image) when input is
    missing, detection fails, or nothing is found.
    """
    if image is None:
        return "❌ Por favor sube una imagen para detectar objetos", None
    try:
        detections = detector.detect_objects(image)
        if not detections or detections[0]['confidence'] == 0:
            return "❌ No se detectaron objetos", image
        annotated_image = detector.draw_detections(image, detections)
        # Count instances per raw COCO label, using the same 30% display
        # threshold as the annotation step.
        object_counts = {}
        for detection in detections:
            if detection['confidence'] > 30:
                label = detection['original_label']
                object_counts[label] = object_counts.get(label, 0) + 1
        total_objects = len(detections)
        unique_objects = len(object_counts)
        report = f"""# 🎯 Detección de Objetos
## 📊 Resumen:
- **Objetos detectados:** {total_objects}
- **Tipos únicos:** {unique_objects}
- **Confianza promedio:** {np.mean([d['confidence'] for d in detections]):.1f}%
## 🔍 Objetos Encontrados:
"""
        sorted_detections = sorted(detections, key=lambda x: x['confidence'], reverse=True)
        for i, detection in enumerate(sorted_detections[:10], 1):  # Top 10
            label = detection['label']
            confidence = detection['confidence']
            bar = "█" * int(confidence / 10) + "░" * (10 - int(confidence / 10))
            report += f"\n**{i}.** {label}\n{bar} {confidence:.1f}%"
        report += f"\n\n## 📈 Conteo por Tipo:"
        for obj_type, count in sorted(object_counts.items(), key=lambda x: x[1], reverse=True)[:5]:
            display_name = detector.class_mappings.get(obj_type, obj_type)
            report += f"\n- {display_name}: **{count}**"
        # FIX: YOLOS stands for "You Only Look at One Sequence" (the paper's
        # title), not "You Only Look Once" (which is YOLO).
        report += "\n\n*Detección con YOLOS (You Only Look at One Sequence)*"
        return report, annotated_image
    except Exception as e:
        return f"❌ Error: {str(e)}", None
# Gradio interface: two tabs (classifier / detector) wired to the two models.
with gr.Blocks(title="🤖 Analizador Visual Universal con IA") as demo:
    # FIX in the intro text below: YOLOS = "You Only Look at One Sequence",
    # not "You Only Look Once" (that is YOLO).
    gr.Markdown("""
# 🤖 Analizador Visual Universal con IA
**Dos poderosos modelos de IA en una sola aplicación**
🔍 **Clasificador Universal:** Identifica QUÉ ES (1000+ categorías)
🎯 **Detector de Objetos:** Encuentra DÓNDE ESTÁN (80+ objetos)
✨ **Modelos incluidos:**
- 🧠 Vision Transformer (ViT) de Google
- 🎯 YOLOS (You Only Look at One Sequence)
- 💻 100% optimizado para CPU
""")
    with gr.Tabs():
        # Tab 1: universal classifier
        with gr.Tab("🔍 Clasificador Universal"):
            gr.Markdown("""
### Identifica automáticamente el contenido principal de tu imagen
**Perfecto para:** Catalogar fotos, identificar objetos desconocidos, análisis de contenido
""")
            with gr.Row():
                with gr.Column(scale=1):
                    classify_input = gr.Image(
                        label="📸 Sube tu imagen",
                        type="pil"
                    )
                    classify_btn = gr.Button(
                        "🚀 Clasificar Imagen",
                        variant="primary",
                        size="lg"
                    )
                with gr.Column(scale=2):
                    classify_output = gr.Markdown(
                        label="📋 Resultado de Clasificación"
                    )
            # Example images are intentionally disabled; re-enable if loading
            # remote URLs at startup is acceptable.
            # gr.Examples(
            #     examples=[
            #         "https://images.unsplash.com/photo-1574158622682-e40e69881006?w=300",  # cat
            #         "https://images.unsplash.com/photo-1552053831-71594a27632d?w=300",  # dog
            #         "https://images.unsplash.com/photo-1565299624946-b28f40a0ca4b?w=300"  # pizza
            #     ],
            #     inputs=[classify_input],
            #     label="🖼️ Ejemplos para probar"
            # )
        # Tab 2: object detector
        with gr.Tab("🎯 Detector de Objetos"):
            gr.Markdown("""
### Encuentra y localiza múltiples objetos en tu imagen
**Perfecto para:** Análisis de escenas, inventarios visuales, seguridad
""")
            with gr.Row():
                with gr.Column(scale=1):
                    detect_input = gr.Image(
                        label="📸 Sube tu imagen",
                        type="pil"
                    )
                    detect_btn = gr.Button(
                        "🎯 Detectar Objetos",
                        variant="primary",
                        size="lg"
                    )
                with gr.Column(scale=1):
                    detect_output = gr.Markdown(
                        label="📋 Objetos Detectados"
                    )
                    detect_image_output = gr.Image(
                        label="🎯 Imagen Anotada",
                        type="pil"
                    )
    # Event wiring
    classify_btn.click(
        classify_image_complete,
        inputs=[classify_input],
        outputs=[classify_output]
    )
    detect_btn.click(
        detect_objects_complete,
        inputs=[detect_input],
        outputs=[detect_output, detect_image_output]
    )
    gr.Markdown("""
---
### 💡 Consejos para mejores resultados:
- Usa imágenes claras y bien iluminadas
- Centra el objeto principal para clasificación
- Para detección, incluye múltiples objetos en la escena
- Resolución mínima recomendada: 224x224 píxeles
**🚀 Powered by Hugging Face Transformers**
""")

if __name__ == "__main__":
    demo.launch()