artscope subida archivos
Browse files- README.md +33 -8
- app.py +153 -0
- requirements.txt +10 -0
README.md
CHANGED
|
@@ -1,14 +1,39 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version: 6.
|
| 8 |
-
python_version: '3.13'
|
| 9 |
app_file: app.py
|
| 10 |
pinned: false
|
| 11 |
-
license: mit
|
| 12 |
---
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: ArtScope
|
| 3 |
+
emoji: 🎨
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: pink
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 6.11.0
|
|
|
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# 🎨 ArtScope
|
| 13 |
+
|
| 14 |
+
Clasificador de movimientos artísticos basado en un ensemble heterogéneo
|
| 15 |
+
**ConvNeXt-tiny + ViT-small**, con mapa **Grad-CAM** y descripción del estilo
|
| 16 |
+
generada por **Claude** (Anthropic).
|
| 17 |
+
|
| 18 |
+
Trabajo final de la asignatura de Computer Vision del máster MIOTI.
|
| 19 |
+
|
| 20 |
+
## Cómo funciona
|
| 21 |
+
|
| 22 |
+
1. Subes una imagen de un cuadro.
|
| 23 |
+
2. Dos modelos (una CNN moderna y un Vision Transformer) predicen el movimiento por
|
| 24 |
+
separado y sus probabilidades se promedian (ensemble).
|
| 25 |
+
3. Se calcula un mapa Grad-CAM sobre el ConvNeXt para visualizar las regiones más
|
| 26 |
+
influyentes en la predicción.
|
| 27 |
+
4. Se envía el top-1 y top-2 a Claude, que devuelve una descripción contextual del
|
| 28 |
+
estilo.
|
| 29 |
+
|
| 30 |
+
## Movimientos soportados
|
| 31 |
+
|
| 32 |
+
Impressionism · Post-Impressionism · Realism · Romanticism · Expressionism · Cubism ·
|
| 33 |
+
Surrealism · Abstract Expressionism · Baroque · Northern Renaissance
|
| 34 |
+
|
| 35 |
+
## Configuración
|
| 36 |
+
|
| 37 |
+
La descripción generativa requiere una variable de entorno `ANTHROPIC_API_KEY`
|
| 38 |
+
(Settings → Variables and secrets). Si no se proporciona, la app sigue funcionando
|
| 39 |
+
y muestra un mensaje en su lugar.
|
app.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""ArtScope — app de Gradio para HuggingFace Space.
|
| 2 |
+
|
| 3 |
+
Pipeline:
|
| 4 |
+
imagen del cuadro
|
| 5 |
+
-> predicción con ConvNeXt + ViT en ensemble (promedio de probas)
|
| 6 |
+
-> Grad-CAM sobre el ConvNeXt
|
| 7 |
+
-> descripción del estilo predicho con Claude (opcional)
|
| 8 |
+
|
| 9 |
+
Pasos para desplegar:
|
| 10 |
+
1. Crea un Space (Gradio) en HuggingFace.
|
| 11 |
+
2. Sube este archivo + requirements.txt + README.md.
|
| 12 |
+
3. En Settings -> Variables and secrets, añade ANTHROPIC_API_KEY (opcional).
|
| 13 |
+
4. Sustituye HF_USER por tu usuario antes de subir.
|
| 14 |
+
"""
|
| 15 |
+
import os
|
| 16 |
+
import gradio as gr
|
| 17 |
+
import torch
|
| 18 |
+
import numpy as np
|
| 19 |
+
from PIL import Image
|
| 20 |
+
from torchvision import transforms as T
|
| 21 |
+
|
| 22 |
+
from huggingface_hub import from_pretrained_fastai
|
| 23 |
+
from fastai.vision.all import PILImage
|
| 24 |
+
from pytorch_grad_cam import GradCAM
|
| 25 |
+
from pytorch_grad_cam.utils.image import show_cam_on_image
|
| 26 |
+
|
| 27 |
+
# ---------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------
# Hub namespace that owns the two model repositories. It is read from the
# HF_USER environment variable so the Space can be configured without editing
# this file; when the variable is unset or empty the original placeholder is
# used, which still has to be replaced (or overridden) before deploying.
HF_USER = os.environ.get("HF_USER") or "tu_usuario"
REPO_CNN = f"{HF_USER}/artscope-convnext"
REPO_VIT = f"{HF_USER}/artscope-vit"
|
| 33 |
+
|
| 34 |
+
# ---------------------------------------------------------------
# Model loading (runs once, when the Space starts)
# ---------------------------------------------------------------
print("Descargando modelos del Hub...")
# Both learners are fetched in the same order as before: ConvNeXt, then ViT.
learn_cnn, learn_vit = (
    from_pretrained_fastai(repo_id) for repo_id in (REPO_CNN, REPO_VIT)
)
# Class names are taken from the ConvNeXt learner's vocabulary.
LABELS = [*learn_cnn.dls.vocab]
print(f"Modelos listos. {len(LABELS)} clases: {LABELS}")

# Grad-CAM is attached to the ConvNeXt only: per the original author it is
# faster and the visualisation is easier to interpret on this kind of
# architecture.
_cnn_model = learn_cnn.model.eval()  # eval() returns the module itself
# Last block of the last stage of the first child of the model.
_target_layer = _cnn_model[0].stages[-1].blocks[-1]
cam = GradCAM(model=_cnn_model, target_layers=[_target_layer])
|
| 48 |
+
|
| 49 |
+
# ---------------------------------------------------------------
# Claude client (optional — the app works without it)
# ---------------------------------------------------------------
ANTHROPIC_KEY = os.environ.get("ANTHROPIC_API_KEY")


def _make_anthropic_client(api_key):
    """Return an Anthropic client for *api_key*, or None when unavailable.

    Nothing is attempted when the key is missing or empty. Any failure while
    importing the SDK or constructing the client is reported on stdout and
    leaves the result as None, so the app can start without the LLM feature.
    """
    client = None
    if not api_key:
        return client
    try:
        # Imported lazily so the package is only needed when a key is set.
        from anthropic import Anthropic

        client = Anthropic(api_key=api_key)
        print("Cliente Anthropic inicializado.")
    except Exception as e:
        print(f"Aviso: no se pudo inicializar Anthropic ({e}).")
    return client


anthropic_client = _make_anthropic_client(ANTHROPIC_KEY)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def llm_describe(top_style: str, second_style: str) -> str:
    """Return a short Spanish description of the predicted movement from Claude.

    Args:
        top_style: Label of the highest-ranked movement. Underscores are
            replaced by spaces before the label is put in the prompt.
        second_style: Label of the runner-up movement, formatted the same way.

    Returns:
        Markdown text: Claude's reply, or an italic notice when the client is
        not configured, the reply contains no text, or the request fails.
        This function never raises for request errors; it is meant to be
        shown directly in the UI.
    """
    if anthropic_client is None:
        return (
            "_(Descripción LLM desactivada. Para activarla, añade "
            "`ANTHROPIC_API_KEY` en los Secrets del Space.)_"
        )

    # Normalise each label once instead of on every use in the prompt.
    top = top_style.replace("_", " ")
    second = second_style.replace("_", " ")
    prompt = (
        "Eres un guía de museo experto. Acabo de mostrar un cuadro a un clasificador "
        f"y dice que es {top}, con {second} "
        "como segunda opción. En 4-5 frases en español, explica qué rasgos visuales "
        f"definen al {top} y por qué podría confundirse con "
        f"{second}. Tono divulgativo, sin tecnicismos innecesarios."
    )
    try:
        msg = anthropic_client.messages.create(
            model="claude-haiku-4-5",
            max_tokens=400,
            messages=[{"role": "user", "content": prompt}],
        )
    except Exception as e:
        # Deliberate best-effort boundary: a failed call must not break the UI.
        return f"_(Error llamando a Claude: {e})_"

    # Collect every block that carries text rather than assuming the first
    # block exists and is a text block; an empty reply then yields a clear
    # notice instead of an opaque "list index out of range".
    text = "".join(
        getattr(block, "text", "") or "" for block in (msg.content or [])
    )
    if not text.strip():
        return "_(Claude no devolvió texto.)_"
    return text
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
# ---------------------------------------------------------------
# Main entry point
# ---------------------------------------------------------------
def predict(img):
    """Classify a painting, render a Grad-CAM overlay and describe the style.

    Args:
        img: Image delivered by the Gradio input component, or ``None`` when
            nothing was uploaded.

    Returns:
        A 3-tuple ``(label_dict, cam_img, description)``:
        ``label_dict`` maps the (up to) three best labels to their averaged
        probability, ``cam_img`` is the Grad-CAM overlay produced by
        ``show_cam_on_image`` and ``description`` is the text returned by
        ``llm_describe``. Returns ``(None, None, "")`` when ``img`` is ``None``.
    """
    if img is None:
        return None, None, ""

    # Function-scope import: this name is only needed here and comes from a
    # package the module already depends on.
    from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget

    # Preparation
    pil_img = Image.fromarray(np.array(img)).convert("RGB")
    fastai_img = PILImage.create(pil_img)

    # Ensemble: average of both models' probabilities
    _, _, probs_cnn = learn_cnn.predict(fastai_img)
    _, _, probs_vit = learn_vit.predict(fastai_img)
    probs = ((probs_cnn + probs_vit) / 2).numpy()

    # Top-3, highest probability first (stable for ties)
    order = sorted(
        range(len(LABELS)), key=lambda i: probs[i], reverse=True
    )[:3]
    label_dict = {LABELS[i]: float(probs[i]) for i in order}

    # Grad-CAM
    # NOTE(review): the tensor below is only resized and scaled by ToTensor;
    # if the learner was trained with input normalisation the heatmap is
    # computed on differently scaled input — confirm against training.
    preprocess = T.Compose([T.Resize((224, 224)), T.ToTensor()])
    base = preprocess(pil_img)  # computed once, reused for the overlay
    # Follow the model's own device instead of assuming CUDA is where it lives.
    device = next(learn_cnn.model.parameters()).device
    tensor = base.unsqueeze(0).to(device)
    # Explain the class the UI reports as top-1 (the ensemble winner) rather
    # than whatever the ConvNeXt alone would rank first.
    targets = [ClassifierOutputTarget(order[0])]
    grayscale = cam(input_tensor=tensor, targets=targets)[0]
    rgb = np.array(base.permute(1, 2, 0))
    cam_img = show_cam_on_image(rgb, grayscale, use_rgb=True)

    # Description: fall back to the top label when there is no runner-up
    # (single-class vocabulary) instead of raising IndexError.
    runner_up = order[1] if len(order) > 1 else order[0]
    description = llm_describe(LABELS[order[0]], LABELS[runner_up])

    return label_dict, cam_img, description
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
# ---------------------------------------------------------------
# UI
# ---------------------------------------------------------------
# Markdown header rendered at the top of the page.
DESCRIPTION = (
    "# 🎨 ArtScope\n"
    "Sube un cuadro y descubre a qué **movimiento artístico** pertenece, dónde "
    "está mirando el modelo (mapa Grad-CAM) y qué hace especial a ese estilo "
    "(descripción generada por Claude).\n\n"
    "*Modelo: ensemble ConvNeXt-tiny + ViT-small, fine-tuned sobre un subset de "
    "WikiArt con 10 movimientos.*"
)

# The theme is no longer passed to gr.Blocks: in Gradio 6 app-level options
# such as `theme` are arguments of launch() (see the launch call below).
with gr.Blocks(title="ArtScope") as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        # Left column: image upload, trigger button and supported styles.
        with gr.Column(scale=1):
            inp = gr.Image(type="numpy", label="Sube un cuadro")
            btn = gr.Button("Analizar", variant="primary")
            gr.Markdown(
                "**Estilos soportados**: "
                + ", ".join(s.replace("_", " ") for s in LABELS)
            )
        # Right column: the three outputs of predict(), in the same order.
        with gr.Column(scale=1):
            out_label = gr.Label(num_top_classes=3, label="Top movimientos")
            out_cam = gr.Image(label="Dónde mira el modelo (Grad-CAM)")
            out_desc = gr.Markdown(label="Descripción del estilo")

    btn.click(predict, inputs=inp, outputs=[out_label, out_cam, out_desc])

if __name__ == "__main__":
    demo.launch(theme=gr.themes.Soft())
|
requirements.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastai==2.8.7
|
| 2 |
+
timm==1.0.26
|
| 3 |
+
huggingface_hub[fastai]==1.8.0
|
| 4 |
+
gradio==6.11.0
|
| 5 |
+
grad-cam==1.5.4
|
| 6 |
+
anthropic==0.45.0
|
| 7 |
+
torch
|
| 8 |
+
torchvision
|
| 9 |
+
Pillow
|
| 10 |
+
numpy
|