"""Gradio demo: CLIP ViT-L/14 aesthetic scorer (PurpleSmartAI).

Embeds an image with CLIP ViT-L/14, L2-normalises the embedding, and feeds it
to a small MLP regression head that predicts a raw aesthetic value.  The raw
value is bucketed into one of ten caption tags, ``score_0`` … ``score_9``, and
rendered as an HTML score card.

NOTE(review): the original HTML templates were mangled in transit (tags
stripped); the markup below is a faithful reconstruction from the surviving
fragments (per-cell ``score_{i}`` labels, ``raw:`` readout, width bar,
``score_0``/``score_9`` axis labels) — confirm against the deployed Space.
"""

import os

import numpy as np
import torch
import torch.nn as nn
from PIL import Image

import clip
import gradio as gr
from huggingface_hub import hf_hub_download

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"[info] device: {DEVICE}")

print("[info] Loading CLIP ViT-L/14 ...")
clip_model, preprocess = clip.load("ViT-L/14", device=DEVICE)
clip_model.eval()

print("[info] Downloading aesthetic-classifier checkpoint ...")
ckpt_path = hf_hub_download(
    repo_id="purplesmartai/aesthetic-classifier",
    filename="v2.ckpt",
)

# SECURITY NOTE: torch.load unpickles arbitrary objects.  A Lightning-style
# .ckpt needs full unpickling (weights_only=True may reject it), so we rely on
# the checkpoint coming from a pinned, trusted HF repo.
checkpoint_data = torch.load(ckpt_path, map_location=DEVICE)

# Lightning prefixes every key with "model."; strip it so the plain
# nn.Sequential below can load the weights directly.
state_dict = checkpoint_data["state_dict"]
state_dict = {k.replace("model.", ""): v for k, v in state_dict.items()}

# Regression head: 768-d CLIP ViT-L/14 embedding -> single raw aesthetic value.
aesthetic_model = nn.Sequential(
    nn.Linear(768, 1024),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(1024, 512),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(512, 1),
).to(DEVICE)
aesthetic_model.load_state_dict(state_dict)
aesthetic_model.eval()
print("[info] Model ready.")


@torch.no_grad()
def get_score(image: Image.Image) -> float:
    """Return the raw aesthetic score for *image*.

    The image is preprocessed for CLIP, embedded, L2-normalised (the head was
    trained on unit-norm embeddings), and passed through the regression head.
    """
    t = preprocess(image.convert("RGB")).unsqueeze(0).to(DEVICE)
    feat = clip_model.encode_image(t).cpu().numpy().astype("float32")
    # Guard against a zero-norm embedding (would otherwise divide by zero).
    norm = np.linalg.norm(feat, axis=1, keepdims=True)
    feat = feat / np.where(norm == 0, 1, norm)
    return aesthetic_model(torch.from_numpy(feat).to(DEVICE)).item()


def raw_to_pony(raw: float) -> int:
    """Bucket a raw score into a Pony tag index 0-9 (clamped to [0, 0.99))."""
    return int(max(0.0, min(0.99, raw)) * 10)


# One colour per score bucket, red (score_0) through deep green (score_9).
COLOURS = [
    "#c0392b", "#e74c3c", "#e67e22", "#f39c12", "#d4ac0d",
    "#27ae60", "#1e8449", "#148f77", "#0e6655", "#0a4f42",
]


def build_html(raw: float) -> str:
    """Render *raw* as an HTML score card.

    The card shows the winning ``score_N`` tag, the raw value, a 2x5 grid of
    all ten tags with the active one highlighted, and a horizontal bar whose
    width is the raw value clamped to [0, 1].
    """
    pony = raw_to_pony(raw)
    colour = COLOURS[pony]

    # Two rows of 5 so the grid never overflows narrow layouts.
    rows = []
    for row_start in (0, 5):
        cells = ""
        for i in range(row_start, row_start + 5):
            active = i == pony
            bg = COLOURS[i] if active else "rgba(255,255,255,0.07)"
            border = (
                f"2px solid {COLOURS[i]}"
                if active
                else "2px solid rgba(255,255,255,0.12)"
            )
            weight = "700" if active else "400"
            scale = "scale(1.08)" if active else "scale(1)"
            opac = "1" if active else "0.5"
            cells += (
                f'<div style="flex:1;margin:3px;padding:10px 0;text-align:center;'
                f'border-radius:8px;background:{bg};border:{border};'
                f'font-weight:{weight};transform:{scale};opacity:{opac};'
                f'transition:all .15s ease">'
                f"score_{i}</div>"
            )
        rows.append(
            f'<div style="display:flex;justify-content:space-between">'
            f"{cells}</div>"
        )

    bar_w = min(max(raw, 0.0), 1.0) * 100
    return f"""
<div style="padding:1rem">
  <div style="text-align:center;font-size:2rem;font-weight:700;color:{colour}">
    score_{pony}
  </div>
  <div style="text-align:center;color:#888;margin-bottom:.8rem">
    raw: {raw:.4f}
  </div>
  {"".join(rows)}
  <div style="height:8px;margin-top:.9rem;border-radius:4px;background:rgba(255,255,255,0.1)">
    <div style="height:100%;width:{bar_w:.1f}%;border-radius:4px;background:{colour}"></div>
  </div>
  <div style="display:flex;justify-content:space-between;color:#666;font-size:.8rem;margin-top:.3rem">
    <span>score_0</span><span>score_9</span>
  </div>
</div>
"""


def classify(image):
    """Gradio callback: score *image*; show a placeholder when no image is set."""
    if image is None:
        return (
            '<div style="text-align:center;color:#888;padding:3rem 1rem">'
            "Upload an image to score it."
            "</div>"
        )
    return build_html(get_score(image))


with gr.Blocks(
    title="Aesthetic Classifier — PurpleSmartAI",
    theme=gr.themes.Soft(primary_hue="purple"),
    css=".gradio-container{max-width:860px!important;margin:auto}"
    " #title{text-align:center} #sub{text-align:center;color:#888;font-size:.9rem;margin-bottom:1.4rem}",
) as demo:
    gr.Markdown("# 🎨 Aesthetic Classifier", elem_id="title")
    gr.Markdown(
        "CLIP ViT-L/14 regression model by **PurpleSmartAI** for Pony V7 captioning. "
        "Outputs a **score_0…score_9** tag used directly in training captions.",
        elem_id="sub",
    )
    with gr.Row():
        with gr.Column(scale=1):
            img_input = gr.Image(type="pil", label="Input Image", height=340)
            run_btn = gr.Button("✨ Score image", variant="primary", size="lg")
        with gr.Column(scale=1):
            out_html = gr.HTML(
                value=(
                    '<div style="text-align:center;color:#888;padding:3rem 1rem">'
                    "Upload an image to see its score."
                    "</div>"
                ),
            )
    gr.Markdown(
        "---\n**Model:** [`purplesmartai/aesthetic-classifier`]"
        "(https://huggingface.co/purplesmartai/aesthetic-classifier)"
        " · **Backbone:** OpenAI CLIP ViT-L/14"
    )

    run_btn.click(fn=classify, inputs=img_input, outputs=out_html)
    img_input.change(fn=classify, inputs=img_input, outputs=out_html)

if __name__ == "__main__":
    demo.launch()