File size: 4,980 Bytes
088c0f6
 
 
 
68c331c
088c0f6
 
 
 
5ccd55e
088c0f6
 
b43460b
088c0f6
5ccd55e
ea45139
5ccd55e
ea45139
088c0f6
 
 
 
 
 
 
 
 
 
 
 
5ccd55e
088c0f6
 
 
 
 
 
 
ea45139
 
 
c804b68
 
 
088c0f6
 
c2f18b1
d9981be
a5a43d4
 
d9981be
a5a43d4
 
 
c2f18b1
a5a43d4
 
87c321c
ea45139
c804b68
088c0f6
1ff90fd
 
 
 
 
 
 
 
83cdad8
9eb811d
 
 
 
 
 
 
 
 
 
 
 
 
6299cee
2a5ede3
1ff90fd
088c0f6
 
7b11c27
 
088c0f6
b43460b
088c0f6
b43460b
afad819
088c0f6
 
 
ea45139
b43460b
 
 
ea45139
 
 
088c0f6
5ccd55e
afad819
 
 
9eb811d
 
5ccd55e
 
 
c804b68
5ccd55e
c484b22
5ccd55e
1ff90fd
088c0f6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import torch
import clip
from PIL import Image
import gradio as gr
import datetime

# Run on GPU when available; CLIP inference on CPU works but is slower.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Loads (and on first run downloads) the ViT-B/32 CLIP checkpoint.
# `preprocess` is the matching torchvision transform for input images.
model, preprocess = clip.load("ViT-B/32", device=device)


def allure(image, gender):
    """Score an image for attractiveness and photo-authenticity using CLIP.

    Args:
        image: numpy array (H, W, 3) as delivered by the Gradio image input.
        gender: one of 'Person', 'Man', 'Woman' (case-insensitive).

    Returns:
        Tuple of (composite, hotness, beauty, allure, hq_authenticity,
        lq_authenticity, "<authenticity> (<judgement>)") — scores on a
        0-100 scale, the last one formatted as a string.
    """
    image = Image.fromarray(image.astype("uint8"), "RGB")
    gender = gender.lower()
    image = preprocess(image).unsqueeze(0).to(device)

    # Each positive prompt is paired with an antonymous negative prompt;
    # CLIP's softmax over the pair yields P(positive) vs P(negative).
    # The last two pairs probe authenticity (HQ- and LQ-camera phrasing).
    positive_terms = [f'a hot {gender}',
                      f'a beautiful {gender}', f'an alluring {gender}', 'a photorealistic image taken with a high-quality camera', 'a photorealistic image taken with a low-quality/bad camera']
    negative_terms = [f'a gross {gender}',
                      f'an ugly {gender}', f'a hideous {gender}', 'a toonish, unrealistic or photoshopped image taken with a high-quality camera', 'a toonish, unrealistic or photoshopped image taken with a low-quality/bad camera']

    pairs = list(zip(positive_terms, negative_terms))

    def evaluate(terms):
        """Return the image's softmax probabilities over `terms` (len 2)."""
        text = clip.tokenize(terms).to(device)
        with torch.no_grad():
            logits_per_image, _ = model(image, text)
            return logits_per_image.softmax(dim=-1).cpu().numpy()[0]

    probs = [evaluate(pair) for pair in pairs]

    def pair_score(p):
        # Map (P(pos) - P(neg)) from [-1, 1] onto a 0-100 scale.
        return round((p[0] - p[1] + 1) * 50, 2)

    hotness_score = pair_score(probs[0])
    beauty_score = pair_score(probs[1])
    attractiveness_score = pair_score(probs[2])

    authenticity_score_lq = pair_score(probs[-1])
    authenticity_score_hq = pair_score(probs[-2])
    # FIX: round the averaged authenticity so the displayed string does not
    # carry float noise (the addends are rounded but their mean was not).
    authenticity_score = round((authenticity_score_lq + authenticity_score_hq) / 2, 2)

    # FIX: the composite combines only the three attractiveness pairs
    # (hot / beautiful / alluring), matching the app description. The old
    # `[:-1]` slice excluded just the LQ-authenticity pair and accidentally
    # mixed the HQ-authenticity pair into the attractiveness composite.
    attractiveness_probs = probs[:3]
    hot_score = sum(p[0] for p in attractiveness_probs) / len(attractiveness_probs)
    ugly_score = sum(p[1] for p in attractiveness_probs) / len(attractiveness_probs)
    composite = round(((hot_score - ugly_score) + 1) * 50, 2)

    # Translate the authenticity score into a human-readable verdict.
    if authenticity_score >= 90:
        judgement = "likely real"
    elif authenticity_score >= 80:
        judgement = "slightly altered"
    elif authenticity_score >= 70:
        judgement = "moderately altered"
    elif authenticity_score >= 50:
        judgement = "significantly toonish or altered"
    else:
        judgement = "extremely toonish and/or distorted"

    return composite, hotness_score, beauty_score, attractiveness_score, authenticity_score_hq, authenticity_score_lq, f"{authenticity_score} ({judgement})"


# theme = gr.themes.Soft(
#     font=[gr.themes.GoogleFont("Quicksand"),
#           "ui-sans-serif", "sans-serif"],
#     font_mono=[gr.themes.GoogleFont("IBM Plex Mono"),
#                "ui-monospace", "monospace"],
#     primary_hue="cyan",
#     secondary_hue="cyan",
#     radius_size="lg")
        
# theme.set(
#     input_radius="64px",
#     button_large_radius='64px',
#     button_small_radius='64px',
#     body_background_fill=theme.block_background_fill_dark,
#     block_shadow=theme.block_shadow_dark,
#     block_label_radius='64px',
#     block_label_right_radius='64px',
#     background_fill_primary=theme.background_fill_primary_dark,
#     background_fill_secondary=theme.background_fill_secondary_dark,
#     block_label_border_width=theme.block_label_border_width_dark,
#     block_label_border_color=theme.block_label_border_color_dark
# )

# Build the UI. gr.Interface is used as a Blocks context (Gradio 3.x) so an
# extra Accordion can be appended below the auto-generated layout.
with gr.Interface(
    # theme=theme,
    fn=allure,
    inputs=[
        gr.Image(label="Image"),
        gr.Dropdown(
            [
                'Person', 'Man', 'Woman'
            ],
            # FIX: Gradio 3.x components take `value=` for the initial
            # selection; the old `default=` kwarg raises a TypeError here.
            value='Person',
            label="Gender"
        )
    ],
    outputs=[
        gr.Textbox(label="Composite Score (%)"),
        gr.Textbox(label="Hotness (%)"),
        gr.Textbox(label="Beauty (%)"),
        gr.Textbox(label="Allure (%)"),
        gr.Textbox(label="HQ Authenticity (%)"),
        gr.Textbox(label="LQ Authenticity (%)"),
        gr.Textbox(label="Composite Authenticity (≥ 90% → likely real)"),
    ],
    examples=[
        ['Mansib_01_x2048.png', 'Man'],
        ['Mansib_02_x2048.png', 'Man']
    ],
    # Timestamps are evaluated once at startup, not per request.
    title=f"Attractiveness Evaluator (powered by OpenAI CLIP) [Updated on {datetime.datetime.now().strftime('%A, %b %d %Y %I:%M:%S%p')}]",
    description=f"""A simple attractiveness evaluation app using the latest, current (newest stable release as of {datetime.datetime.now().strftime('%A, %b %d %Y %I:%M:%S%p')}) version of OpenAI's CLIP model.""",
) as iface:
    with gr.Accordion("How does it work?"):
        gr.Markdown(
            """The input image is passed to OpenAI's CLIP image captioning model and evaluated for how much it conforms to the model's idea of hotness, beauty, and attractiveness. 
These values are then combined to produce a composite score on a scale of 0 to 100.
# ⚠️ WARNING: This is meant solely for educational use!""")

iface.queue(api_open=False)  # Add `api_open = False` to disable direct API access.
iface.launch()