import torch
import clip
from PIL import Image
import gradio as gr
import datetime

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the ViT-B/32 CLIP checkpoint together with the image preprocessing
# callable returned alongside it, placing the model on the selected device.
model, preprocess = clip.load("ViT-B/32", device=device)


def allure(image, gender):
    """Score an image for attractiveness and photographic authenticity with CLIP.

    Every score is derived from a two-way softmax between a "positive" prompt
    and its "negative" counterpart. The probability margin ``p_pos - p_neg``
    (range -1..1) is mapped linearly onto 0..100, so 50 means CLIP is
    undecided between the two prompts.

    Args:
        image: Image as a NumPy array (anything supporting ``.astype``), as
            delivered by the ``gr.Image`` input.
        gender: Subject noun interpolated into the prompts (the UI offers
            ``"Person"``, ``"Man"`` and ``"Woman"``). It is lower-cased before
            use; an empty/``None`` value falls back to ``"person"``.

    Returns:
        A 7-tuple ``(composite, hotness, beauty, allure, hq_authenticity,
        lq_authenticity, authenticity_summary)``. The first six entries are
        floats rounded to two decimals; the last is a string of the form
        ``"<mean authenticity> (<judgement>)"``.

    Raises:
        gr.Error: If no image was supplied.
    """
    if image is None:
        raise gr.Error("Please provide an image to evaluate.")

    # Tolerate a dropdown with no selection instead of failing on None.lower().
    gender = (gender or "Person").lower()

    # Let Pillow infer the mode from the array shape and then normalise to
    # RGB, so grayscale / RGBA arrays are handled as well as plain RGB ones.
    pil_image = Image.fromarray(image.astype("uint8")).convert("RGB")
    image_tensor = preprocess(pil_image).unsqueeze(0).to(device)

    # (positive prompt, negative prompt) pairs. Order matters: results are
    # read back by position further down.
    attractiveness_pairs = [
        (f'a hot {gender}', f'a gross {gender}'),
        (f'a beautiful {gender}', f'an ugly {gender}'),
        (f'an alluring {gender}', f'a hideous {gender}'),
    ]
    authenticity_pairs = [
        ('a photorealistic image taken with a high-quality camera',
         'a toonish, unrealistic or photoshopped image taken with a high-quality camera'),
        ('a photorealistic image taken with a low-quality/bad camera',
         'a toonish, unrealistic or photoshopped image taken with a low-quality/bad camera'),
    ]
    pairs = attractiveness_pairs + authenticity_pairs

    # Flatten to [pos0, neg0, pos1, neg1, ...] and run ONE forward pass so the
    # image is encoded once instead of once per pair. Each image/text logit
    # depends only on that image and that text, so regrouping the logits into
    # rows of two and applying softmax per row matches evaluating each pair
    # separately.
    prompts = [prompt for pair in pairs for prompt in pair]
    tokens = clip.tokenize(prompts).to(device)
    with torch.no_grad():
        logits_per_image, _ = model(image_tensor, tokens)
        # Cast to float32 first: logits may be half precision on GPU, which
        # is too coarse for percentages reported with two decimals.
        pair_probs = (
            logits_per_image.float().reshape(-1, 2).softmax(dim=-1).cpu().tolist()
        )

    def to_percent(positive, negative):
        # Map the probability margin (-1..1) onto a 0..100 scale.
        return (positive - negative + 1) * 50

    hotness_score = round(to_percent(*pair_probs[0]), 2)
    beauty_score = round(to_percent(*pair_probs[1]), 2)
    attractiveness_score = round(to_percent(*pair_probs[2]), 2)

    authenticity_score_hq = round(to_percent(*pair_probs[3]), 2)
    authenticity_score_lq = round(to_percent(*pair_probs[4]), 2)
    # Round the mean too, otherwise float artefacts leak into the summary.
    authenticity_score = round(
        (authenticity_score_lq + authenticity_score_hq) / 2, 2)

    # The composite averages ONLY the attractiveness pairs; the authenticity
    # pairs are reported separately and must not influence it.
    attractiveness_probs = pair_probs[:len(attractiveness_pairs)]
    mean_positive = sum(p for p, _ in attractiveness_probs) / len(attractiveness_probs)
    mean_negative = sum(n for _, n in attractiveness_probs) / len(attractiveness_probs)
    composite = round(to_percent(mean_positive, mean_negative), 2)

    # Translate the mean authenticity into a coarse verbal verdict.
    if authenticity_score >= 90:
        judgement = "likely real"
    elif authenticity_score >= 80:
        judgement = "slightly altered"
    elif authenticity_score >= 70:
        judgement = "moderately altered"
    elif authenticity_score >= 50:
        judgement = "significantly toonish or altered"
    else:
        judgement = "extremely toonish and/or distorted"

    return (
        composite,
        hotness_score,
        beauty_score,
        attractiveness_score,
        authenticity_score_hq,
        authenticity_score_lq,
        f"{authenticity_score} ({judgement})",
    )


# theme = gr.themes.Soft(
#     font=[gr.themes.GoogleFont("Quicksand"),
#           "ui-sans-serif", "sans-serif"],
#     font_mono=[gr.themes.GoogleFont("IBM Plex Mono"),
#                "ui-monospace", "monospace"],
#     primary_hue="cyan",
#     secondary_hue="cyan",
#     radius_size="lg")
        
# theme.set(
#     input_radius="64px",
#     button_large_radius='64px',
#     button_small_radius='64px',
#     body_background_fill=theme.block_background_fill_dark,
#     block_shadow=theme.block_shadow_dark,
#     block_label_radius='64px',
#     block_label_right_radius='64px',
#     background_fill_primary=theme.background_fill_primary_dark,
#     background_fill_secondary=theme.background_fill_secondary_dark,
#     block_label_border_width=theme.block_label_border_width_dark,
#     block_label_border_color=theme.block_label_border_color_dark
# )

# Formatted once so the title and the description always show the same
# timestamp (two separate now() calls could straddle a second boundary).
launch_timestamp = datetime.datetime.now().strftime('%A, %b %d %Y %I:%M:%S%p')

# Build the UI: an image plus a subject dropdown go into `allure`, whose seven
# return values are shown in seven text boxes. The Interface is entered as a
# context manager so the explanatory accordion below is added to the same page.
with gr.Interface(
    # theme=theme,
    fn=allure,
    inputs=[
        gr.Image(label="Image"),
        gr.Dropdown(
            [
                'Person', 'Man', 'Woman'
            ],
            # `value` (not `default`) is the keyword that sets the initial
            # selection; with `default` the dropdown starts without a choice.
            value='Person',
            label="Gender"
        )
    ],
    outputs=[
        gr.Textbox(label="Composite Score (%)"),
        gr.Textbox(label="Hotness (%)"),
        gr.Textbox(label="Beauty (%)"),
        gr.Textbox(label="Allure (%)"),
        gr.Textbox(label="HQ Authenticity (%)"),
        gr.Textbox(label="LQ Authenticity (%)"),
        gr.Textbox(label="Composite Authenticity (≥ 90% → likely real)"),
    ],
    examples=[
        ['Mansib_01_x2048.png', 'Man'],
        ['Mansib_02_x2048.png', 'Man']
    ],
    title=f"Attractiveness Evaluator (powered by OpenAI CLIP) [Updated on {launch_timestamp}]",
    description=f"""A simple attractiveness evaluation app using the latest, current (newest stable release as of {launch_timestamp}) version of OpenAI's CLIP model.""",
) as iface:
    with gr.Accordion("How does it work?"):
        gr.Markdown(
            """The input image is passed to OpenAI's CLIP image captioning model and evaluated for how much it conforms to the model's idea of hotness, beauty, and attractiveness. 
These values are then combined to produce a composite score on a scale of 0 to 100.
# ⚠️ WARNING: This is meant solely for educational use!""")

iface.queue(api_open=False)  # `api_open=False` disables direct API access to the queue.
iface.launch()