# Hugging Face Space: DKatheesrupan/Exercise2 (status: Running)
# File size: 4,616 bytes

import os
import json
import base64
from pathlib import Path

import gradio as gr
from openai import OpenAI
from transformers import pipeline

# Directory that contains this file; example images are resolved relative to it.
BASE_DIR = Path(__file__).resolve().parent
EXAMPLE_DIR = BASE_DIR / "example_images"

# Model identifier passed to the image-classification pipeline below
# (presumably a Hugging Face Hub repo id — confirm).
MODEL_PATH = "DKatheesrupan/cat-vit"

# Closed set of class names: used to build the CLIP prompts and to validate
# the label returned by the OpenAI model.
CAT_LABELS = ["cheetah", "leopard", "lion", "puma", "tiger"]

# Both pipelines are constructed once, at import time, and reused per request.
print("Loading custom model...")
vit_classifier = pipeline(
    "image-classification",
    model=MODEL_PATH
)

print("Loading CLIP model...")
clip_classifier = pipeline(
    task="zero-shot-image-classification",
    model="openai/clip-vit-base-patch32"
)

# OpenAI key comes from Hugging Face Space secret: OPENAI_API_KEY
# It is read from the environment; os.getenv yields None when it is unset.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


# ----------------------------
# Helper functions
# ----------------------------

def encode_image(image_path):
    """Return the contents of the file at *image_path* as base64 text.

    The file is read in binary mode and the base64 result is decoded to a
    ``str`` so it can be embedded directly in a data URL.
    """
    with open(image_path, mode="rb") as image_file:
        raw_bytes = image_file.read()

    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")


def normalize_custom_labels(results):
    """Convert classifier results into a ``{label: score}`` dict.

    Each item of *results* is read through its ``"label"`` and ``"score"``
    keys. Generic ``LABEL_<n>`` names are translated to the matching cat
    name; any other label is lower-cased. Scores are coerced to ``float``.
    When two items end up with the same label, the later one wins.
    """
    generic_to_name = dict(
        LABEL_0="cheetah",
        LABEL_1="leopard",
        LABEL_2="lion",
        LABEL_3="puma",
        LABEL_4="tiger",
    )

    scores_by_name = {}
    for entry in results:
        raw_label, probability = entry["label"], float(entry["score"])
        name = (
            generic_to_name[raw_label]
            if raw_label in generic_to_name
            else raw_label.lower()
        )
        scores_by_name[name] = probability

    return scores_by_name


def classify_with_openai(image_path):
    """Classify a big-cat image with an OpenAI vision model.

    The image is sent inline as a base64 data URL together with a prompt that
    asks for a JSON object containing one label from ``CAT_LABELS`` and a
    confidence. The reply is turned into one score per label: the chosen
    label receives the confidence (clamped to ``[0, 1]``) and the remainder
    is split evenly across the other labels.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        A dict mapping every entry of ``CAT_LABELS`` to a float score, or
        ``{"unknown": 1.0}`` when the request fails or the reply cannot be
        parsed or validated (the reason is printed to the log).

    Raises:
        Whatever ``encode_image`` raises when the file cannot be read; that
        step runs before the best-effort section.
    """
    import math
    import mimetypes

    base64_image = encode_image(image_path)

    # Declare the real media type in the data URL instead of always claiming
    # JPEG; fall back to JPEG when the extension is unknown or not an image.
    guessed_type, _ = mimetypes.guess_type(str(image_path))
    if guessed_type and guessed_type.startswith("image/"):
        mime_type = guessed_type
    else:
        mime_type = "image/jpeg"

    # NOTE: CAT_LABELS is interpolated as its Python list repr.
    prompt = f"""
You are a big cat classifier.

Classify the image into exactly one of these labels:

{CAT_LABELS}

Return ONLY valid JSON.
Do not use markdown.
Do not use code fences.
Do not add explanations.

Required format:
{{"label":"one_of_{CAT_LABELS}","confidence":0.0}}
"""

    try:
        response = client.responses.create(
            model="gpt-4.1-mini",
            input=[
                {
                    "role": "user",
                    "content": [
                        {"type": "input_text", "text": prompt},
                        {
                            "type": "input_image",
                            "image_url": f"data:{mime_type};base64,{base64_image}"
                        }
                    ]
                }
            ]
        )

        # Tolerate replies that wrap the JSON in code fences or extra prose:
        # strip the fences, then keep only the outermost {...} span.
        text = response.output_text.strip()
        text = text.replace("```json", "").replace("```", "").strip()

        start = text.find("{")
        end = text.rfind("}")
        if start != -1 and end > start:
            text = text[start:end + 1]

        result = json.loads(text)

        label = str(result["label"]).strip().lower()
        confidence = float(result["confidence"])

        if label not in CAT_LABELS:
            raise ValueError(f"Invalid label: {label}")

        # json.loads accepts NaN, and min()/max() would silently turn it
        # into 1.0, so reject it explicitly.
        if math.isnan(confidence):
            raise ValueError("Confidence is NaN")

        confidence = max(0.0, min(1.0, confidence))
        remaining = 1.0 - confidence
        num_other = len(CAT_LABELS) - 1

        # The division stays inside the "other label" branch so it is never
        # evaluated when there are no other labels.
        return {
            name: confidence if name == label else remaining / num_other
            for name in CAT_LABELS
        }

    except Exception as exc:
        # Best-effort boundary: the UI still gets a result, but the cause is
        # logged instead of being silently discarded.
        print(f"OpenAI classification failed: {exc!r}")
        return {"unknown": 1.0}


# ----------------------------
# Main function
# ----------------------------

def classify_cat(image):
    """Run one image through the custom model, CLIP and OpenAI.

    Args:
        image: File path of the image to classify, or ``None`` when no image
            was provided.

    Returns:
        A 3-tuple ``(vit_output, clip_output, openai_output)`` of
        ``{label: score}`` dicts. When *image* is ``None`` the tuple is
        ``(None, None, None)`` so the outputs are left empty instead of the
        classifiers failing on a missing input.
    """
    # Nothing to classify (e.g. submit pressed without an upload).
    if image is None:
        return None, None, None

    # Custom Model
    vit_output = normalize_custom_labels(vit_classifier(image))

    # CLIP: labels are wrapped in a prompt for zero-shot classification and
    # unwrapped again when the scores are collected.
    clip_labels = [f"a photo of a {label}" for label in CAT_LABELS]
    clip_results = clip_classifier(image, candidate_labels=clip_labels)
    clip_output = {
        r["label"].replace("a photo of a ", "").lower(): float(r["score"])
        for r in clip_results
    }

    # OpenAI
    openai_output = classify_with_openai(image)

    return vit_output, clip_output, openai_output


# ----------------------------
# Example images
# ----------------------------

# One single-element row per example image, each holding the image's path
# inside EXAMPLE_DIR as a string.
example_images = [
    [str(EXAMPLE_DIR / file_name)]
    for file_name in (
        "Cheetah_032.jpg",
        "Leopard_001.jpg",
        "Lion_003.jpg",
        "Puma_001.jpg",
        "Tiger_001.jpg",
    )
]


# ----------------------------
# Interface
# ----------------------------

# Single image input (delivered to classify_cat as a file path) and one
# Label output per classifier, in the order classify_cat returns them.
iface = gr.Interface(
    fn=classify_cat,
    inputs=gr.Image(type="filepath"),
    outputs=[
        gr.Label(label=heading)
        for heading in ("Custom Model", "CLIP", "OpenAI")
    ],
    examples=example_images,
    title="Big Cat Classification",
    description="Compare Custom Model vs CLIP vs OpenAI",
)

# Launch the interface only when this file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()