# Exercise2 / app.py
# Author: DKatheesrupan — "Update app.py" (commit 228da07, verified)
import os
import json
import base64
from pathlib import Path
import gradio as gr
from openai import OpenAI
from transformers import pipeline
# Paths are resolved relative to this file so the Space works regardless of CWD.
BASE_DIR = Path(__file__).resolve().parent
EXAMPLE_DIR = BASE_DIR / "example_images"

# Fine-tuned ViT hosted on the Hugging Face Hub.
MODEL_PATH = "DKatheesrupan/cat-vit"
# Closed label set shared by all three classifiers (lowercase, alphabetical).
CAT_LABELS = ["cheetah", "leopard", "lion", "puma", "tiger"]

print("Loading custom model...")
# Supervised image classifier fine-tuned on the five big-cat classes.
vit_classifier = pipeline(
    "image-classification",
    model=MODEL_PATH
)

print("Loading CLIP model...")
# Zero-shot baseline: scores the image against free-text candidate labels.
clip_classifier = pipeline(
    task="zero-shot-image-classification",
    model="openai/clip-vit-base-patch32"
)

# OpenAI key comes from Hugging Face Space secret: OPENAI_API_KEY
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# ----------------------------
# Helper functions
# ----------------------------
def encode_image(image_path):
    """Return the file at *image_path* as a base64-encoded UTF-8 string."""
    raw_bytes = Path(image_path).read_bytes()
    return base64.b64encode(raw_bytes).decode("utf-8")
def normalize_custom_labels(results):
    """Convert raw pipeline output into a {label_name: score} dict.

    Generic ``LABEL_n`` ids are mapped to the corresponding big-cat name;
    any other label is simply lowercased.
    """
    names = ["cheetah", "leopard", "lion", "puma", "tiger"]
    id2label = {f"LABEL_{i}": name for i, name in enumerate(names)}
    return {
        id2label.get(entry["label"], entry["label"].lower()): float(entry["score"])
        for entry in results
    }
def classify_with_openai(image_path):
    """Classify a big-cat image via the OpenAI Responses API.

    Encodes the file as a base64 data URL, asks the model for a JSON
    ``{"label", "confidence"}`` answer restricted to CAT_LABELS, and turns
    that single answer into a full probability distribution (leftover mass
    spread evenly over the other labels) so the output shape matches the
    local classifiers'.

    Returns:
        dict[str, float]: label -> probability over CAT_LABELS, or
        ``{"unknown": 1.0}`` when the request or parsing fails.
    """
    import mimetypes  # stdlib; local import keeps the top-of-file imports untouched

    base64_image = encode_image(image_path)
    # Fix: the original hard-coded image/jpeg in the data URL, mislabeling
    # PNG/WebP uploads. Guess the real MIME type, fall back to JPEG.
    mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
    prompt = f"""
You are a big cat classifier.
Classify the image into exactly one of these labels:
{CAT_LABELS}
Return ONLY valid JSON.
Do not use markdown.
Do not use code fences.
Do not add explanations.
Required format:
{{"label":"one_of_{CAT_LABELS}","confidence":0.0}}
"""
    try:
        response = client.responses.create(
            model="gpt-4.1-mini",
            input=[
                {
                    "role": "user",
                    "content": [
                        {"type": "input_text", "text": prompt},
                        {
                            "type": "input_image",
                            "image_url": f"data:{mime_type};base64,{base64_image}",
                        },
                    ],
                }
            ],
        )
        text = response.output_text.strip()
        # Strip code fences the model may emit despite the instructions.
        text = text.replace("```json", "").replace("```", "").strip()
        # Keep only the outermost {...} span in case of stray surrounding text.
        start = text.find("{")
        end = text.rfind("}")
        if start != -1 and end != -1 and end > start:
            text = text[start:end + 1]
        result = json.loads(text)
        label = str(result["label"]).strip().lower()
        confidence = float(result["confidence"])
        if label not in CAT_LABELS:
            raise ValueError(f"Invalid label: {label}")
        # Clamp to [0, 1] and spread the remainder evenly over the other labels.
        confidence = max(0.0, min(1.0, confidence))
        remaining = 1.0 - confidence
        num_other = len(CAT_LABELS) - 1
        distribution = {}
        for cat in CAT_LABELS:
            distribution[cat] = confidence if cat == label else remaining / num_other
        return distribution
    except Exception:
        # Deliberate best-effort fallback: any API or parse failure maps to a
        # sentinel bucket instead of crashing the Gradio UI.
        return {"unknown": 1.0}
# ----------------------------
# Main function
# ----------------------------
def classify_cat(image):
    """Run all three classifiers on one image.

    Returns three {label: score} dicts: custom ViT, CLIP zero-shot, OpenAI.
    """
    # Custom fine-tuned ViT model.
    custom_scores = normalize_custom_labels(vit_classifier(image))

    # CLIP zero-shot: wrap each label in a natural-language prompt, then
    # strip the prompt template back off the returned labels.
    prompts = [f"a photo of a {name}" for name in CAT_LABELS]
    clip_scores = {
        entry["label"].replace("a photo of a ", "").lower(): float(entry["score"])
        for entry in clip_classifier(image, candidate_labels=prompts)
    }

    # OpenAI vision model.
    return custom_scores, clip_scores, classify_with_openai(image)
# ----------------------------
# Example images
# ----------------------------
# One sample image per class for the Gradio examples gallery; each inner
# list matches the interface's single image input.
example_images = [
    [str(EXAMPLE_DIR / "Cheetah_032.jpg")],
    [str(EXAMPLE_DIR / "Leopard_001.jpg")],
    [str(EXAMPLE_DIR / "Lion_003.jpg")],
    [str(EXAMPLE_DIR / "Puma_001.jpg")],
    [str(EXAMPLE_DIR / "Tiger_001.jpg")]
]
# ----------------------------
# Interface
# ----------------------------
# Single-input, three-output UI: the image is passed as a filepath because
# classify_with_openai re-reads the file to base64-encode it.
iface = gr.Interface(
    fn=classify_cat,
    inputs=gr.Image(type="filepath"),
    outputs=[
        gr.Label(label="Custom Model"),
        gr.Label(label="CLIP"),
        gr.Label(label="OpenAI")
    ],
    title="Big Cat Classification",
    description="Compare Custom Model vs CLIP vs OpenAI",
    examples=example_images
)

if __name__ == "__main__":
    iface.launch()