# lst0004's picture
# Update app.py
# 257b6be verified
import base64
import mimetypes
import os

import gradio as gr
import openai
from transformers import pipeline
# Flower class names used as CLIP candidate labels, listed in the OpenAI
# prompt, and matched against when normalising the OpenAI reply.
# Kept as a list: its repr is interpolated verbatim into the prompt text.
FLOWER_LABELS = [
    "daisy",
    "dandelion",
    "rose",
    "sunflower",
    "tulip",
]
# Image-classification pipeline loaded from a local model directory
# (described in the UI text as the fine-tuned ViT model).
vit_classifier = pipeline(
    task="image-classification",
    model="./flower-vit-classifier",
)

# CLIP pipeline for zero-shot classification; candidate labels are supplied
# per call rather than baked into the model.
clip_classifier = pipeline(
    "zero-shot-image-classification",
    model="openai/clip-vit-large-patch14",
)

# OpenAI API client; the key is taken from the OPENAI_API_KEY environment
# variable (None is passed through when the variable is unset).
client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
def normalize_label(text):
    """Map free-form text onto one of the known flower labels.

    The text is stripped, lower-cased, and has periods and commas removed.
    The first entry of ``FLOWER_LABELS`` (in list order) that occurs as a
    substring of the cleaned text is returned.

    Args:
        text: Raw text to normalise.

    Returns:
        The matching label, or the cleaned text itself when no label
        occurs in it.
    """
    cleaned = text.strip().lower()
    for punctuation in (".", ","):
        cleaned = cleaned.replace(punctuation, "")

    # Substring search, so replies such as "a rose" still resolve to "rose".
    matched = next(
        (candidate for candidate in FLOWER_LABELS if candidate in cleaned),
        None,
    )
    return cleaned if matched is None else matched
def classify_openai(image_path):
    """Classify a flower image with the OpenAI vision model.

    The image file is read from disk and embedded in the request as a
    base64 data URL. The model's text reply is then passed through
    ``normalize_label``.

    Args:
        image_path: Path to the image file on disk.

    Returns:
        The matching entry of ``FLOWER_LABELS``, or the cleaned-up reply
        text when no known label occurs in it.

    Raises:
        OSError: If the image file cannot be opened or read.
    """
    with open(image_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode("utf-8")

    # Declare the file's actual media type in the data URL instead of always
    # claiming JPEG, since the input may be PNG, WebP, etc. Fall back to JPEG
    # when the extension is unknown or does not map to an image type.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    # FLOWER_LABELS is interpolated as its list repr.
    prompt = (
        "\n"
        "You are a flower image classifier.\n"
        "Classify the image into exactly one of these labels:\n"
        f"{FLOWER_LABELS}\n"
        "Return only one label and nothing else.\n"
    )

    response = client.responses.create(
        model="gpt-4.1-mini",
        input=[{
            "role": "user",
            "content": [
                {"type": "input_text", "text": prompt},
                {
                    "type": "input_image",
                    "image_url": f"data:{mime_type};base64,{b64}",
                },
            ],
        }],
    )
    return normalize_label(response.output_text)
def classify_flower(image):
    """Run all three classifiers on one image and collect their outputs.

    Args:
        image: Path to the image file, as delivered by the
            ``gr.Image(type="filepath")`` input. ``None`` when the form is
            submitted without an image.

    Returns:
        A dict with three entries:
        ``"your_vit_model"`` and ``"clip_zero_shot"`` each map label to
        score (as ``float``); ``"openai_prediction"`` is the single label
        returned by ``classify_openai``.

    Raises:
        gr.Error: If no image was provided.
    """
    # Without this guard a missing upload fails deep inside a pipeline or
    # open() with an opaque error; gr.Error carries a readable message instead.
    if image is None:
        raise gr.Error("Please upload an image before submitting.")

    vit_results = vit_classifier(image)
    vit_output = {x["label"]: float(x["score"]) for x in vit_results}

    clip_results = clip_classifier(image, candidate_labels=FLOWER_LABELS)
    clip_output = {x["label"]: float(x["score"]) for x in clip_results}

    openai_label = classify_openai(image)

    return {
        "your_vit_model": vit_output,
        "clip_zero_shot": clip_output,
        "openai_prediction": openai_label,
    }
# One sample image per flower class, passed to the interface as examples.
# Each example is wrapped in its own single-element list.
example_images = [
    [sample_path]
    for sample_path in (
        "example_images/train/daisy/2488902131_3417698611_n.jpg",
        "example_images/train/dandelion/3554992110_81d8c9b0bd_m.jpg",
        "example_images/train/rose/3415176946_248afe9f32.jpg",
        "example_images/train/sunflower/4932144003_cbffc89bf0.jpg",
        "example_images/train/tulip/4579128789_1561575458_n.jpg",
    )
]
# Gradio UI: a single image input (handed to the callback as a file path)
# and a JSON view of the combined classifier results.
iface = gr.Interface(
    title="Flower Classification Comparison",
    description="Compare a fine-tuned ViT model, CLIP zero-shot, and OpenAI vision on flower images.",
    fn=classify_flower,
    inputs=gr.Image(type="filepath"),
    outputs=gr.JSON(),
    examples=example_images,
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()