Spaces:

wueesnin
/

image_comparison

Sleeping

App Files Files Community

wueesnin committed on Apr 11

Commit

4194462

verified ·

1 Parent(s): 2fa315d

Update app.py

Browse files

Updated app.py function

Files changed (1) hide show

app.py +280 -0

app.py CHANGED Viewed

	@@ -0,0 +1,280 @@

+import os
+from typing import List, Dict, Tuple
+import gradio as gr
+import torch
+from PIL import Image
+from transformers import (
+    AutoImageProcessor,
+    AutoModelForImageClassification,
+    CLIPModel,
+    CLIPProcessor,
+)
+# Optional OpenAI client. The app still works without it.
+try:
+    from openai import OpenAI
+except Exception:
+    OpenAI = None
+# =========================================================
+# Configuration
+# =========================================================
+# Replace these labels with your final dataset classes.
+# This list is handed to the CLIP and OpenAI predictors by compare_models();
+# the fine-tuned model does not read it and instead uses the id2label mapping
+# stored in its own config.
+CLASS_LABELS: List[str] = [
+    "sphynx",
+    "russian blue",
+    "maine coon",
+    "ragdoll",
+    "bengal",
+    "singapura",
+    "calico cat"
+]
+# Your fine-tuned Hugging Face image classification model.
+# Overridable through the CUSTOM_MODEL_ID environment variable; the default
+# below is a placeholder.
+# Example: "your-username/cat-vs-wild-animal-vit"
+CUSTOM_MODEL_ID = os.getenv("CUSTOM_MODEL_ID", "your-username/your-model-name")
+# Open-source comparison model.
+# Overridable through the CLIP_MODEL_ID environment variable.
+CLIP_MODEL_ID = os.getenv("CLIP_MODEL_ID", "openai/clip-vit-base-patch32")
+# Example images shown in Gradio. Add real files before deployment.
+# Each entry is a one-element list: one value for the single image input.
+EXAMPLE_IMAGES = [
+    ["example_images/sphynx.jpg"],
+    ["example_images/russian-blue.jpg"],
+    ["example_images/maine-coon.jpg"],
+    ["example_images/ragdoll.jpg"],
+    ["example_images/bengal.jpg"],
+    ["example_images/singapura.jpg"],
+    ["example_images/calico.jpg"],
+]
+# =========================================================
+# Model loading
+# =========================================================
+# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# Fine-tuned classifier state, filled in by load_custom_model().
+# custom_model_error holds the failure text when loading did not succeed.
+custom_processor = None
+custom_model = None
+custom_model_error = None
+# CLIP state, filled in by load_clip_model().
+# clip_model_error holds the failure text when loading did not succeed.
+clip_processor = None
+clip_model = None
+clip_model_error = None
+def load_custom_model() -> None:
+    global custom_processor, custom_model, custom_model_error
+    try:
+        custom_processor = AutoImageProcessor.from_pretrained(CUSTOM_MODEL_ID)
+        custom_model = AutoModelForImageClassification.from_pretrained(CUSTOM_MODEL_ID)
+        custom_model.to(device)
+        custom_model.eval()
+    except Exception as exc:
+        custom_model_error = str(exc)
+def load_clip_model() -> None:
+    global clip_processor, clip_model, clip_model_error
+    try:
+        clip_processor = CLIPProcessor.from_pretrained(CLIP_MODEL_ID)
+        clip_model = CLIPModel.from_pretrained(CLIP_MODEL_ID)
+        clip_model.to(device)
+        clip_model.eval()
+    except Exception as exc:
+        clip_model_error = str(exc)
+load_custom_model()
+load_clip_model()
+# =========================================================
+# Helpers
+# =========================================================
+def ensure_rgb(image: Image.Image) -> Image.Image:
+    if image.mode != "RGB":
+        image = image.convert("RGB")
+    return image
+def format_topk(predictions: List[Tuple[str, float]]) -> str:
+    lines = []
+    for rank, (label, score) in enumerate(predictions, start=1):
+        lines.append(f"{rank}. {label} ({score:.4f})")
+    return "\n".join(lines)
+def predict_custom_model(image: Image.Image, top_k: int = 3) -> Tuple[str, Dict[str, float]]:
+    if custom_model is None or custom_processor is None:
+        message = (
+            "Custom model could not be loaded.\n\n"
+            f"Model ID: {CUSTOM_MODEL_ID}\n"
+            f"Error: {custom_model_error}"
+        )
+        return message, {}
+    image = ensure_rgb(image)
+    inputs = custom_processor(images=image, return_tensors="pt")
+    inputs = {k: v.to(device) for k, v in inputs.items()}
+    with torch.no_grad():
+        outputs = custom_model(**inputs)
+        probs = torch.softmax(outputs.logits, dim=-1)[0]
+    id2label = custom_model.config.id2label
+    top_indices = torch.topk(probs, k=min(top_k, probs.shape[0])).indices.tolist()
+    top_preds = []
+    label_scores = {}
+    for idx in top_indices:
+        label = id2label.get(idx, str(idx))
+        score = probs[idx].item()
+        top_preds.append((label, score))
+        label_scores[label] = score
+    return format_topk(top_preds), label_scores
+def predict_clip(image: Image.Image, class_labels: List[str], top_k: int = 3) -> Tuple[str, Dict[str, float]]:
+    if clip_model is None or clip_processor is None:
+        message = (
+            "CLIP model could not be loaded.\n\n"
+            f"Model ID: {CLIP_MODEL_ID}\n"
+            f"Error: {clip_model_error}"
+        )
+        return message, {}
+    image = ensure_rgb(image)
+    prompts = [f"a photo of a {label}" for label in class_labels]
+    inputs = clip_processor(text=prompts, images=image, return_tensors="pt", padding=True)
+    inputs = {k: v.to(device) for k, v in inputs.items()}
+    with torch.no_grad():
+        outputs = clip_model(**inputs)
+        logits = outputs.logits_per_image[0]
+        probs = torch.softmax(logits, dim=-1)
+    pairs = [(label, probs[i].item()) for i, label in enumerate(class_labels)]
+    pairs.sort(key=lambda x: x[1], reverse=True)
+    top_preds = pairs[:top_k]
+    label_scores = {label: score for label, score in pairs}
+    return format_topk(top_preds), label_scores
+def predict_openai(image: Image.Image, class_labels: List[str]) -> str:
+    if OpenAI is None:
+        return "OpenAI package is not installed. Add `openai` to requirements.txt."
+    api_key = os.getenv("OPENAI_API_KEY")
+    if not api_key:
+        return "OPENAI_API_KEY is not set. The app can still run without the OpenAI comparison."
+    try:
+        client = OpenAI(api_key=api_key)
+        # Convert image to bytes for upload.
+        import io
+        buffer = io.BytesIO()
+        ensure_rgb(image).save(buffer, format="JPEG")
+        buffer.seek(0)
+        uploaded = client.files.create(file=("image.jpg", buffer.getvalue(), "image/jpeg"), purpose="vision")
+        prompt = (
+            "You are an image classifier. "
+            "Choose exactly one label from this label set: "
+            f"{', '.join(class_labels)}. "
+            "Return a short answer with this structure only: "
+            "label: <chosen label>\\nreason: <very short reason>."
+        )
+        response = client.responses.create(
+            model="gpt-4.1-mini",
+            input=[
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "input_text", "text": prompt},
+                        {"type": "input_image", "file_id": uploaded.id},
+                    ],
+                }
+            ],
+        )
+        return response.output_text.strip()
+    except Exception as exc:
+        return f"OpenAI prediction failed: {exc}"
+def compare_models(image: Image.Image) -> Tuple[str, Dict[str, float], str, Dict[str, float], str]:
+    if image is None:
+        return "Please upload an image.", {}, "Please upload an image.", {}, "Please upload an image."
+    custom_text, custom_scores = predict_custom_model(image)
+    clip_text, clip_scores = predict_clip(image, CLASS_LABELS)
+    openai_text = predict_openai(image, CLASS_LABELS)
+    return custom_text, custom_scores, clip_text, clip_scores, openai_text
+# =========================================================
+# UI
+# =========================================================
+# Markdown text rendered under the page title.
+DESCRIPTION = """
+Upload an image and compare three approaches:
+1. Fine-tuned transfer learning model
+2. Zero-shot CLIP
+3. OpenAI vision model
+This version focuses only on cat breed classification.
+"""
+# Build the Gradio interface. Components appear in the order they are created
+# inside the nested Row/Column contexts.
+with gr.Blocks() as demo:
+    gr.Markdown("# Cat Breed Classifier")
+    gr.Markdown(DESCRIPTION)
+    with gr.Row():
+        # type="pil" is requested so the callback can be fed a PIL image,
+        # matching the Image.Image annotation on compare_models().
+        image_input = gr.Image(type="pil", label="Upload image")
+    run_btn = gr.Button("Run comparison")
+    # One column per approach; the OpenAI column has text only, no score widget.
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("## Fine-tuned model")
+            custom_text = gr.Textbox(label="Top predictions", lines=6)
+            custom_plot = gr.Label(label="Scores")
+        with gr.Column():
+            gr.Markdown("## CLIP zero-shot")
+            clip_text = gr.Textbox(label="Top predictions", lines=6)
+            clip_plot = gr.Label(label="Scores")
+        with gr.Column():
+            gr.Markdown("## OpenAI vision")
+            openai_text = gr.Textbox(label="Prediction", lines=6)
+    # The outputs list must stay in the same order as the 5-tuple returned by
+    # compare_models().
+    run_btn.click(
+        fn=compare_models,
+        inputs=image_input,
+        outputs=[custom_text, custom_plot, clip_text, clip_plot, openai_text],
+    )
+    # Example images that populate image_input; paths come from EXAMPLE_IMAGES.
+    gr.Examples(
+        examples=EXAMPLE_IMAGES,
+        inputs=image_input,
+        label="Example images",
+    )
+# Start the app only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()