import base64
import io
import json

import gradio as gr
import openai
import torch
from PIL import Image
from transformers import pipeline, CLIPProcessor, CLIPModel
|
|
| |
# Model id handed to the image-classification pipeline.
# NOTE(review): the value looks like a placeholder ("your-username/your-model")
# and presumably must be replaced with a real model id before the app can start.
MY_MODEL_ID = "DEIN-USERNAME/DEIN-MODELL"
my_classifier = pipeline("image-classification", model=MY_MODEL_ID)

# CLIP model and its processor are loaded from the same checkpoint; a single
# constant keeps the two from drifting apart.
CLIP_MODEL_ID = "openai/clip-vit-base-patch32"
clip_model = CLIPModel.from_pretrained(CLIP_MODEL_ID)
clip_processor = CLIPProcessor.from_pretrained(CLIP_MODEL_ID)

# Candidate class names: used as CLIP text prompts and listed in the GPT-4o
# prompt.
LABELS = ["cat", "dog", "bird"]
|
|
def predict_my_model(image):
    """Classify ``image`` with the custom pipeline.

    Returns a dict mapping each predicted label to its score, built from the
    ``"label"`` / ``"score"`` entries the pipeline returns.
    """
    scores = {}
    for prediction in my_classifier(image):
        scores[prediction["label"]] = prediction["score"]
    return scores
|
|
def predict_clip(image):
    """Score ``image`` against every entry of ``LABELS`` using CLIP.

    Returns a dict mapping each label to the softmax of the image-to-text
    logits, converted to plain Python floats.
    """
    batch = clip_processor(
        text=LABELS,
        images=image,
        return_tensors="pt",
        padding=True,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        result = clip_model(**batch)

    # logits_per_image has one row per image; a single image was passed, so
    # take row 0 after normalising across the label axis.
    image_probs = result.logits_per_image.softmax(dim=1)[0]
    return dict(zip(LABELS, (float(p) for p in image_probs)))
|
|
def predict_openai(image):
    """Classify ``image`` with GPT-4o and return a ``{label: score}`` dict.

    The image is sent inline as a base64 JPEG data URL together with a prompt
    asking for a JSON object that assigns a score to each entry of ``LABELS``.

    Args:
        image: PIL image to classify. Non-RGB images are converted to RGB
            before JPEG encoding.

    Returns:
        dict mapping label strings to float scores, as reported by the model.

    Raises:
        ValueError: if the reply holds no parseable JSON object
            (``json.JSONDecodeError`` is a ``ValueError`` subclass) or its
            values are not numeric.
    """
    client = openai.OpenAI(api_key=openai.api_key)

    data_url = f"data:image/jpeg;base64,{_image_to_jpeg_b64(image)}"
    prompt = (
        f"Classify this image as one of: {LABELS}. "
        f"Return only a JSON like: {{\"label\": score, ...}} "
        f"where scores sum to 1."
    )

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": data_url}},
                {"type": "text", "text": prompt},
            ],
        }],
        max_tokens=100,
    )
    return _parse_label_scores(response.choices[0].message.content)


def _image_to_jpeg_b64(image):
    """Return ``image`` encoded as a base64 JPEG string."""
    # JPEG cannot store alpha or palette data, and Pillow refuses to write
    # such modes (e.g. RGBA, P) as JPEG, so normalise to RGB first.
    if image.mode != "RGB":
        image = image.convert("RGB")
    buf = io.BytesIO()
    image.save(buf, format="JPEG")
    return base64.b64encode(buf.getvalue()).decode()


def _parse_label_scores(reply):
    """Extract a ``{label: float}`` dict from the model's text reply."""
    text = reply or ""  # message content can be None

    # The model may wrap the JSON in a Markdown code fence or surround it
    # with prose; keep only the outermost {...} span when there is one.
    start, end = text.find("{"), text.rfind("}")
    if start != -1 and end > start:
        text = text[start:end + 1]

    parsed = json.loads(text)
    if not isinstance(parsed, dict):
        raise ValueError(f"Expected a JSON object of scores, got: {parsed!r}")
    try:
        return {str(label): float(score) for label, score in parsed.items()}
    except (TypeError, ValueError) as err:
        raise ValueError(f"Non-numeric score in model reply: {parsed!r}") from err
|
|
def classify_all(image):
    """Run the custom model, CLIP and GPT-4o on ``image``.

    Args:
        image: PIL image to classify, or ``None`` when nothing was supplied.

    Returns:
        A 3-tuple ``(custom_model, clip, gpt4o)`` of ``{label: score}`` dicts,
        or ``(None, None, None)`` when ``image`` is ``None``.
    """
    # The UI can trigger this callback before any image has been uploaded;
    # return empty results instead of failing inside the first model.
    if image is None:
        return None, None, None

    return predict_my_model(image), predict_clip(image), predict_openai(image)
|
|
| |
# Example image paths offered to the user through gr.Examples.
examples = ["example1.jpg", "example2.jpg"]


# UI: one image input, a button, and three label outputs (one per model).
with gr.Blocks(title="Image Classification Comparison") as demo:
    gr.Markdown("# 🖼️ Image Classification — Model Comparison")
    gr.Markdown("Compare your custom model, CLIP, and GPT-4o Vision.")

    with gr.Row():
        img_input = gr.Image(type="pil", label="Upload Image")

    btn = gr.Button("Classify!", variant="primary")

    with gr.Row():
        out1 = gr.Label(label="🏷️ My Model")
        # NOTE(review): the original icon for this caption was lost to an
        # encoding error; the paperclip is a best guess.
        out2 = gr.Label(label="📎 CLIP")
        out3 = gr.Label(label="🤖 GPT-4o")

    btn.click(classify_all, inputs=img_input, outputs=[out1, out2, out3])
    gr.Examples(examples=examples, inputs=img_input)


# Start the server only when run as a script, so importing this module
# (e.g. from tests or tooling) does not launch it.
if __name__ == "__main__":
    demo.launch()