Spaces:
Runtime error
Runtime error
| import os | |
| import json | |
| import re | |
| import base64 | |
| import gradio as gr | |
| from openai import OpenAI | |
| from transformers import pipeline | |
# Your own fine-tuned model.
vit_classifier = pipeline(task="image-classification", model="DKatheesrupan/pet-classifier")

# Open-source comparison model (zero-shot).
clip_classifier = pipeline(task="zero-shot-image-classification", model="openai/clip-vit-large-patch14")
# Candidate breed names: passed to the CLIP zero-shot classifier and listed in
# the OpenAI prompt. Order and spelling are significant, so keep them as-is.
labels = [
    "Abyssinian", "american bulldog", "american pit bull terrier",
    "Basset Hound", "beagle", "Bengal", "Birman", "Bombay", "boxer",
    "British Shorthair", "chihuahua", "Egyptian Mau",
    "english cocker spaniel", "english setter", "german shorthaired",
    "great pyrenees", "havanese", "japanese chin", "keeshond", "leonberger",
    "Maine Coon", "miniature pinscher", "newfoundland", "Persian",
    "pomeranian", "pug", "Ragdoll", "Russian Blue", "saint bernard",
    "samoyed", "scottish terrier", "shiba inu", "Siamese", "Sphynx",
    "staffordshire bull terrier", "wheaten terrier", "yorkshire terrier",
]
def extract_json(text):
    """Best-effort extraction of a JSON value from a model reply.

    Candidates are tried in order and the first one that parses wins:

    1. the whole (stripped) text,
    2. the first ``{...}`` object inside a Markdown code fence
       (with or without a ``json`` language tag),
    3. the widest ``{...}`` span found anywhere in the text.

    Args:
        text: The raw reply. ``None`` and other non-text values are accepted
            and yield ``None``; bytes are decoded as UTF-8.

    Returns:
        The decoded JSON value, or ``None`` when nothing could be parsed.
    """
    if isinstance(text, (bytes, bytearray)):
        text = text.decode("utf-8", errors="replace")
    if not isinstance(text, str):
        # Chat completions may carry ``None`` content; there is nothing to parse.
        return None

    text = text.strip()
    candidates = [text]

    fenced = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, flags=re.DOTALL)
    if fenced:
        candidates.append(fenced.group(1))

    braced = re.search(r"(\{.*\})", text, flags=re.DOTALL)
    if braced:
        candidates.append(braced.group(1))

    for candidate in candidates:
        try:
            return json.loads(candidate)
        except ValueError:
            # json.JSONDecodeError subclasses ValueError; fall through to the
            # next, more lenient candidate.
            continue
    return None
def classify_openai(image_path):
    """Classify a pet image with OpenAI's ``gpt-4o-mini`` vision model.

    The image file is sent inline as a base64 data URL together with a prompt
    that asks for exactly one label from ``labels`` as a JSON object.

    Args:
        image_path: Path to the image file on disk.

    Returns:
        The parsed JSON reply on success. On failure, a dict with a
        ``"warning"`` key describing what went wrong (missing API key,
        unreadable image, failed request, or unparseable reply; the last also
        carries the reply under ``"raw_response"``). The function does not
        raise for these cases so that one failing backend does not discard
        the other models' results.
    """
    import mimetypes  # local import keeps this block self-contained

    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        return {"warning": "OPENAI_API_KEY is not set."}

    try:
        with open(image_path, "rb") as f:
            image_b64 = base64.b64encode(f.read()).decode("utf-8")
    except (OSError, TypeError) as exc:
        # TypeError covers a missing path (None) from an empty upload.
        return {"warning": f"Could not read image: {exc}"}

    # Advertise the real media type instead of always claiming JPEG; fall
    # back to JPEG when the extension is unknown or not an image type.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    label_list = ", ".join(labels)
    prompt = f"""
You are an image classifier for the Oxford-IIIT Pet dataset.
Choose exactly ONE label from this list:
{label_list}
Return ONLY valid JSON in this format:
{{
"label": "one label from the list",
"confidence": 0.0,
"reasoning": "short explanation"
}}
Rules:
- label must be exactly one of the labels above
- confidence must be between 0 and 1
- if uncertain, choose the closest label from the list
- no markdown
- no code fences
"""

    try:
        client = OpenAI(api_key=api_key)
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{image_b64}"
                            },
                        },
                    ],
                }
            ],
            max_tokens=300,
        )
        # Content may be None (e.g. a refusal); normalise to an empty string.
        raw_text = response.choices[0].message.content or ""
    except Exception as exc:
        # Boundary handler: network, auth, and rate-limit errors are reported
        # to the UI rather than aborting the whole comparison.
        return {"warning": f"OpenAI request failed: {type(exc).__name__}: {exc}"}

    parsed = extract_json(raw_text)
    if parsed is None:
        return {
            "raw_response": raw_text,
            "warning": "OpenAI response was not valid JSON.",
        }
    return parsed
def _top_scores(results, limit=3):
    """Map the first *limit* predictions to ``{label: score rounded to 4 places}``."""
    return {item["label"]: round(item["score"], 4) for item in results[:limit]}


def classify_pet(image):
    """Run all three classifiers on one image and collect their answers.

    Args:
        image: The image to classify, as passed by the Gradio input component
            (a file path), or ``None`` when nothing was uploaded.

    Returns:
        A dict with one entry per model: the first three predictions of the
        fine-tuned ViT and of zero-shot CLIP as ``{label: score}``, plus the
        OpenAI result. When no image was provided, a dict with a single
        ``"warning"`` key is returned instead.
    """
    if image is None:
        # Submitting without an upload passes None; report it instead of
        # failing inside the model pipelines.
        return {"warning": "No image provided."}

    vit_output = _top_scores(vit_classifier(image))
    clip_output = _top_scores(clip_classifier(image, candidate_labels=labels))
    openai_output = classify_openai(image)

    return {
        "ViT Classification (My Model)": vit_output,
        "CLIP Zero-Shot Classification": clip_output,
        "OpenAI Vision Classification": openai_output,
    }
# Sample inputs offered in the UI; paths are relative to the working directory.
example_images = [
    "example_images/dog1.jpg", "example_images/dog2.jpg",
    "example_images/leonberger.jpg", "example_images/cat.jpg",
]
# Comparison UI: one image upload in (handed to classify_pet as a file path),
# one JSON panel out.
image_input = gr.Image(type="filepath")
json_output = gr.JSON()

iface = gr.Interface(
    fn=classify_pet,
    inputs=image_input,
    outputs=json_output,
    title="Oxford Pet Classification Comparison",
    description="Vergleich zwischen Fine-Tuned ViT (eigenes Modell), Zero-Shot CLIP und OpenAI Vision.",
    examples=example_images,
    cache_examples=False,
)

iface.launch()