import gradio as gr
from transformers import pipeline, AutoImageProcessor, AutoModelForImageClassification

# Load the image processor explicitly: the fine-tuned checkpoint may not ship
# its own processor config, so we reuse the one from the base ViT model.
processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
model = AutoModelForImageClassification.from_pretrained("Granitagushi/vit-base-fruits-360")

vit_classifier = pipeline(
    "image-classification",
    model=model,
    image_processor=processor,
    device=0  # or -1 for CPU
)

clip_detector = pipeline(
    model="openai/clip-vit-large-patch14",
    task="zero-shot-image-classification"
)

labels_fruits = [
    'Orange', 'Strawberry Wedge', 'Banana', 'Cherry', 'Apple Red'
]


def classify_fruit(image):
    # Fine-tuned ViT: predicts over the Fruits-360 labels it was trained on.
    vit_results = vit_classifier(image)
    vit_output = {result['label']: result['score'] for result in vit_results}

    # Zero-shot CLIP: scores the image against the candidate labels above.
    clip_results = clip_detector(image, candidate_labels=labels_fruits)
    clip_output = {result['label']: result['score'] for result in clip_results}

    return {
        "ViT Classification": vit_output,
        "CLIP Zero-Shot Classification": clip_output
    }


example_images = [
    ["example_images/Apple.jpg"],
    ["example_images/Banana.jpg"],
    ["example_images/Cherry.jpg"],
    ["example_images/orange.jpg"],
    ["example_images/strawberry.jpg"]
]

iface = gr.Interface(
    fn=classify_fruit,
    inputs=gr.Image(type="filepath"),
    outputs=gr.JSON(),
    title="Fruit Classification Comparison",
    description="Upload an image of a fruit, and compare results from a fine-tuned ViT model and a zero-shot CLIP model.",
    examples=example_images
)

if __name__ == "__main__":
    iface.launch()
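
# A minimal sketch of calling classify_fruit directly, bypassing the Gradio
# UI (assumes the example images listed above exist locally; the path here
# is illustrative):
#
#     results = classify_fruit("example_images/Apple.jpg")
#     print(results["ViT Classification"])
#     print(results["CLIP Zero-Shot Classification"])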