| | from transformers import pipeline |
| |
|
# Zero-shot image classifier: CLIP ViT-Large/14 via the transformers pipeline API.
checkpoint = "openai/clip-vit-large-patch14"
detector = pipeline(task="zero-shot-image-classification", model=checkpoint)
|
| | from datasets import load_dataset |
| |
|
# Oxford-IIIT Pets, re-packaged with raw image bytes per row.
dataset = load_dataset('pcuenq/oxford-pets')
dataset

# Peek at the first training example's raw image payload.
dataset['train'][0]['image']
|
| | from PIL import Image |
| | import io |
| | from tqdm import tqdm |
| |
|
# The 37 Oxford-IIIT Pets breed names, used as CLIP's candidate labels.
# Casing is kept exactly as it appears in the dataset's label column.
labels_oxford_pets = [
    'Siamese', 'Birman', 'shiba inu', 'staffordshire bull terrier',
    'basset hound', 'Bombay', 'japanese chin', 'chihuahua',
    'german shorthaired', 'pomeranian', 'beagle', 'english cocker spaniel',
    'american pit bull terrier', 'Ragdoll', 'Persian', 'Egyptian Mau',
    'miniature pinscher', 'Sphynx', 'Maine Coon', 'keeshond',
    'yorkshire terrier', 'havanese', 'leonberger', 'wheaten terrier',
    'american bulldog', 'english setter', 'boxer', 'newfoundland',
    'Bengal', 'samoyed', 'British Shorthair', 'great pyrenees',
    'Abyssinian', 'pug', 'saint bernard', 'Russian Blue',
    'scottish terrier',
]
|
| | |
# Zero-shot evaluation of CLIP over the whole training split.
# Fixes vs. the original loop:
#  - iterate rows directly instead of indexing `dataset['train'][i]` three
#    times per iteration (each bracket access re-materializes the row);
#  - take the argmax with max(key=...) instead of fully sorting the scores.
true_labels = []
predicted_labels = []

for sample in tqdm(dataset['train']):
    # Decode the stored raw bytes into a PIL image.
    image = Image.open(io.BytesIO(sample['image']['bytes']))

    # Score every candidate breed name against the image.
    results = detector(image, candidate_labels=labels_oxford_pets)

    # Highest-scoring candidate is the prediction (no need to sort all 37).
    best = max(results, key=lambda r: r['score'])

    true_labels.append(sample['label'])
    predicted_labels.append(best['label'])
|
| |
|
| | from sklearn.metrics import accuracy_score, precision_score, recall_score |
| |
|
| | |
# Overall fraction of exact label matches.
accuracy = accuracy_score(true_labels, predicted_labels)

# Weighted averaging accounts for per-class support (class imbalance).
# zero_division=0 silences sklearn's UndefinedMetricWarning for candidate
# labels that were never predicted — the score it assigns (0) is the same
# value the default already falls back to, so the numbers are unchanged.
precision = precision_score(true_labels, predicted_labels, average='weighted',
                            labels=labels_oxford_pets, zero_division=0)
recall = recall_score(true_labels, predicted_labels, average='weighted',
                      labels=labels_oxford_pets, zero_division=0)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
| |
|
| | import gradio as gr |
| | from transformers import pipeline |
| |
|
| | |
# Supervised baseline: ViT fine-tuned on Oxford-IIIT Pets.
vit_classifier = pipeline("image-classification", model="kuhs/vit-base-oxford-iiit-pets")
# Zero-shot comparison model (no pet-specific training).
clip_detector = pipeline(task="zero-shot-image-classification", model="openai/clip-vit-large-patch14")

# Candidate breed names fed to CLIP, matching the dataset's label strings.
labels_oxford_pets = [
    'Siamese', 'Birman', 'shiba inu', 'staffordshire bull terrier',
    'basset hound', 'Bombay', 'japanese chin', 'chihuahua',
    'german shorthaired', 'pomeranian', 'beagle', 'english cocker spaniel',
    'american pit bull terrier', 'Ragdoll', 'Persian', 'Egyptian Mau',
    'miniature pinscher', 'Sphynx', 'Maine Coon', 'keeshond',
    'yorkshire terrier', 'havanese', 'leonberger', 'wheaten terrier',
    'american bulldog', 'english setter', 'boxer', 'newfoundland',
    'Bengal', 'samoyed', 'British Shorthair', 'great pyrenees',
    'Abyssinian', 'pug', 'saint bernard', 'Russian Blue',
    'scottish terrier',
]
| |
|
def classify_pet(image):
    """Run both classifiers on *image* and return their label→score maps."""
    # Supervised ViT predictions.
    vit_output = {r['label']: r['score'] for r in vit_classifier(image)}

    # Zero-shot CLIP predictions over the same breed list.
    clip_results = clip_detector(image, candidate_labels=labels_oxford_pets)
    clip_output = {r['label']: r['score'] for r in clip_results}

    return {"ViT Classification": vit_output, "CLIP Zero-Shot Classification": clip_output}
| |
|
# Clickable example gallery (paths resolved relative to the app's CWD).
example_images = [
    ["example_images/dog1.jpeg"],
    ["example_images/dog2.jpeg"],
    ["example_images/leonberger.jpg"],
    ["example_images/snow_leopard.jpeg"],
    ["example_images/cat.jpg"],
]

# Side-by-side demo: one upload, two classifiers, raw JSON scores out.
iface = gr.Interface(
    fn=classify_pet,
    title="Pet Classification Comparison",
    description="Upload an image of a pet, and compare results from a trained ViT model and a zero-shot CLIP model.",
    inputs=gr.Image(type="filepath"),
    outputs=gr.JSON(),
    examples=example_images,
)

iface.launch()
| |
|
| | import io |
| | from PIL import Image |
| |
|
| | import numpy as np |
| | import pandas as pd |
| | import matplotlib.pyplot as plt |
| |
|
| | import torch |
| | import torch.nn as nn |
| |
|
| | from huggingface_hub import notebook_login |
| |
|
| | from datasets import load_dataset, DatasetDict |
| |
|
| | from transformers import AutoImageProcessor, ViTForImageClassification |
| |
|
| | from transformers import Trainer, TrainingArguments |
| |
|
| | import evaluate |
| |
|
| | |
| | |
# Authenticate with the Hugging Face Hub (required for push_to_hub later).
notebook_login()

dataset = load_dataset('pcuenq/oxford-pets')
dataset

# Inspect one raw row: image bytes plus its string label.
dataset['train'][0]

# Distinct class names present in the training split.
labels = dataset['train'].unique('label')
print(len(labels), labels)
| |
|
def show_samples(ds, rows, cols):
    """Plot a random rows x cols grid of images from *ds*, titled by label."""
    count = rows * cols
    picks = ds.shuffle().select(np.arange(count))
    fig = plt.figure(figsize=(cols * 4, rows * 4))

    for idx in range(count):
        raw_bytes = picks[idx]['image']['bytes']
        fig.add_subplot(rows, cols, idx + 1)
        plt.imshow(Image.open(io.BytesIO(raw_bytes)))
        plt.title(picks[idx]['label'])
        plt.axis('off')


show_samples(dataset['train'], rows=3, cols=5)
| |
|
# 80/10/10 split: carve off 20% for evaluation, then halve it into
# validation and test.
split_dataset = dataset['train'].train_test_split(test_size=0.2)
eval_dataset = split_dataset['test'].train_test_split(test_size=0.5)

our_dataset = DatasetDict({
    'train': split_dataset['train'],
    'validation': eval_dataset['train'],
    'test': eval_dataset['test'],
})

our_dataset

# Bidirectional mappings between breed names and integer class ids.
label2id = {name: i for i, name in enumerate(labels)}
id2label = {i: name for i, name in enumerate(labels)}

# Image preprocessor matching the pretrained ViT checkpoint.
processor = AutoImageProcessor.from_pretrained('google/vit-base-patch16-224')
processor
| |
|
def transforms(batch):
    """Decode raw image bytes, run the ViT processor, and attach integer labels."""
    decoded = [Image.open(io.BytesIO(item['bytes'])).convert('RGB') for item in batch['image']]
    batch['image'] = decoded
    inputs = processor(decoded, return_tensors='pt')
    inputs['labels'] = [label2id[name] for name in batch['label']]
    return inputs


# Applied lazily on access; the on-disk dataset stays untouched.
processed_dataset = our_dataset.with_transform(transforms)

processed_dataset
| |
|
def collate_fn(batch):
    """Stack per-sample tensors into the batched dict the Trainer expects."""
    pixel_values = torch.stack([sample['pixel_values'] for sample in batch])
    label_ids = torch.tensor([sample['labels'] for sample in batch])
    return {'pixel_values': pixel_values, 'labels': label_ids}
| |
|
# Hub-hosted accuracy metric, loaded once and reused every evaluation.
accuracy = evaluate.load('accuracy')


def compute_metrics(eval_preds):
    """Top-1 accuracy from the (logits, labels) pair the Trainer provides."""
    logits, references = eval_preds
    # argmax over the class axis converts logits into hard predictions.
    predictions = np.argmax(logits, axis=1)
    return accuracy.compute(predictions=predictions, references=references)
| |
|
# ViT-Base pretrained on ImageNet; swap its 1000-way head for a fresh
# len(labels)-way classifier (ignore_mismatched_sizes permits the resize).
model = ViTForImageClassification.from_pretrained(
    'google/vit-base-patch16-224',
    num_labels=len(labels),
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,
)

model

# Transfer learning: freeze the backbone, train only the classifier head.
for name, param in model.named_parameters():
    if not name.startswith('classifier'):
        param.requires_grad = False

# Generator expressions instead of sum([...]) — avoids materializing
# throwaway lists just to add them up.
num_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"{num_params = :,} | {trainable_params = :,}")
| |
|
# Training configuration for head-only fine-tuning on the pets splits.
# NOTE(review): `evaluation_strategy` and `tokenizer=` are deprecated in
# newer transformers releases (renamed `eval_strategy` / `processing_class`)
# — confirm the pinned version before upgrading.
training_args = TrainingArguments(
    output_dir="./vit-base-oxford-iiit-pets",
    per_device_train_batch_size=16,
    evaluation_strategy="epoch",  # evaluate once per epoch
    save_strategy="epoch",  # must match eval cadence for load_best_model_at_end
    logging_steps=100,
    num_train_epochs=5,
    learning_rate=3e-4,
    save_total_limit=2,  # keep only the two most recent checkpoints on disk
    remove_unused_columns=False,  # keep raw columns for the on-access transform
    push_to_hub=True,
    report_to='tensorboard',
    load_best_model_at_end=True,
    run_name="transferlearning-google_vit-base-patch16-224 no data augmentation"
)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["validation"],
    tokenizer=processor  # saved with the model so preprocessing travels with it
)

trainer.train()

# Final held-out evaluation on the untouched test split.
trainer.evaluate(processed_dataset['test'])
| |
|
def show_predictions(rows, cols):
    """Plot random test images with their true and predicted labels."""
    count = rows * cols
    picks = our_dataset['test'].shuffle().select(np.arange(count))
    logits = trainer.predict(picks.with_transform(transforms)).predictions
    preds = logits.argmax(axis=1)
    fig = plt.figure(figsize=(cols * 4, rows * 4))

    for idx in range(count):
        img = Image.open(io.BytesIO(picks[idx]['image']['bytes']))
        caption = f"label: {picks[idx]['label']}\npredicted: {id2label[preds[idx]]}"
        fig.add_subplot(rows, cols, idx + 1)
        plt.imshow(img)
        plt.title(caption)
        plt.axis('off')


show_predictions(rows=5, cols=5)
| |
|
# Model-card metadata written to the Hub alongside the weights.
kwargs = dict(
    finetuned_from=model.config._name_or_path,
    dataset='pcuenq/oxford-pets',
    tasks="image-classification",
    tags=['image-classification'],
)

trainer.save_model()
trainer.push_to_hub('🐕️🐈️', **kwargs)