Getting bad results
#2
by
nomeata - opened
Thanks for this model! I tried to get it to work in a google colab sheet, but I am getting only random looking results, even for “easy” queries. Maybe I am preparing the image data wrongly?
from transformers import MobileViTImageProcessor, MobileViTV2ForImageClassification
from PIL import Image, ImageOps
import requests
import torch
import numpy as np # Importing NumPy
url = "https://static.thenounproject.com/png/2024184-200.png"
url = "https://static.thenounproject.com/png/leaf-icon-8044960-200.png"
url = "https://static.thenounproject.com/png/headphone-icon-8123251-200.png"
response = requests.get(url, stream=True)
# Open the image
image1 = Image.open(response.raw)
# Replace alpha channel with white
if image1.mode == 'RGBA':
background = Image.new('RGB', image1.size, (255, 255, 255))
background.paste(image1, mask=image1.split()[3]) # 3 is the alpha channel
image1 = background
# Convert to grayscale to ensure a single channel input
image = image1.resize((200,200)).convert('L') # Convert to grayscale
# inversion helps? tried with and without
image = ImageOps.invert(image)
processor = MobileViTImageProcessor.from_pretrained("laszlokiss27/doodle-dash2")
model = MobileViTV2ForImageClassification.from_pretrained("laszlokiss27/doodle-dash2")
# Convert the PIL image to a tensor and add a channel dimension
image_tensor = torch.unsqueeze(torch.tensor(np.array(image)), 0).float()
image_tensor = image_tensor.unsqueeze(0) # Add batch dimension
# Check if processor requires specific form of input
inputs = processor(images=image_tensor, return_tensors="pt")
outputs = model(**inputs)
logits = outputs.logits
# Get prediction
#print(logits)
predicted_class_idx = logits.argmax(-1).item()
predicted_class = model.config.id2label[predicted_class_idx]
print("Predicted class:", predicted_class, "prob: ", logits[0][predicted_class_idx])
What am I doing wrong?