---
library_name: transformers
tags: []
---
# OLD MODEL: DO NOT USE FOR LEADERBOARD
# Model Card for ViT_Attempt_1
This is Google's Vision Transformer (ViT), fine-tuned on the collected training data to predict GPS coordinates (latitude and longitude) from images.

Base model: https://huggingface.co/google/vit-base-patch16-224-in21k
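A minimal sketch of how such a fine-tune can be set up, assuming the base checkpoint's classification head is replaced by a freshly initialized two-output regression head (the actual training loop and hyperparameters are not part of this card):

```python
import torch
from transformers import AutoConfig, AutoModelForImageClassification

# Hypothetical fine-tuning setup: start from the base ViT checkpoint and
# attach a 2-output head for (latitude, longitude) regression.
base_name = "google/vit-base-patch16-224-in21k"
config = AutoConfig.from_pretrained(base_name)
config.num_labels = 2               # latitude and longitude
config.problem_type = "regression"  # use MSE loss instead of cross-entropy

model = AutoModelForImageClassification.from_pretrained(
    base_name,
    config=config,
    ignore_mismatched_sizes=True,   # the new 2-output head is freshly initialized
)
```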
The statistics below were used to standardize the latitude/longitude targets during training; they are needed to denormalize the model's outputs at inference time:

```python
lat_mean = 39.95164939753852
lat_std = 0.0007290994359226359
lon_mean = -75.191420541785
lon_std = 0.000733160718757529
```
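For context, a hypothetical helper showing how raw coordinates would be standardized with these statistics before training; the evaluation snippet below inverts exactly this transform:

```python
import torch

# Hypothetical normalization applied to raw (lat, lon) targets in degrees;
# uses the lat/lon statistics defined above.
coord_mean = torch.tensor([lat_mean, lon_mean])
coord_std = torch.tensor([lat_std, lon_std])

def normalize_coords(gps_coords: torch.Tensor) -> torch.Tensor:
    """Map raw [lat, lon] degrees to zero-mean, unit-variance targets."""
    return (gps_coords - coord_mean) / coord_std
```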
Example: loading the fine-tuned model and evaluating it on a validation set (`val_dataloader` is assumed to yield batches of `(images, gps_coords)` with coordinates normalized as above):

```python
import torch
from transformers import AutoConfig, AutoModelForImageClassification

model_name = "AppliedMLReedShreya/ViT_Attempt_1"
config = AutoConfig.from_pretrained(model_name)
config.num_labels = 2  # two outputs: latitude and longitude

# Load the fine-tuned ViT model
vit_model = AutoModelForImageClassification.from_pretrained(model_name, config=config)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
vit_model = vit_model.to(device)

# Collect predictions and ground-truth coordinates over the validation set
all_preds = []
all_actuals = []

vit_model.eval()
with torch.no_grad():
    for images, gps_coords in val_dataloader:
        images, gps_coords = images.to(device), gps_coords.to(device)
        outputs = vit_model(images).logits

        # Denormalize predictions and actual values back to degrees
        preds = outputs.cpu() * torch.tensor([lat_std, lon_std]) + torch.tensor([lat_mean, lon_mean])
        actuals = gps_coords.cpu() * torch.tensor([lat_std, lon_std]) + torch.tensor([lat_mean, lon_mean])

        all_preds.append(preds)
        all_actuals.append(actuals)

# Concatenate all batches into (N, 2) arrays of [lat, lon]
all_preds = torch.cat(all_preds).numpy()
all_actuals = torch.cat(all_actuals).numpy()
```
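One way to summarize these predictions is the mean great-circle error in meters. This haversine implementation is an illustrative sketch, not part of the original card:

```python
import numpy as np

# Hypothetical metric: mean great-circle distance between predicted and
# actual coordinates, via the haversine formula.
def haversine_m(pred: np.ndarray, actual: np.ndarray) -> np.ndarray:
    """Row-wise distance in meters between [lat, lon] arrays in degrees."""
    r = 6_371_000.0  # mean Earth radius in meters
    lat1, lon1 = np.radians(pred[:, 0]), np.radians(pred[:, 1])
    lat2, lon2 = np.radians(actual[:, 0]), np.radians(actual[:, 1])
    dlat, dlon = lat2 - lat1, lon2 - lon1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    return 2 * r * np.arcsin(np.sqrt(a))

print(f"Mean error: {haversine_m(all_preds, all_actuals).mean():.1f} m")
```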