Spaces:

FinalProj5190
/

README

Configuration error

App Files Files Community

sqiud committed on Dec 10, 2024

Commit

a040736

verified ·

1 Parent(s): 6524ea5

Update README.md

Browse files

Files changed (1) hide show

README.md +66 -0

README.md CHANGED Viewed

@@ -13,6 +13,72 @@ lat_std = 0.0006361722351128644 \
 lon_mean = -75.19150880602636 \
 lon_std = 0.000611411894337979
 The model can be loaded using:
 ```
 from huggingface_hub import hf_hub_download

 lon_mean = -75.19150880602636 \
 lon_std = 0.000611411894337979
+The model implementation is shown below:
+```
+import torch
+import torch.nn as nn
+import torchvision.models as models
+import torchvision.transforms as transforms
+from torch.utils.data import DataLoader, Dataset
+from transformers import AutoImageProcessor, AutoModelForImageClassification
+from huggingface_hub import PyTorchModelHubMixin
+from PIL import Image
+import os
+import numpy as np
+from transformers import AutoModel
+class MultiModalModel(nn.Module):
+    def __init__(self, image_model_name="google/vit-base-patch16-224", num_gps_features=2, output_dim=2):
+        super(MultiModalModel, self).__init__()
+        # Load Vision Transformer for feature extraction
+        self.image_model = AutoModel.from_pretrained(image_model_name, output_hidden_states=True)
+        # Reduce image features to match GPS features
+        self.image_fc = nn.Sequential(
+            nn.Linear(self.image_model.config.hidden_size, 256),
+            nn.ReLU(),
+        )
+        # Process GPS features
+        self.gps_fc = nn.Sequential(
+            nn.Linear(num_gps_features, 128),
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(128, 256),
+        )
+        # Combine image and GPS features for regression
+        self.regressor = nn.Sequential(
+            nn.Linear(256 + 256, 512),  # 256 from image + 256 from GPS
+            nn.ReLU(),
+            nn.Dropout(0.4),
+            nn.Linear(512, output_dim),
+        )
+    def forward(self, image, gps):
+        # Extract image features from the last hidden state
+        image_outputs = self.image_model(image)
+        image_features = image_outputs.last_hidden_state[:, 0, :]  # CLS token features
+        image_features = self.image_fc(image_features)
+        # Process GPS features
+        gps_features = self.gps_fc(gps)
+        # Concatenate image and GPS features
+        combined_features = torch.cat([image_features, gps_features], dim=1)
+        # Final regression
+        return self.regressor(combined_features)
+    def save_model(self, save_path):
+        """Save model locally using the Hugging Face format."""
+        self.save_pretrained(save_path)
+    def push_model(self, repo_name):
+        """Push the model to the Hugging Face Hub."""
+        self.push_to_hub(repo_name)
+```
 The model can be loaded using:
 ```
 from huggingface_hub import hf_hub_download