# Classify a room photo with a ResNet-50 checkpoint hosted on Hugging Face.
import requests
import torch
from torchvision import transforms, models
import torch.nn as nn
from PIL import Image
import io
def classify_image_huggingface(image_path, repo_id="mertincesu/property-room-classifier"):
    """Classify a room image using a ResNet-50 checkpoint hosted on Hugging Face.

    Downloads ``pytorch_model.bin`` from the given repo, rebuilds the model
    head to match the checkpoint, and runs a single-image prediction.

    Args:
        image_path: Path to the image file to classify.
        repo_id: Hugging Face repository id hosting ``pytorch_model.bin``.

    Returns:
        dict with keys ``'class'`` (predicted label) and ``'confidence'``
        (softmax probability as a percentage), or ``None`` on any failure
        (network error, bad checkpoint, unreadable image).
    """
    # Automatically select the best available device.
    device = torch.device("cuda" if torch.cuda.is_available() else
                          "mps" if torch.backends.mps.is_available() else
                          "cpu")
    print(f"Using device: {device}")

    # Load model weights directly from Hugging Face.
    model_url = f"https://huggingface.co/{repo_id}/resolve/main/pytorch_model.bin"

    # Fallback mapping used when the checkpoint does not carry its own.
    default_labels = {
        'bath': 0,
        'bed': 1,
        'din': 2,
        'kitchen': 3,
        'living': 4
    }

    try:
        # Download the model file. A timeout prevents a stalled server from
        # hanging this call indefinitely (requests has no default timeout).
        print(f"Downloading model from {model_url}")
        response = requests.get(model_url, timeout=60)
        response.raise_for_status()

        # Load the checkpoint from the in-memory download.
        # NOTE(security): torch.load unpickles arbitrary objects; only use
        # repos you trust, or pass weights_only=True on torch >= 2.0.
        model_binary = io.BytesIO(response.content)
        state_dict = torch.load(model_binary, map_location=device)

        # Checkpoints may be either a wrapper dict (with the state dict and
        # an optional label mapping) or a bare state dict.
        if 'model_state_dict' in state_dict:
            model_state = state_dict['model_state_dict']
            label_to_index = state_dict.get('label_to_index', default_labels)
        else:
            model_state = state_dict
            label_to_index = default_labels

        # Determine the number of classes from the final layer's weights so
        # the rebuilt head always matches the checkpoint.
        num_classes = model_state['fc.weight'].shape[0]

        # Rebuild the architecture: ResNet-50 backbone, fresh linear head.
        model = models.resnet50(weights=None)
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, num_classes)

        model.load_state_dict(model_state)
        model.to(device)
        model.eval()

        # Standard ImageNet preprocessing (matches ResNet-50 training).
        transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

        # Load and preprocess the image; convert to RGB in case of
        # grayscale/RGBA inputs.
        img = Image.open(image_path).convert('RGB')
        img_tensor = transform(img).unsqueeze(0).to(device)

        # Single forward pass; no gradients needed for inference.
        with torch.no_grad():
            outputs = model(img_tensor)
            probs = torch.nn.functional.softmax(outputs, dim=1)
            _, predicted = torch.max(outputs, 1)

        # Invert the label mapping to go from predicted index to label name.
        index_to_label = {v: k for k, v in label_to_index.items()}

        result = {
            'class': index_to_label[predicted.item()],
            'confidence': probs[0][predicted.item()].item() * 100
        }

        print(f"Class: {result['class']}")
        print(f"Confidence: {result['confidence']:.2f}%")
        return result

    except Exception as e:
        # Best-effort contract: report the failure and return None so the
        # caller can decide how to proceed. Include the exception type to
        # aid debugging (network vs. checkpoint vs. image errors).
        print(f"Error: {type(e).__name__}: {e}")
        return None
if __name__ == "__main__":
    import sys

    # Accept the image path as the first CLI argument, or prompt for it.
    image_path = sys.argv[1] if len(sys.argv) > 1 else input("Enter path to image: ")
    classify_image_huggingface(image_path)