# Image-captioning inference script (exported from a Hugging Face Space).
"""Run image-captioning inference on a single image from a local checkpoint.

Loads the model/tokenizer via the project's ``load_model`` helper, preprocesses
one image, and prints a beam-search-generated caption.
"""

import sys

import torch
from PIL import Image
import torchvision.transforms as transforms

from model import load_model  # project-local: restores model + tokenizer from checkpoint

# Local path where the checkpoint is expected.
CHECKPOINT_PATH = "checkpoint.pth"

# Fallback image used when no path is given on the command line.
DEFAULT_IMAGE_PATH = "/home/rishabh/coding/minor_project/for deployment/image-captionator/GJwtW4JGdR4.jpg"

# Preprocessing — must match the transform used during training.
TRANSFORM = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])


def generate_caption(model, tokenizer, image_path, device, max_length=30, num_beams=4):
    """Return a caption string for the image at ``image_path``.

    Args:
        model: captioning model exposing a ``generate(pixel_values=...)`` API.
        tokenizer: tokenizer whose ``decode`` maps ids back to text.
        image_path: path to an image file readable by PIL.
        device: torch device the model lives on.
        max_length: maximum caption length in tokens.
        num_beams: beam width for beam search.
    """
    image = Image.open(image_path).convert("RGB")
    pixel_values = TRANSFORM(image).unsqueeze(0).to(device)
    # Inference only: disable autograd so no graph is built (fixes needless
    # memory use in the original top-level call).
    with torch.no_grad():
        output_ids = model.generate(
            pixel_values=pixel_values,
            max_length=max_length,
            num_beams=num_beams,
        )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


def main():
    """Entry point: load the model, caption one image, print the result."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model, tokenizer = load_model(CHECKPOINT_PATH, device)
    # NOTE(review): assumes load_model puts the model in eval() mode — confirm,
    # otherwise dropout/batch-norm would run in training mode during generation.
    image_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_IMAGE_PATH
    caption = generate_caption(model, tokenizer, image_path, device)
    print("Generated Caption:", caption)


if __name__ == "__main__":
    # Guard so importing this module no longer triggers model loading and I/O.
    main()